From 94fe3e14b81e5895f519ada6725bdfa34588c7a2 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Wed, 6 Jul 2016 04:51:08 +0900 Subject: [PATCH 01/12] CLN: move plotting funcs to pd.plotting --- doc/source/visualization.rst | 24 +- pandas/core/config_init.py | 2 +- pandas/core/frame.py | 4 +- pandas/core/groupby.py | 2 +- pandas/core/series.py | 2 +- pandas/plotting/__init__.py | 1 + pandas/plotting/api.py | 20 + pandas/plotting/compat.py | 51 + pandas/plotting/converter.py | 1032 ++++++++++++++ pandas/plotting/misc.py | 573 ++++++++ pandas/{tools => plotting}/plotting.py | 1229 +---------------- pandas/plotting/style.py | 232 ++++ pandas/plotting/timeseries.py | 339 +++++ pandas/plotting/tools.py | 383 +++++ pandas/tests/api/test_api.py | 2 +- pandas/tests/plotting/common.py | 18 +- .../{tseries => plotting}/test_converter.py | 0 pandas/tests/plotting/test_datetimelike.py | 6 +- pandas/tests/plotting/test_frame.py | 12 +- pandas/tests/plotting/test_series.py | 43 +- pandas/tseries/converter.py | 1043 +------------- pandas/tseries/plotting.py | 345 +---- setup.py | 1 + 23 files changed, 2722 insertions(+), 2642 deletions(-) create mode 100644 pandas/plotting/__init__.py create mode 100644 pandas/plotting/api.py create mode 100644 pandas/plotting/compat.py create mode 100644 pandas/plotting/converter.py create mode 100644 pandas/plotting/misc.py rename pandas/{tools => plotting}/plotting.py (70%) create mode 100644 pandas/plotting/style.py create mode 100644 pandas/plotting/timeseries.py create mode 100644 pandas/plotting/tools.py rename pandas/tests/{tseries => plotting}/test_converter.py (100%) diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index e8998bf6f6f5c..4f655e4c6f476 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -152,7 +152,7 @@ You can also create these other plots using the methods ``DataFrame.plot.` In addition to these ``kind`` s, there are the :ref:`DataFrame.hist() `, and :ref:`DataFrame.boxplot() 
` methods, which use a separate interface. -Finally, there are several :ref:`plotting functions ` in ``pandas.tools.plotting`` +Finally, there are several :ref:`plotting functions ` in ``pandas.plotting`` that take a :class:`Series` or :class:`DataFrame` as an argument. These include @@ -823,7 +823,7 @@ before plotting. Plotting Tools -------------- -These functions can be imported from ``pandas.tools.plotting`` +These functions can be imported from ``pandas.plotting`` and take a :class:`Series` or :class:`DataFrame` as an argument. .. _visualization.scatter_matrix: @@ -834,7 +834,7 @@ Scatter Matrix Plot .. versionadded:: 0.7.3 You can create a scatter plot matrix using the -``scatter_matrix`` method in ``pandas.tools.plotting``: +``scatter_matrix`` method in ``pandas.plotting``: .. ipython:: python :suppress: @@ -843,7 +843,7 @@ You can create a scatter plot matrix using the .. ipython:: python - from pandas.tools.plotting import scatter_matrix + from pandas.plotting import scatter_matrix df = pd.DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd']) @savefig scatter_matrix_kde.png @@ -896,7 +896,7 @@ of the same class will usually be closer together and form larger structures. .. ipython:: python - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves data = pd.read_csv('data/iris.data') @@ -918,7 +918,7 @@ represents one data point. Points that tend to cluster will appear closer togeth .. ipython:: python - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates data = pd.read_csv('data/iris.data') @@ -948,7 +948,7 @@ implies that the underlying data are not random. .. ipython:: python - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot plt.figure() @@ -983,7 +983,7 @@ confidence band. .. 
ipython:: python - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot plt.figure() @@ -1016,7 +1016,7 @@ are what constitutes the bootstrap plot. .. ipython:: python - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot data = pd.Series(np.random.rand(1000)) @@ -1048,7 +1048,7 @@ be colored differently. .. ipython:: python - from pandas.tools.plotting import radviz + from pandas.plotting import radviz data = pd.read_csv('data/iris.data') @@ -1450,11 +1450,11 @@ Also, you can pass different :class:`DataFrame` or :class:`Series` for ``table`` plt.close('all') -Finally, there is a helper function ``pandas.tools.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. +Finally, there is a helper function ``pandas.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. .. 
ipython:: python - from pandas.tools.plotting import table + from pandas.plotting import table fig, ax = plt.subplots(1, 1) table(ax, np.round(df.describe(), 2), diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 931fe0661818d..a7003c66024e8 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -285,7 +285,7 @@ def mpl_style_cb(key): stacklevel=5) import sys - from pandas.tools.plotting import mpl_stylesheet + from pandas.plotting.style import mpl_stylesheet global style_backup val = cf.get_option(key) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4565250c78387..3cd9bd2c8aae9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,7 +90,7 @@ import pandas.core.ops as ops import pandas.formats.format as fmt from pandas.formats.printing import pprint_thing -import pandas.tools.plotting as gfx +import pandas.plotting.plotting as gfx from pandas._libs import lib, algos as libalgos @@ -5909,7 +5909,7 @@ def _put_str(s, space): @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): - import pandas.tools.plotting as plots + import pandas.plotting as plots import matplotlib.pyplot as plt ax = plots.boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, grid=grid, rot=rot, figsize=figsize, layout=layout, diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 45a9577c8d8b2..ad24d76cbe2d3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -4159,7 +4159,7 @@ def groupby_series(obj, col=None): return results -from pandas.tools.plotting import boxplot_frame_groupby # noqa +from pandas.plotting.plotting import boxplot_frame_groupby # noqa DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/core/series.py b/pandas/core/series.py index 3305f0b6c439e..411861f20d97c 100644 --- a/pandas/core/series.py +++ 
b/pandas/core/series.py @@ -3001,7 +3001,7 @@ def create_from_value(value, index, dtype): # ---------------------------------------------------------------------- # Add plotting methods to Series -import pandas.tools.plotting as _gfx # noqa +import pandas.plotting.plotting as _gfx # noqa Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, _gfx.SeriesPlotMethods) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py new file mode 100644 index 0000000000000..374276ddc8b56 --- /dev/null +++ b/pandas/plotting/__init__.py @@ -0,0 +1 @@ +from pandas.plotting.api import * # noqa diff --git a/pandas/plotting/api.py b/pandas/plotting/api.py new file mode 100644 index 0000000000000..a64792e406357 --- /dev/null +++ b/pandas/plotting/api.py @@ -0,0 +1,20 @@ +""" +Plotting api +""" + +# flake8: noqa + +try: # mpl optional + from pandas.plotting import converter as conv + conv.register() # needs to override so set_xlim works with str/number +except ImportError: + pass + +from pandas.plotting.misc import (scatter_matrix, radviz, + andrews_curves, bootstrap_plot, + parallel_coordinates, lag_plot, + autocorrelation_plot) +from pandas.plotting.plotting import (boxplot, scatter_plot, grouped_hist, + hist_frame, hist_series) +from pandas.plotting.style import plot_params +from pandas.plotting.tools import table \ No newline at end of file diff --git a/pandas/plotting/compat.py b/pandas/plotting/compat.py new file mode 100644 index 0000000000000..3191972d78dee --- /dev/null +++ b/pandas/plotting/compat.py @@ -0,0 +1,51 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +from distutils.version import LooseVersion + + +def _mpl_le_1_2_1(): + try: + import matplotlib as mpl + return (str(mpl.__version__) <= LooseVersion('1.2.1') and + str(mpl.__version__)[0] != '0') + except ImportError: + return False + + +def _mpl_ge_1_3_1(): + try: + import matplotlib + # The or v[0] == '0' is because their versioneer is + # messed up on 
dev + return (matplotlib.__version__ >= LooseVersion('1.3.1') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_4_0(): + try: + import matplotlib + return (matplotlib.__version__ >= LooseVersion('1.4') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_5_0(): + try: + import matplotlib + return (matplotlib.__version__ >= LooseVersion('1.5') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_2_0_0(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0') + except ImportError: + return False diff --git a/pandas/plotting/converter.py b/pandas/plotting/converter.py new file mode 100644 index 0000000000000..bc768a8bc5b58 --- /dev/null +++ b/pandas/plotting/converter.py @@ -0,0 +1,1032 @@ +from datetime import datetime, timedelta +import datetime as pydt +import numpy as np + +from dateutil.relativedelta import relativedelta + +import matplotlib.units as units +import matplotlib.dates as dates + +from matplotlib.ticker import Formatter, AutoLocator, Locator +from matplotlib.transforms import nonsingular + + +from pandas.types.common import (is_float, is_integer, + is_integer_dtype, + is_float_dtype, + is_datetime64_ns_dtype, + is_period_arraylike, + ) + +from pandas.compat import lrange +import pandas.compat as compat +import pandas._libs.lib as lib +import pandas.core.common as com +from pandas.core.index import Index + +from pandas.core.series import Series +from pandas.tseries.index import date_range +import pandas.tseries.tools as tools +import pandas.tseries.frequencies as frequencies +from pandas.tseries.frequencies import FreqGroup +from pandas.tseries.period import Period, PeriodIndex + +# constants +HOURS_PER_DAY = 24. +MIN_PER_HOUR = 60. +SEC_PER_MIN = 60. 
+ +SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY + +MUSEC_PER_DAY = 1e6 * SEC_PER_DAY + + +def _mpl_le_2_0_0(): + try: + import matplotlib + return matplotlib.compare_versions('2.0.0', matplotlib.__version__) + except ImportError: + return False + + +def register(): + units.registry[lib.Timestamp] = DatetimeConverter() + units.registry[Period] = PeriodConverter() + units.registry[pydt.datetime] = DatetimeConverter() + units.registry[pydt.date] = DatetimeConverter() + units.registry[pydt.time] = TimeConverter() + units.registry[np.datetime64] = DatetimeConverter() + + +def _to_ordinalf(tm): + tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + + float(tm.microsecond / 1e6)) + return tot_sec + + +def time2num(d): + if isinstance(d, compat.string_types): + parsed = tools.to_datetime(d) + if not isinstance(parsed, datetime): + raise ValueError('Could not parse time %s' % d) + return _to_ordinalf(parsed.time()) + if isinstance(d, pydt.time): + return _to_ordinalf(d) + return d + + +class TimeConverter(units.ConversionInterface): + + @staticmethod + def convert(value, unit, axis): + valid_types = (str, pydt.time) + if (isinstance(value, valid_types) or is_integer(value) or + is_float(value)): + return time2num(value) + if isinstance(value, Index): + return value.map(time2num) + if isinstance(value, (list, tuple, np.ndarray, Index)): + return [time2num(x) for x in value] + return value + + @staticmethod + def axisinfo(unit, axis): + if unit != 'time': + return None + + majloc = AutoLocator() + majfmt = TimeFormatter(majloc) + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') + + @staticmethod + def default_units(x, axis): + return 'time' + + +# time formatter +class TimeFormatter(Formatter): + + def __init__(self, locs): + self.locs = locs + + def __call__(self, x, pos=0): + fmt = '%H:%M:%S' + s = int(x) + ms = int((x - s) * 1e3) + us = int((x - s) * 1e6 - ms) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + _, h 
= divmod(h, 24) + if us != 0: + fmt += '.%6f' + elif ms != 0: + fmt += '.%3f' + + return pydt.time(h, m, s, us).strftime(fmt) + + +# Period Conversion + + +class PeriodConverter(dates.DateConverter): + + @staticmethod + def convert(values, units, axis): + if not hasattr(axis, 'freq'): + raise TypeError('Axis must have `freq` set to convert to Periods') + valid_types = (compat.string_types, datetime, + Period, pydt.date, pydt.time) + if (isinstance(values, valid_types) or is_integer(values) or + is_float(values)): + return get_datevalue(values, axis.freq) + if isinstance(values, PeriodIndex): + return values.asfreq(axis.freq)._values + if isinstance(values, Index): + return values.map(lambda x: get_datevalue(x, axis.freq)) + if is_period_arraylike(values): + return PeriodIndex(values, freq=axis.freq)._values + if isinstance(values, (list, tuple, np.ndarray, Index)): + return [get_datevalue(x, axis.freq) for x in values] + return values + + +def get_datevalue(date, freq): + if isinstance(date, Period): + return date.asfreq(freq).ordinal + elif isinstance(date, (compat.string_types, datetime, + pydt.date, pydt.time)): + return Period(date, freq).ordinal + elif (is_integer(date) or is_float(date) or + (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): + return date + elif date is None: + return None + raise ValueError("Unrecognizable date '%s'" % date) + + +def _dt_to_float_ordinal(dt): + """ + Convert :mod:`datetime` to the Gregorian date as UTC float days, + preserving hours, minutes, seconds and microseconds. Return value + is a :func:`float`. 
+ """ + if (isinstance(dt, (np.ndarray, Index, Series) + ) and is_datetime64_ns_dtype(dt)): + base = dates.epoch2num(dt.asi8 / 1.0E9) + else: + base = dates.date2num(dt) + return base + + +# Datetime Conversion +class DatetimeConverter(dates.DateConverter): + + @staticmethod + def convert(values, unit, axis): + def try_parse(values): + try: + return _dt_to_float_ordinal(tools.to_datetime(values)) + except Exception: + return values + + if isinstance(values, (datetime, pydt.date)): + return _dt_to_float_ordinal(values) + elif isinstance(values, np.datetime64): + return _dt_to_float_ordinal(lib.Timestamp(values)) + elif isinstance(values, pydt.time): + return dates.date2num(values) + elif (is_integer(values) or is_float(values)): + return values + elif isinstance(values, compat.string_types): + return try_parse(values) + elif isinstance(values, (list, tuple, np.ndarray, Index)): + if isinstance(values, Index): + values = values.values + if not isinstance(values, np.ndarray): + values = com._asarray_tuplesafe(values) + + if is_integer_dtype(values) or is_float_dtype(values): + return values + + try: + values = tools.to_datetime(values) + if isinstance(values, Index): + values = _dt_to_float_ordinal(values) + else: + values = [_dt_to_float_ordinal(x) for x in values] + except Exception: + values = _dt_to_float_ordinal(values) + + return values + + @staticmethod + def axisinfo(unit, axis): + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is a tzinfo instance or None. + The *axis* argument is required but not used. 
+ """ + tz = unit + + majloc = PandasAutoDateLocator(tz=tz) + majfmt = PandasAutoDateFormatter(majloc, tz=tz) + datemin = pydt.date(2000, 1, 1) + datemax = pydt.date(2010, 1, 1) + + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', + default_limits=(datemin, datemax)) + + +class PandasAutoDateFormatter(dates.AutoDateFormatter): + + def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): + dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) + # matplotlib.dates._UTC has no _utcoffset called by pandas + if self._tz is dates.UTC: + self._tz._utcoffset = self._tz.utcoffset(None) + + # For mpl > 2.0 the format strings are controlled via rcparams + # so do not mess with them. For mpl < 2.0 change the second + # break point and add a musec break point + if _mpl_le_2_0_0(): + self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' + self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' + + +class PandasAutoDateLocator(dates.AutoDateLocator): + + def get_locator(self, dmin, dmax): + 'Pick the best locator based on a distance.' + delta = relativedelta(dmax, dmin) + + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days + num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds + tot_sec = num_days * 86400. + num_sec + + if abs(tot_sec) < self.minticks: + self._freq = -1 + locator = MilliSecondLocator(self.tz) + locator.set_axis(self.axis) + + locator.set_view_interval(*self.axis.get_view_interval()) + locator.set_data_interval(*self.axis.get_data_interval()) + return locator + + return dates.AutoDateLocator.get_locator(self, dmin, dmax) + + def _get_unit(self): + return MilliSecondLocator.get_unit_generic(self._freq) + + +class MilliSecondLocator(dates.DateLocator): + + UNIT = 1. / (24 * 3600 * 1000) + + def __init__(self, tz): + dates.DateLocator.__init__(self, tz) + self._interval = 1. 
+ + def _get_unit(self): + return self.get_unit_generic(-1) + + @staticmethod + def get_unit_generic(freq): + unit = dates.RRuleLocator.get_unit_generic(freq) + if unit < 0: + return MilliSecondLocator.UNIT + return unit + + def __call__(self): + # if no data have been set, this will tank with a ValueError + try: + dmin, dmax = self.viewlim_to_dt() + except ValueError: + return [] + + if dmin > dmax: + dmax, dmin = dmin, dmax + # We need to cap at the endpoints of valid datetime + + # TODO(wesm) unused? + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! + # stop = _from_ordinal(3652059.9999999) + + nmax, nmin = dates.date2num((dmax, dmin)) + + num = (nmax - nmin) * 86400 * 1000 + max_millis_ticks = 6 + for interval in [1, 10, 50, 100, 200, 500]: + if num <= interval * (max_millis_ticks - 1): + self._interval = interval + break + else: + # We went through the whole loop without breaking, default to 1 + self._interval = 1000. + + estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) + + if estimate > self.MAXTICKS * 2: + raise RuntimeError(('MillisecondLocator estimated to generate %d ' + 'ticks from %s to %s: exceeds Locator.MAXTICKS' + '* 2 (%d) ') % + (estimate, dmin, dmax, self.MAXTICKS * 2)) + + freq = '%dL' % self._get_interval() + tz = self.tz.tzname(None) + st = _from_ordinal(dates.date2num(dmin)) # strip tz + ed = _from_ordinal(dates.date2num(dmax)) + all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).asobject + + try: + if len(all_dates) > 0: + locs = self.raise_if_exceeds(dates.date2num(all_dates)) + return locs + except Exception: # pragma: no cover + pass + + lims = dates.date2num([dmin, dmax]) + return lims + + def _get_interval(self): + return self._interval + + def autoscale(self): + """ + Set the view limits to include the data range. 
+ """ + dmin, dmax = self.datalim_to_dt() + if dmin > dmax: + dmax, dmin = dmin, dmax + + # We need to cap at the endpoints of valid datetime + + # TODO(wesm): unused? + + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! + # stop = _from_ordinal(3652059.9999999) + + dmin, dmax = self.datalim_to_dt() + + vmin = dates.date2num(dmin) + vmax = dates.date2num(dmax) + + return self.nonsingular(vmin, vmax) + + +def _from_ordinal(x, tz=None): + ix = int(x) + dt = datetime.fromordinal(ix) + remainder = float(x) - ix + hour, remainder = divmod(24 * remainder, 1) + minute, remainder = divmod(60 * remainder, 1) + second, remainder = divmod(60 * remainder, 1) + microsecond = int(1e6 * remainder) + if microsecond < 10: + microsecond = 0 # compensate for rounding errors + dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), + int(second), microsecond) + if tz is not None: + dt = dt.astimezone(tz) + + if microsecond > 999990: # compensate for rounding errors + dt += timedelta(microseconds=1e6 - microsecond) + + return dt + +# Fixed frequency dynamic tick locators and formatters + +# ------------------------------------------------------------------------- +# --- Locators --- +# ------------------------------------------------------------------------- + + +def _get_default_annual_spacing(nyears): + """ + Returns a default spacing between consecutive ticks for annual data. 
+ """ + if nyears < 11: + (min_spacing, maj_spacing) = (1, 1) + elif nyears < 20: + (min_spacing, maj_spacing) = (1, 2) + elif nyears < 50: + (min_spacing, maj_spacing) = (1, 5) + elif nyears < 100: + (min_spacing, maj_spacing) = (5, 10) + elif nyears < 200: + (min_spacing, maj_spacing) = (5, 25) + elif nyears < 600: + (min_spacing, maj_spacing) = (10, 50) + else: + factor = nyears // 1000 + 1 + (min_spacing, maj_spacing) = (factor * 20, factor * 100) + return (min_spacing, maj_spacing) + + +def period_break(dates, period): + """ + Returns the indices where the given period changes. + + Parameters + ---------- + dates : PeriodIndex + Array of intervals to monitor. + period : string + Name of the period to monitor. + """ + current = getattr(dates, period) + previous = getattr(dates - 1, period) + return np.nonzero(current - previous)[0] + + +def has_level_label(label_flags, vmin): + """ + Returns true if the ``label_flags`` indicate there is at least one label + for this level. + + if the minimum view limit is not an exact integer, then the first tick + label won't be shown, so we must adjust for that. 
+ """ + if label_flags.size == 0 or (label_flags.size == 1 and + label_flags[0] == 0 and + vmin % 1 > 0.0): + return False + else: + return True + + +def _daily_finder(vmin, vmax, freq): + periodsperday = -1 + + if freq >= FreqGroup.FR_HR: + if freq == FreqGroup.FR_NS: + periodsperday = 24 * 60 * 60 * 1000000000 + elif freq == FreqGroup.FR_US: + periodsperday = 24 * 60 * 60 * 1000000 + elif freq == FreqGroup.FR_MS: + periodsperday = 24 * 60 * 60 * 1000 + elif freq == FreqGroup.FR_SEC: + periodsperday = 24 * 60 * 60 + elif freq == FreqGroup.FR_MIN: + periodsperday = 24 * 60 + elif freq == FreqGroup.FR_HR: + periodsperday = 24 + else: # pragma: no cover + raise ValueError("unexpected frequency: %s" % freq) + periodsperyear = 365 * periodsperday + periodspermonth = 28 * periodsperday + + elif freq == FreqGroup.FR_BUS: + periodsperyear = 261 + periodspermonth = 19 + elif freq == FreqGroup.FR_DAY: + periodsperyear = 365 + periodspermonth = 28 + elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: + periodsperyear = 52 + periodspermonth = 3 + else: # pragma: no cover + raise ValueError("unexpected frequency") + + # save this for later usage + vmin_orig = vmin + + (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), + Period(ordinal=int(vmax), freq=freq)) + span = vmax.ordinal - vmin.ordinal + 1 + dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) + # Initialize the output + info = np.zeros(span, + dtype=[('val', np.int64), ('maj', bool), + ('min', bool), ('fmt', '|S20')]) + info['val'][:] = dates_._values + info['fmt'][:] = '' + info['maj'][[0, -1]] = True + # .. and set some shortcuts + info_maj = info['maj'] + info_min = info['min'] + info_fmt = info['fmt'] + + def first_label(label_flags): + if (label_flags[0] == 0) and (label_flags.size > 1) and \ + ((vmin_orig % 1) > 0.0): + return label_flags[1] + else: + return label_flags[0] + + # Case 1. 
Less than a month + if span <= periodspermonth: + day_start = period_break(dates_, 'day') + month_start = period_break(dates_, 'month') + + def _hour_finder(label_interval, force_year_start): + _hour = dates_.hour + _prev_hour = (dates_ - 1).hour + hour_start = (_hour - _prev_hour) != 0 + info_maj[day_start] = True + info_min[hour_start & (_hour % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + if force_year_start and not has_level_label(year_start, vmin_orig): + info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' + + def _minute_finder(label_interval): + hour_start = period_break(dates_, 'hour') + _minute = dates_.minute + _prev_minute = (dates_ - 1).minute + minute_start = (_minute - _prev_minute) != 0 + info_maj[hour_start] = True + info_min[minute_start & (_minute % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + + def _second_finder(label_interval): + minute_start = period_break(dates_, 'minute') + _second = dates_.second + _prev_second = (dates_ - 1).second + second_start = (_second - _prev_second) != 0 + info['maj'][minute_start] = True + info['min'][second_start & (_second % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[second_start & (_second % + label_interval == 0)] = '%H:%M:%S' + info_fmt[day_start] = '%H:%M:%S\n%d-%b' + info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' + + if span < periodsperday / 12000.0: + _second_finder(1) + elif span < periodsperday / 6000.0: + _second_finder(2) + elif span < periodsperday / 2400.0: + _second_finder(5) + elif span < periodsperday / 1200.0: + _second_finder(10) + elif span < 
periodsperday / 800.0: + _second_finder(15) + elif span < periodsperday / 400.0: + _second_finder(30) + elif span < periodsperday / 150.0: + _minute_finder(1) + elif span < periodsperday / 70.0: + _minute_finder(2) + elif span < periodsperday / 24.0: + _minute_finder(5) + elif span < periodsperday / 12.0: + _minute_finder(15) + elif span < periodsperday / 6.0: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) + else: + info_maj[month_start] = True + info_min[day_start] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[day_start] = '%d' + info_fmt[month_start] = '%d\n%b' + info_fmt[year_start] = '%d\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(day_start)] = '%d\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '%d\n%b\n%Y' + + # Case 2. Less than three months + elif span <= periodsperyear // 4: + month_start = period_break(dates_, 'month') + info_maj[month_start] = True + if freq < FreqGroup.FR_HR: + info['min'] = True + else: + day_start = period_break(dates_, 'day') + info['min'][day_start] = True + week_start = period_break(dates_, 'week') + year_start = period_break(dates_, 'year') + info_fmt[week_start] = '%d' + info_fmt[month_start] = '\n\n%b' + info_fmt[year_start] = '\n\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(week_start)] = '\n\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '\n\n%b\n%Y' + # Case 3. Less than 14 months ............... 
+ elif span <= 1.15 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + week_start = period_break(dates_, 'week') + info_maj[month_start] = True + info_min[week_start] = True + info_min[year_start] = False + info_min[month_start] = False + info_fmt[month_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + if not has_level_label(year_start, vmin_orig): + info_fmt[first_label(month_start)] = '%b\n%Y' + # Case 4. Less than 2.5 years ............... + elif span <= 2.5 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + month_start = period_break(dates_, 'month') + info_maj[quarter_start] = True + info_min[month_start] = True + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 4. Less than 4 years ................. + elif span <= 4 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + info_maj[year_start] = True + info_min[month_start] = True + info_min[year_start] = False + + month_break = dates_[month_start].month + jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 5. Less than 11 years ................ + elif span <= 11 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + info_maj[year_start] = True + info_min[quarter_start] = True + info_min[year_start] = False + info_fmt[year_start] = '%Y' + # Case 6. More than 12 years ................ 
+ else: + year_start = period_break(dates_, 'year') + year_break = dates_[year_start].year + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(year_break % maj_anndef == 0)] + info_maj[major_idx] = True + minor_idx = year_start[(year_break % min_anndef == 0)] + info_min[minor_idx] = True + info_fmt[major_idx] = '%Y' + + return info + + +def _monthly_finder(vmin, vmax, freq): + periodsperyear = 12 + + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + # Initialize the output + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + dates_ = info['val'] + info['fmt'] = '' + year_start = (dates_ % 12 == 0).nonzero()[0] + info_maj = info['maj'] + info_fmt = info['fmt'] + + if span <= 1.15 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = '%b' + info_fmt[year_start] = '%b\n%Y' + + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = '%b\n%Y' + + elif span <= 2.5 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + # TODO: Check the following : is it really info['fmt'] ? 
+ info['fmt'][quarter_start] = True + info['min'] = True + + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 4 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 11 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + info['min'][quarter_start] = True + + info_fmt[year_start] = '%Y' + + else: + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + years = dates_[year_start] // 12 + 1 + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + + info_fmt[major_idx] = '%Y' + + return info + + +def _quarterly_finder(vmin, vmax, freq): + periodsperyear = 4 + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + info['fmt'] = '' + dates_ = info['val'] + info_maj = info['maj'] + info_fmt = info['fmt'] + year_start = (dates_ % 4 == 0).nonzero()[0] + + if span <= 3.5 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = 'Q%q' + info_fmt[year_start] = 'Q%q\n%F' + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = 'Q%q\n%F' + + elif span <= 11 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + info_fmt[year_start] = '%F' + + else: + years = dates_[year_start] // 4 + 1 + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + info_fmt[major_idx] = '%F' + + 
return info + + +def _annual_finder(vmin, vmax, freq): + (vmin, vmax) = (int(vmin), int(vmax + 1)) + span = vmax - vmin + 1 + + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + info['fmt'] = '' + dates_ = info['val'] + + (min_anndef, maj_anndef) = _get_default_annual_spacing(span) + major_idx = dates_ % maj_anndef == 0 + info['maj'][major_idx] = True + info['min'][(dates_ % min_anndef == 0)] = True + info['fmt'][major_idx] = '%Y' + + return info + + +def get_finder(freq): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + fgroup = frequencies.get_freq_group(freq) + + if fgroup == FreqGroup.FR_ANN: + return _annual_finder + elif fgroup == FreqGroup.FR_QTR: + return _quarterly_finder + elif freq == FreqGroup.FR_MTH: + return _monthly_finder + elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): + return _daily_finder + else: # pragma: no cover + errmsg = "Unsupported frequency: %s" % (freq) + raise NotImplementedError(errmsg) + + +class TimeSeries_DateLocator(Locator): + """ + Locates the ticks along an axis controlled by a :class:`Series`. + + Parameters + ---------- + freq : {var} + Valid frequency specifier. + minor_locator : {False, True}, optional + Whether the locator is for minor ticks (True) or not. + dynamic_mode : {True, False}, optional + Whether the locator should work in dynamic mode. 
+ base : {int}, optional + quarter : {int}, optional + month : {int}, optional + day : {int}, optional + """ + + def __init__(self, freq, minor_locator=False, dynamic_mode=True, + base=1, quarter=1, month=1, day=1, plot_obj=None): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + self.freq = freq + self.base = base + (self.quarter, self.month, self.day) = (quarter, month, day) + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _get_default_locs(self, vmin, vmax): + "Returns the default locations of ticks." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + + locator = self.plot_obj.date_axis_info + + if self.isminor: + return np.compress(locator['min'], locator['val']) + return np.compress(locator['maj'], locator['val']) + + def __call__(self): + 'Return the locations of the ticks.' + # axis calls Locator.set_axis inside set_m_formatter + vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + vmin, vmax = vi + if vmax < vmin: + vmin, vmax = vmax, vmin + if self.isdynamic: + locs = self._get_default_locs(vmin, vmax) + else: # pragma: no cover + base = self.base + (d, m) = divmod(vmin, base) + vmin = (d + 1) * base + locs = lrange(vmin, vmax + 1, base) + return locs + + def autoscale(self): + """ + Sets the view limits to the nearest multiples of base that contain the + data. 
+ """ + # requires matplotlib >= 0.98.0 + (vmin, vmax) = self.axis.get_data_interval() + + locs = self._get_default_locs(vmin, vmax) + (vmin, vmax) = locs[[0, -1]] + if vmin == vmax: + vmin -= 1 + vmax += 1 + return nonsingular(vmin, vmax) + +# ------------------------------------------------------------------------- +# --- Formatter --- +# ------------------------------------------------------------------------- + + +class TimeSeries_DateFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`PeriodIndex`. + + Parameters + ---------- + freq : {int, string} + Valid frequency specifier. + minor_locator : {False, True} + Whether the current formatter should apply to minor ticks (True) or + major ticks (False). + dynamic_mode : {True, False} + Whether the formatter works in dynamic mode or not. + """ + + def __init__(self, freq, minor_locator=False, dynamic_mode=True, + plot_obj=None): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + self.format = None + self.freq = freq + self.locs = [] + self.formatdict = None + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _set_default_format(self, vmin, vmax): + "Returns the default ticks spacing." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + info = self.plot_obj.date_axis_info + + if self.isminor: + format = np.compress(info['min'] & np.logical_not(info['maj']), + info) + else: + format = np.compress(info['maj'], info) + self.formatdict = dict([(x, f) for (x, _, _, f) in format]) + return self.formatdict + + def set_locs(self, locs): + 'Sets the locations of the ticks' + # don't actually use the locs. This is just needed to work with + # matplotlib. 
Force to use vmin, vmax + self.locs = locs + + (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + if vmax < vmin: + (vmin, vmax) = (vmax, vmin) + self._set_default_format(vmin, vmax) + + def __call__(self, x, pos=0): + if self.formatdict is None: + return '' + else: + fmt = self.formatdict.pop(x, '') + return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. + """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + def __call__(self, x, pos=0): + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/pandas/plotting/misc.py b/pandas/plotting/misc.py new file mode 100644 index 0000000000000..57306ab77f1e1 --- /dev/null +++ b/pandas/plotting/misc.py @@ -0,0 +1,573 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import numpy as np + +from pandas.util.decorators import deprecate_kwarg +from pandas.types.missing import notnull +from pandas.compat import range, lrange, lmap, zip +from pandas.formats.printing import pprint_thing + + +from pandas.plotting.style import _get_standard_colors +from pandas.plotting.tools import _subplots, _set_ticks_props + + +def scatter_matrix(frame, alpha=0.5, figsize=None, 
ax=None, grid=False, + diagonal='hist', marker='.', density_kwds=None, + hist_kwds=None, range_padding=0.05, **kwds): + """ + Draw a matrix of scatter plots. + + Parameters + ---------- + frame : DataFrame + alpha : float, optional + amount of transparency applied + figsize : (float,float), optional + a tuple (width, height) in inches + ax : Matplotlib axis object, optional + grid : bool, optional + setting this to True will show the grid + diagonal : {'hist', 'kde'} + pick between 'kde' and 'hist' for + either Kernel Density Estimation or Histogram + plot in the diagonal + marker : str, optional + Matplotlib marker type, default '.' + hist_kwds : other plotting keyword arguments + To be passed to hist function + density_kwds : other plotting keyword arguments + To be passed to kernel density estimate plot + range_padding : float, optional + relative extension of axis range in x and y + with respect to (x_max - x_min) or (y_max - y_min), + default 0.05 + kwds : other plotting keyword arguments + To be passed to scatter function + + Examples + -------- + >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) + >>> scatter_matrix(df, alpha=0.2) + """ + + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, + squeeze=False) + + # no gaps between subplots + fig.subplots_adjust(wspace=0, hspace=0) + + mask = notnull(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault('edgecolors', 'none') + + boundaries_list = [] + for a in df.columns: + values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2. 
+ boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) + + for i, a in zip(lrange(n), df.columns): + for j, b in zip(lrange(n), df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. + if diagonal == 'hist': + ax.hist(values, **hist_kwds) + + elif diagonal in ('kde', 'density'): + from scipy.stats import gaussian_kde + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter(df[b][common], df[a][common], + marker=marker, alpha=alpha, **kwds) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + import matplotlib.lines as mlines + import matplotlib as mpl + if mpl.__version__ < '1.1.0' and marker == '.': + return 'o' + if marker not in mlines.lineMarkers: + return 'o' + return marker + + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + """RadViz - a multivariate data visualization algorithm + + Parameters: + ----------- + frame: DataFrame + class_column: str + Column name containing class names + ax: Matplotlib axis object, optional + color: list or tuple, 
optional + Colors to use for the different classes + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + kwds: keywords + Options to pass to matplotlib scatter plotting method + + Returns: + -------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + import matplotlib.patches as patches + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) + + to_plot = {} + colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, + color_type='random', color=color) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array([(np.cos(t), np.sin(t)) + for t in [2.0 * np.pi * (i / float(m)) + for i in range(m)]]) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], + label=pprint_thing(kls), **kwds) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text(xy[0] - 0.025, xy[1] - 0.025, name, + ha='right', va='top', size='small') + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text(xy[0] - 0.025, xy[1] + 0.025, name, + ha='right', va='bottom', size='small') + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text(xy[0] + 0.025, xy[1] - 0.025, name, + ha='left', va='top', size='small') + elif xy[0] >= 0.0 and 
xy[1] >= 0.0: + ax.text(xy[0] + 0.025, xy[1] + 0.025, name, + ha='left', va='bottom', size='small') + + ax.axis('equal') + return ax + + +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') +def andrews_curves(frame, class_column, ax=None, samples=200, color=None, + colormap=None, **kwds): + """ + Generates a matplotlib plot of Andrews curves, for visualising clusters of + multivariate data. + + Andrews curves have the functional form: + + f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + + x_4 sin(2t) + x_5 cos(2t) + ... + + Where x coefficients correspond to the values of each dimension and t is + linearly spaced between -pi and +pi. Each row of frame then corresponds to + a single curve. + + Parameters: + ----------- + frame : DataFrame + Data to be plotted, preferably normalized to (0.0, 1.0) + class_column : Name of the column containing class names + ax : matplotlib axes object, default None + samples : Number of points to plot in each curve + color: list or tuple, optional + Colors to use for the different classes + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + kwds: keywords + Options to pass to matplotlib plotting method + + Returns: + -------- + ax: Matplotlib axis object + + """ + from math import sqrt, pi + import matplotlib.pyplot as plt + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. + coeffs = np.delete(np.copy(amplitudes), 0) + coeffs.resize(int((coeffs.size + 1) / 2), 2) + + # Generate the harmonics and arguments for the sin and cos + # functions. 
+ harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0) + return result + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-pi, pi, samples) + used_legends = set([]) + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca(xlim=(-pi, pi)) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc='upper right') + ax.grid() + return ax + + +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + """Bootstrap plot. 
+ + Parameters: + ----------- + series: Time series + fig: matplotlib figure object, optional + size: number of data points to consider during each sampling + samples: number of times the bootstrap procedure is performed + kwds: optional keyword arguments for plotting commands, must be accepted + by both hist and plot + + Returns: + -------- + fig: matplotlib figure + """ + import random + import matplotlib.pyplot as plt + + # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array([(min(sampling) + max(sampling)) * 0.5 + for sampling in samplings]) + if fig is None: + fig = plt.figure() + x = lrange(samples) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 = fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + return fig + + +@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) +def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, + use_columns=False, xticks=None, colormap=None, + axvlines=True, axvlines_kwds=None, sort_labels=False, + **kwds): + 
"""Parallel coordinates plotting. + + Parameters + ---------- + frame: DataFrame + class_column: str + Column name containing class names + cols: list, optional + A list of column names to use + ax: matplotlib.axis, optional + matplotlib axis object + color: list or tuple, optional + Colors to use for the different classes + use_columns: bool, optional + If true, columns will be used as xticks + xticks: list or tuple, optional + A list of values to use for xticks + colormap: str or matplotlib colormap, default None + Colormap to use for line colors. + axvlines: bool, optional + If true, vertical lines will be added at each xtick + axvlines_kwds: keywords, optional + Options to be passed to axvline method for vertical lines + sort_labels: bool, False + Sort class_column labels, useful when assigning colours + + .. versionadded:: 0.20.0 + + kwds: keywords + Options to pass to matplotlib plotting method + + Returns + ------- + ax: matplotlib axis object + + Examples + -------- + >>> from pandas import read_csv + >>> from pandas.tools.plotting import parallel_coordinates + >>> from matplotlib import pyplot as plt + >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' + '/pandas/tests/data/iris.csv') + >>> parallel_coordinates(df, 'Name', color=('#556270', + '#4ECDC4', '#C7F464')) + >>> plt.show() + """ + if axvlines_kwds is None: + axvlines_kwds = {'linewidth': 1, 'color': 'black'} + import matplotlib.pyplot as plt + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends = set([]) + + ncols = len(df.columns) + + # determine values to use for xticks + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError('Columns must be numeric to be used as xticks') + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError('xticks specified must 
be numeric') + elif len(xticks) != ncols: + raise ValueError('Length of xticks must match number of columns') + x = xticks + else: + x = lrange(ncols) + + if ax is None: + ax = plt.gca() + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc='upper right') + ax.grid() + return ax + + +def lag_plot(series, lag=1, ax=None, **kwds): + """Lag plot for time series. + + Parameters: + ----------- + series: Time series + lag: lag of the scatter plot, default 1 + ax: Matplotlib axis object, optional + kwds: Matplotlib scatter method keyword arguments, optional + + Returns: + -------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + + # workaround because `c='b'` is hardcoded in matplotlibs scatter method + kwds.setdefault('c', plt.rcParams['patch.facecolor']) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + ax.set_ylabel("y(t + %s)" % lag) + ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series, ax=None, **kwds): + """Autocorrelation plot for time series. 
+ + Parameters: + ----------- + series: Time series + ax: Matplotlib axis object, optional + kwds : keywords + Options to pass to matplotlib plotting method + + Returns: + ----------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / float(n) + + def r(h): + return ((data[:n - h] - mean) * + (data[h:] - mean)).sum() / float(n) / c0 + x = np.arange(n) + 1 + y = lmap(r, x) + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') + ax.axhline(y=z95 / np.sqrt(n), color='grey') + ax.axhline(y=0.0, color='black') + ax.axhline(y=-z95 / np.sqrt(n), color='grey') + ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if 'label' in kwds: + ax.legend() + ax.grid() + return ax diff --git a/pandas/tools/plotting.py b/pandas/plotting/plotting.py similarity index 70% rename from pandas/tools/plotting.py rename to pandas/plotting/plotting.py index 141e3c74b91c4..6ca00a5035592 100644 --- a/pandas/tools/plotting.py +++ b/pandas/plotting/plotting.py @@ -4,151 +4,38 @@ import warnings import re -from math import ceil from collections import namedtuple -from contextlib import contextmanager from distutils.version import LooseVersion import numpy as np +from pandas.util.decorators import cache_readonly +from pandas.core.base import PandasObject from pandas.types.common import (is_list_like, is_integer, is_number, is_hashable, is_iterator) -from pandas.types.missing import isnull, notnull - -from pandas.util.decorators import cache_readonly, deprecate_kwarg -from pandas.core.base import PandasObject - -from pandas.core.common import AbstractMethodError, _try_sort +from pandas.core.common import AbstractMethodError, isnull, _try_sort from 
pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex from pandas.core.series import Series, remove_na from pandas.tseries.period import PeriodIndex -from pandas.compat import range, lrange, lmap, map, zip, string_types +from pandas.compat import range, lrange, map, zip, string_types import pandas.compat as compat from pandas.formats.printing import pprint_thing from pandas.util.decorators import Appender -try: # mpl optional - import pandas.tseries.converter as conv - conv.register() # needs to override so set_xlim works with str/number -except ImportError: - pass - - -# Extracted from https://gist.github.com/huyng/816622 -# this is the rcParams set when setting display.with_mpl_style -# to True. -mpl_stylesheet = { - 'axes.axisbelow': True, - 'axes.color_cycle': ['#348ABD', - '#7A68A6', - '#A60628', - '#467821', - '#CF4457', - '#188487', - '#E24A33'], - 'axes.edgecolor': '#bcbcbc', - 'axes.facecolor': '#eeeeee', - 'axes.grid': True, - 'axes.labelcolor': '#555555', - 'axes.labelsize': 'large', - 'axes.linewidth': 1.0, - 'axes.titlesize': 'x-large', - 'figure.edgecolor': 'white', - 'figure.facecolor': 'white', - 'figure.figsize': (6.0, 4.0), - 'figure.subplot.hspace': 0.5, - 'font.family': 'monospace', - 'font.monospace': ['Andale Mono', - 'Nimbus Mono L', - 'Courier New', - 'Courier', - 'Fixed', - 'Terminal', - 'monospace'], - 'font.size': 10, - 'interactive': True, - 'keymap.all_axes': ['a'], - 'keymap.back': ['left', 'c', 'backspace'], - 'keymap.forward': ['right', 'v'], - 'keymap.fullscreen': ['f'], - 'keymap.grid': ['g'], - 'keymap.home': ['h', 'r', 'home'], - 'keymap.pan': ['p'], - 'keymap.save': ['s'], - 'keymap.xscale': ['L', 'k'], - 'keymap.yscale': ['l'], - 'keymap.zoom': ['o'], - 'legend.fancybox': True, - 'lines.antialiased': True, - 'lines.linewidth': 1.0, - 'patch.antialiased': True, - 'patch.edgecolor': '#EEEEEE', - 'patch.facecolor': '#348ABD', - 'patch.linewidth': 0.5, - 'toolbar': 'toolbar2', - 
'xtick.color': '#555555', - 'xtick.direction': 'in', - 'xtick.major.pad': 6.0, - 'xtick.major.size': 0.0, - 'xtick.minor.pad': 6.0, - 'xtick.minor.size': 0.0, - 'ytick.color': '#555555', - 'ytick.direction': 'in', - 'ytick.major.pad': 6.0, - 'ytick.major.size': 0.0, - 'ytick.minor.pad': 6.0, - 'ytick.minor.size': 0.0 -} - - -def _mpl_le_1_2_1(): - try: - import matplotlib as mpl - return (str(mpl.__version__) <= LooseVersion('1.2.1') and - str(mpl.__version__)[0] != '0') - except ImportError: - return False - -def _mpl_ge_1_3_1(): - try: - import matplotlib - # The or v[0] == '0' is because their versioneer is - # messed up on dev - return (matplotlib.__version__ >= LooseVersion('1.3.1') or - matplotlib.__version__[0] == '0') - except ImportError: - return False +from pandas.plotting.compat import (_mpl_ge_1_3_1, + _mpl_ge_1_5_0) +from pandas.plotting.style import (mpl_stylesheet, plot_params, + _get_standard_colors) +from pandas.plotting.tools import (_subplots, _flatten, table, + _handle_shared_axes, _get_all_lines, + _get_xlim, _set_ticks_props, + format_date_labels) -def _mpl_ge_1_4_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.4') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_1_5_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.5') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_2_0_0(): - try: - import matplotlib - return matplotlib.__version__ >= LooseVersion('2.0') - except ImportError: - return False - def _mpl_ge_2_0_1(): try: @@ -169,281 +56,6 @@ def _get_standard_kind(kind): return {'density': 'kde'}.get(kind, kind) -def _get_standard_colors(num_colors=None, colormap=None, color_type='default', - color=None): - import matplotlib.pyplot as plt - - if color is None and colormap is not None: - if isinstance(colormap, compat.string_types): - import matplotlib.cm as cm - cmap = colormap - 
colormap = cm.get_cmap(colormap) - if colormap is None: - raise ValueError("Colormap {0} is not recognized".format(cmap)) - colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) - elif color is not None: - if colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - colors = list(color) if is_list_like(color) else color - else: - if color_type == 'default': - # need to call list() on the result to copy so we don't - # modify the global rcParams below - try: - colors = [c['color'] - for c in list(plt.rcParams['axes.prop_cycle'])] - except KeyError: - colors = list(plt.rcParams.get('axes.color_cycle', - list('bgrcmyk'))) - if isinstance(colors, compat.string_types): - colors = list(colors) - elif color_type == 'random': - import random - - def random_color(column): - random.seed(column) - return [random.random() for _ in range(3)] - - colors = lmap(random_color, lrange(num_colors)) - else: - raise ValueError("color_type must be either 'default' or 'random'") - - if isinstance(colors, compat.string_types): - import matplotlib.colors - conv = matplotlib.colors.ColorConverter() - - def _maybe_valid_colors(colors): - try: - [conv.to_rgba(c) for c in colors] - return True - except ValueError: - return False - - # check whether the string can be convertable to single color - maybe_single_color = _maybe_valid_colors([colors]) - # check whether each character can be convertable to colors - maybe_color_cycle = _maybe_valid_colors(list(colors)) - if maybe_single_color and maybe_color_cycle and len(colors) > 1: - # Special case for single str 'CN' match and convert to hex - # for supporting matplotlib < 2.0.0 - if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): - hex_color = [c['color'] - for c in list(plt.rcParams['axes.prop_cycle'])] - colors = [hex_color[int(colors[1])]] - else: - # this may no longer be required - msg = ("'{0}' can be parsed as both single color and " - "color cycle. 
Specify each color using a list " - "like ['{0}'] or {1}") - raise ValueError(msg.format(colors, list(colors))) - elif maybe_single_color: - colors = [colors] - else: - # ``colors`` is regarded as color cycle. - # mpl will raise error any of them is invalid - pass - - if len(colors) != num_colors: - try: - multiple = num_colors // len(colors) - 1 - except ZeroDivisionError: - raise ValueError("Invalid color argument: ''") - mod = num_colors % len(colors) - - colors += multiple * colors - colors += colors[:mod] - - return colors - - -class _Options(dict): - """ - Stores pandas plotting options. - Allows for parameter aliasing so you can just use parameter names that are - the same as the plot function parameters, but is stored in a canonical - format that makes it easy to breakdown into groups later - """ - - # alias so the names are same as plotting method parameter names - _ALIASES = {'x_compat': 'xaxis.compat'} - _DEFAULT_KEYS = ['xaxis.compat'] - - def __init__(self): - self['xaxis.compat'] = False - - def __getitem__(self, key): - key = self._get_canonical_key(key) - if key not in self: - raise ValueError('%s is not a valid pandas plotting option' % key) - return super(_Options, self).__getitem__(key) - - def __setitem__(self, key, value): - key = self._get_canonical_key(key) - return super(_Options, self).__setitem__(key, value) - - def __delitem__(self, key): - key = self._get_canonical_key(key) - if key in self._DEFAULT_KEYS: - raise ValueError('Cannot remove default parameter %s' % key) - return super(_Options, self).__delitem__(key) - - def __contains__(self, key): - key = self._get_canonical_key(key) - return super(_Options, self).__contains__(key) - - def reset(self): - """ - Reset the option store to its initial state - - Returns - ------- - None - """ - self.__init__() - - def _get_canonical_key(self, key): - return self._ALIASES.get(key, key) - - @contextmanager - def use(self, key, value): - """ - Temporarily set a parameter value using the with 
statement. - Aliasing allowed. - """ - old_value = self[key] - try: - self[key] = value - yield self - finally: - self[key] = old_value - - -plot_params = _Options() - - -def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, - diagonal='hist', marker='.', density_kwds=None, - hist_kwds=None, range_padding=0.05, **kwds): - """ - Draw a matrix of scatter plots. - - Parameters - ---------- - frame : DataFrame - alpha : float, optional - amount of transparency applied - figsize : (float,float), optional - a tuple (width, height) in inches - ax : Matplotlib axis object, optional - grid : bool, optional - setting this to True will show the grid - diagonal : {'hist', 'kde'} - pick between 'kde' and 'hist' for - either Kernel Density Estimation or Histogram - plot in the diagonal - marker : str, optional - Matplotlib marker type, default '.' - hist_kwds : other plotting keyword arguments - To be passed to hist function - density_kwds : other plotting keyword arguments - To be passed to kernel density estimate plot - range_padding : float, optional - relative extension of axis range in x and y - with respect to (x_max - x_min) or (y_max - y_min), - default 0.05 - kwds : other plotting keyword arguments - To be passed to scatter function - - Examples - -------- - >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) - >>> scatter_matrix(df, alpha=0.2) - """ - - df = frame._get_numeric_data() - n = df.columns.size - naxes = n * n - fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, - squeeze=False) - - # no gaps between subplots - fig.subplots_adjust(wspace=0, hspace=0) - - mask = notnull(df) - - marker = _get_marker_compat(marker) - - hist_kwds = hist_kwds or {} - density_kwds = density_kwds or {} - - # GH 14855 - kwds.setdefault('edgecolors', 'none') - - boundaries_list = [] - for a in df.columns: - values = df[a].values[mask[a].values] - rmin_, rmax_ = np.min(values), np.max(values) - rdelta_ext = (rmax_ - rmin_) * range_padding 
/ 2. - boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) - - for i, a in zip(lrange(n), df.columns): - for j, b in zip(lrange(n), df.columns): - ax = axes[i, j] - - if i == j: - values = df[a].values[mask[a].values] - - # Deal with the diagonal by drawing a histogram there. - if diagonal == 'hist': - ax.hist(values, **hist_kwds) - - elif diagonal in ('kde', 'density'): - from scipy.stats import gaussian_kde - y = values - gkde = gaussian_kde(y) - ind = np.linspace(y.min(), y.max(), 1000) - ax.plot(ind, gkde.evaluate(ind), **density_kwds) - - ax.set_xlim(boundaries_list[i]) - - else: - common = (mask[a] & mask[b]).values - - ax.scatter(df[b][common], df[a][common], - marker=marker, alpha=alpha, **kwds) - - ax.set_xlim(boundaries_list[j]) - ax.set_ylim(boundaries_list[i]) - - ax.set_xlabel(b) - ax.set_ylabel(a) - - if j != 0: - ax.yaxis.set_visible(False) - if i != n - 1: - ax.xaxis.set_visible(False) - - if len(df.columns) > 1: - lim1 = boundaries_list[0] - locs = axes[0][1].yaxis.get_majorticklocs() - locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] - adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) - - lim0 = axes[0][0].get_ylim() - adj = adj * (lim0[1] - lim0[0]) + lim0[0] - axes[0][0].yaxis.set_ticks(adj) - - if np.all(locs == locs.astype(int)): - # if all ticks are int - locs = locs.astype(int) - axes[0][0].yaxis.set_ticklabels(locs) - - _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - - return axes - - def _gca(): import matplotlib.pyplot as plt return plt.gca() @@ -454,443 +66,6 @@ def _gcf(): return plt.gcf() -def _get_marker_compat(marker): - import matplotlib.lines as mlines - import matplotlib as mpl - if mpl.__version__ < '1.1.0' and marker == '.': - return 'o' - if marker not in mlines.lineMarkers: - return 'o' - return marker - - -def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): - """RadViz - a multivariate data visualization algorithm - - Parameters: - ----------- - frame: DataFrame - 
class_column: str - Column name containing class names - ax: Matplotlib axis object, optional - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - kwds: keywords - Options to pass to matplotlib scatter plotting method - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - import matplotlib.patches as patches - - def normalize(series): - a = min(series) - b = max(series) - return (series - a) / (b - a) - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - df = frame.drop(class_column, axis=1).apply(normalize) - - if ax is None: - ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) - - to_plot = {} - colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, - color_type='random', color=color) - - for kls in classes: - to_plot[kls] = [[], []] - - m = len(frame.columns) - 1 - s = np.array([(np.cos(t), np.sin(t)) - for t in [2.0 * np.pi * (i / float(m)) - for i in range(m)]]) - - for i in range(n): - row = df.iloc[i].values - row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) - y = (s * row_).sum(axis=0) / row.sum() - kls = class_col.iat[i] - to_plot[kls][0].append(y[0]) - to_plot[kls][1].append(y[1]) - - for i, kls in enumerate(classes): - ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], - label=pprint_thing(kls), **kwds) - ax.legend() - - ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) - - for xy, name in zip(s, df.columns): - - ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) - - if xy[0] < 0.0 and xy[1] < 0.0: - ax.text(xy[0] - 0.025, xy[1] - 0.025, name, - ha='right', va='top', size='small') - elif xy[0] < 0.0 and xy[1] >= 0.0: - ax.text(xy[0] - 0.025, xy[1] + 0.025, name, - ha='right', va='bottom', size='small') - elif xy[0] >= 0.0 and 
xy[1] < 0.0: - ax.text(xy[0] + 0.025, xy[1] - 0.025, name, - ha='left', va='top', size='small') - elif xy[0] >= 0.0 and xy[1] >= 0.0: - ax.text(xy[0] + 0.025, xy[1] + 0.025, name, - ha='left', va='bottom', size='small') - - ax.axis('equal') - return ax - - -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') -def andrews_curves(frame, class_column, ax=None, samples=200, color=None, - colormap=None, **kwds): - """ - Generates a matplotlib plot of Andrews curves, for visualising clusters of - multivariate data. - - Andrews curves have the functional form: - - f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + - x_4 sin(2t) + x_5 cos(2t) + ... - - Where x coefficients correspond to the values of each dimension and t is - linearly spaced between -pi and +pi. Each row of frame then corresponds to - a single curve. - - Parameters: - ----------- - frame : DataFrame - Data to be plotted, preferably normalized to (0.0, 1.0) - class_column : Name of the column containing class names - ax : matplotlib axes object, default None - samples : Number of points to plot in each curve - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - kwds: keywords - Options to pass to matplotlib plotting method - - Returns: - -------- - ax: Matplotlib axis object - - """ - from math import sqrt, pi - import matplotlib.pyplot as plt - - def function(amplitudes): - def f(t): - x1 = amplitudes[0] - result = x1 / sqrt(2.0) - - # Take the rest of the coefficients and resize them - # appropriately. Take a copy of amplitudes as otherwise numpy - # deletes the element from amplitudes itself. - coeffs = np.delete(np.copy(amplitudes), 0) - coeffs.resize(int((coeffs.size + 1) / 2), 2) - - # Generate the harmonics and arguments for the sin and cos - # functions. 
- harmonics = np.arange(0, coeffs.shape[0]) + 1 - trig_args = np.outer(harmonics, t) - - result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + - coeffs[:, 1, np.newaxis] * np.cos(trig_args), - axis=0) - return result - return f - - n = len(frame) - class_col = frame[class_column] - classes = frame[class_column].drop_duplicates() - df = frame.drop(class_column, axis=1) - t = np.linspace(-pi, pi, samples) - used_legends = set([]) - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - colors = dict(zip(classes, color_values)) - if ax is None: - ax = plt.gca(xlim=(-pi, pi)) - for i in range(n): - row = df.iloc[i].values - f = function(row) - y = f(t) - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(t, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(t, y, color=colors[kls], **kwds) - - ax.legend(loc='upper right') - ax.grid() - return ax - - -def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): - """Bootstrap plot. 
- - Parameters: - ----------- - series: Time series - fig: matplotlib figure object, optional - size: number of data points to consider during each sampling - samples: number of times the bootstrap procedure is performed - kwds: optional keyword arguments for plotting commands, must be accepted - by both hist and plot - - Returns: - -------- - fig: matplotlib figure - """ - import random - import matplotlib.pyplot as plt - - # random.sample(ndarray, int) fails on python 3.3, sigh - data = list(series.values) - samplings = [random.sample(data, size) for _ in range(samples)] - - means = np.array([np.mean(sampling) for sampling in samplings]) - medians = np.array([np.median(sampling) for sampling in samplings]) - midranges = np.array([(min(sampling) + max(sampling)) * 0.5 - for sampling in samplings]) - if fig is None: - fig = plt.figure() - x = lrange(samples) - axes = [] - ax1 = fig.add_subplot(2, 3, 1) - ax1.set_xlabel("Sample") - axes.append(ax1) - ax1.plot(x, means, **kwds) - ax2 = fig.add_subplot(2, 3, 2) - ax2.set_xlabel("Sample") - axes.append(ax2) - ax2.plot(x, medians, **kwds) - ax3 = fig.add_subplot(2, 3, 3) - ax3.set_xlabel("Sample") - axes.append(ax3) - ax3.plot(x, midranges, **kwds) - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_xlabel("Mean") - axes.append(ax4) - ax4.hist(means, **kwds) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_xlabel("Median") - axes.append(ax5) - ax5.hist(medians, **kwds) - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_xlabel("Midrange") - axes.append(ax6) - ax6.hist(midranges, **kwds) - for axis in axes: - plt.setp(axis.get_xticklabels(), fontsize=8) - plt.setp(axis.get_yticklabels(), fontsize=8) - return fig - - -@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) -def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, - use_columns=False, xticks=None, colormap=None, - axvlines=True, axvlines_kwds=None, sort_labels=False, - **kwds): - 
"""Parallel coordinates plotting. - - Parameters - ---------- - frame: DataFrame - class_column: str - Column name containing class names - cols: list, optional - A list of column names to use - ax: matplotlib.axis, optional - matplotlib axis object - color: list or tuple, optional - Colors to use for the different classes - use_columns: bool, optional - If true, columns will be used as xticks - xticks: list or tuple, optional - A list of values to use for xticks - colormap: str or matplotlib colormap, default None - Colormap to use for line colors. - axvlines: bool, optional - If true, vertical lines will be added at each xtick - axvlines_kwds: keywords, optional - Options to be passed to axvline method for vertical lines - sort_labels: bool, False - Sort class_column labels, useful when assigning colours - - .. versionadded:: 0.20.0 - - kwds: keywords - Options to pass to matplotlib plotting method - - Returns - ------- - ax: matplotlib axis object - - Examples - -------- - >>> from pandas import read_csv - >>> from pandas.tools.plotting import parallel_coordinates - >>> from matplotlib import pyplot as plt - >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' - '/pandas/tests/data/iris.csv') - >>> parallel_coordinates(df, 'Name', color=('#556270', - '#4ECDC4', '#C7F464')) - >>> plt.show() - """ - if axvlines_kwds is None: - axvlines_kwds = {'linewidth': 1, 'color': 'black'} - import matplotlib.pyplot as plt - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - - if cols is None: - df = frame.drop(class_column, axis=1) - else: - df = frame[cols] - - used_legends = set([]) - - ncols = len(df.columns) - - # determine values to use for xticks - if use_columns is True: - if not np.all(np.isreal(list(df.columns))): - raise ValueError('Columns must be numeric to be used as xticks') - x = df.columns - elif xticks is not None: - if not np.all(np.isreal(xticks)): - raise ValueError('xticks specified must 
be numeric') - elif len(xticks) != ncols: - raise ValueError('Length of xticks must match number of columns') - x = xticks - else: - x = lrange(ncols) - - if ax is None: - ax = plt.gca() - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - - if sort_labels: - classes = sorted(classes) - color_values = sorted(color_values) - colors = dict(zip(classes, color_values)) - - for i in range(n): - y = df.iloc[i].values - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(x, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(x, y, color=colors[kls], **kwds) - - if axvlines: - for i in x: - ax.axvline(i, **axvlines_kwds) - - ax.set_xticks(x) - ax.set_xticklabels(df.columns) - ax.set_xlim(x[0], x[-1]) - ax.legend(loc='upper right') - ax.grid() - return ax - - -def lag_plot(series, lag=1, ax=None, **kwds): - """Lag plot for time series. - - Parameters: - ----------- - series: Time series - lag: lag of the scatter plot, default 1 - ax: Matplotlib axis object, optional - kwds: Matplotlib scatter method keyword arguments, optional - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) - - data = series.values - y1 = data[:-lag] - y2 = data[lag:] - if ax is None: - ax = plt.gca() - ax.set_xlabel("y(t)") - ax.set_ylabel("y(t + %s)" % lag) - ax.scatter(y1, y2, **kwds) - return ax - - -def autocorrelation_plot(series, ax=None, **kwds): - """Autocorrelation plot for time series. 
- - Parameters: - ----------- - series: Time series - ax: Matplotlib axis object, optional - kwds : keywords - Options to pass to matplotlib plotting method - - Returns: - ----------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - n = len(series) - data = np.asarray(series) - if ax is None: - ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) - mean = np.mean(data) - c0 = np.sum((data - mean) ** 2) / float(n) - - def r(h): - return ((data[:n - h] - mean) * - (data[h:] - mean)).sum() / float(n) / c0 - x = np.arange(n) + 1 - y = lmap(r, x) - z95 = 1.959963984540054 - z99 = 2.5758293035489004 - ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') - ax.axhline(y=z95 / np.sqrt(n), color='grey') - ax.axhline(y=0.0, color='black') - ax.axhline(y=-z95 / np.sqrt(n), color='grey') - ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') - ax.set_xlabel("Lag") - ax.set_ylabel("Autocorrelation") - ax.plot(x, y, **kwds) - if 'label' in kwds: - ax.legend() - ax.grid() - return ax - - class MPLPlot(object): """ Base class for assembling a pandas plot using matplotlib @@ -1734,12 +909,12 @@ def _is_ts_plot(self): return not self.x_compat and self.use_index and self._use_dynamic_x() def _use_dynamic_x(self): - from pandas.tseries.plotting import _use_dynamic_x + from pandas.plotting.timeseries import _use_dynamic_x return _use_dynamic_x(self._get_ax(0), self.data) def _make_plot(self): if self._is_ts_plot(): - from pandas.tseries.plotting import _maybe_convert_index + from pandas.plotting.timeseries import _maybe_convert_index data = _maybe_convert_index(self._get_ax(0), self.data) x = data.index # dummy, not used @@ -1789,9 +964,9 @@ def _plot(cls, ax, x, y, style=None, column_num=None, @classmethod def _ts_plot(cls, ax, x, data, style=None, **kwds): - from pandas.tseries.plotting import (_maybe_resample, - _decorate_axes, - format_dateaxis) + from pandas.plotting.timeseries import (_maybe_resample, + _decorate_axes, + format_dateaxis) # accept x to 
be consistent with normal plot func, # x is not passed to tsplot as it uses data.index as x coordinate # column_num must be in kwds for stacking purpose @@ -2849,18 +2024,6 @@ def plot_group(keys, values, ax): return result -def format_date_labels(ax, rot): - # mini version of autofmt_xdate - try: - for label in ax.get_xticklabels(): - label.set_ha('right') - label.set_rotation(rot) - fig = ax.get_figure() - fig.subplots_adjust(bottom=0.2) - except Exception: # pragma: no cover - pass - - def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, **kwargs): """ @@ -3250,364 +2413,6 @@ def _grouped_plot_by_column(plotf, data, columns=None, by=None, return result -def table(ax, data, rowLabels=None, colLabels=None, - **kwargs): - """ - Helper function to convert DataFrame and Series to matplotlib.table - - Parameters - ---------- - `ax`: Matplotlib axes object - `data`: DataFrame or Series - data for table contents - `kwargs`: keywords, optional - keyword arguments which passed to matplotlib.table.table. - If `rowLabels` or `colLabels` is not specified, data index or column - name will be used. 
- - Returns - ------- - matplotlib table object - """ - from pandas import DataFrame - if isinstance(data, Series): - data = DataFrame(data, columns=[data.name]) - elif isinstance(data, DataFrame): - pass - else: - raise ValueError('Input data must be DataFrame or Series') - - if rowLabels is None: - rowLabels = data.index - - if colLabels is None: - colLabels = data.columns - - cellText = data.values - - import matplotlib.table - table = matplotlib.table.table(ax, cellText=cellText, - rowLabels=rowLabels, - colLabels=colLabels, **kwargs) - return table - - -def _get_layout(nplots, layout=None, layout_type='box'): - if layout is not None: - if not isinstance(layout, (tuple, list)) or len(layout) != 2: - raise ValueError('Layout must be a tuple of (rows, columns)') - - nrows, ncols = layout - - # Python 2 compat - ceil_ = lambda x: int(ceil(x)) - if nrows == -1 and ncols > 0: - layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols) - elif ncols == -1 and nrows > 0: - layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows)) - elif ncols <= 0 and nrows <= 0: - msg = "At least one dimension of layout must be positive" - raise ValueError(msg) - - if nrows * ncols < nplots: - raise ValueError('Layout of %sx%s must be larger than ' - 'required size %s' % (nrows, ncols, nplots)) - - return layout - - if layout_type == 'single': - return (1, 1) - elif layout_type == 'horizontal': - return (1, nplots) - elif layout_type == 'vertical': - return (nplots, 1) - - layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} - try: - return layouts[nplots] - except KeyError: - k = 1 - while k ** 2 < nplots: - k += 1 - - if (k - 1) * k >= nplots: - return k, (k - 1) - else: - return k, k - -# copied from matplotlib/pyplot.py and modified for pandas.plotting - - -def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, - subplot_kw=None, ax=None, layout=None, layout_type='box', - **fig_kw): - """Create a figure with a set of subplots already made. 
- - This utility wrapper makes it convenient to create common layouts of - subplots, including the enclosing figure object, in a single call. - - Keyword arguments: - - naxes : int - Number of required axes. Exceeded axes are set invisible. Default is - nrows * ncols. - - sharex : bool - If True, the X axis will be shared amongst all subplots. - - sharey : bool - If True, the Y axis will be shared amongst all subplots. - - squeeze : bool - - If True, extra dimensions are squeezed out from the returned axis object: - - if only one subplot is constructed (nrows=ncols=1), the resulting - single Axis object is returned as a scalar. - - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object - array of Axis objects are returned as numpy 1-d arrays. - - for NxM subplots with N>1 and M>1 are returned as a 2d array. - - If False, no squeezing at all is done: the returned axis object is always - a 2-d array containing Axis instances, even if it ends up being 1x1. - - subplot_kw : dict - Dict with keywords passed to the add_subplot() call used to create each - subplots. - - ax : Matplotlib axis object, optional - - layout : tuple - Number of rows and columns of the subplot grid. - If not specified, calculated from naxes and layout_type - - layout_type : {'box', 'horziontal', 'vertical'}, default 'box' - Specify how to layout the subplot grid. - - fig_kw : Other keyword arguments to be passed to the figure() call. - Note that all keywords not recognized above will be - automatically included here. - - Returns: - - fig, ax : tuple - - fig is the Matplotlib Figure object - - ax can be either a single axis object or an array of axis objects if - more than one subplot was created. The dimensions of the resulting array - can be controlled with the squeeze keyword, see above. 
- - **Examples:** - - x = np.linspace(0, 2*np.pi, 400) - y = np.sin(x**2) - - # Just a figure and one subplot - f, ax = plt.subplots() - ax.plot(x, y) - ax.set_title('Simple plot') - - # Two subplots, unpack the output array immediately - f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) - ax1.plot(x, y) - ax1.set_title('Sharing Y axis') - ax2.scatter(x, y) - - # Four polar axes - plt.subplots(2, 2, subplot_kw=dict(polar=True)) - """ - import matplotlib.pyplot as plt - - if subplot_kw is None: - subplot_kw = {} - - if ax is None: - fig = plt.figure(**fig_kw) - else: - if is_list_like(ax): - ax = _flatten(ax) - if layout is not None: - warnings.warn("When passing multiple axes, layout keyword is " - "ignored", UserWarning) - if sharex or sharey: - warnings.warn("When passing multiple axes, sharex and sharey " - "are ignored. These settings must be specified " - "when creating axes", UserWarning, - stacklevel=4) - if len(ax) == naxes: - fig = ax[0].get_figure() - return fig, ax - else: - raise ValueError("The number of passed axes must be {0}, the " - "same as the output plot".format(naxes)) - - fig = ax.get_figure() - # if ax is passed and a number of subplots is 1, return ax as it is - if naxes == 1: - if squeeze: - return fig, ax - else: - return fig, _flatten(ax) - else: - warnings.warn("To output multiple subplots, the figure containing " - "the passed axes is being cleared", UserWarning, - stacklevel=4) - fig.clear() - - nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) - nplots = nrows * ncols - - # Create empty object array to hold all axes. 
It's easiest to make it 1-d - # so we can just append subplots upon creation, and then - axarr = np.empty(nplots, dtype=object) - - # Create first subplot separately, so we can share it if requested - ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) - - if sharex: - subplot_kw['sharex'] = ax0 - if sharey: - subplot_kw['sharey'] = ax0 - axarr[0] = ax0 - - # Note off-by-one counting because add_subplot uses the MATLAB 1-based - # convention. - for i in range(1, nplots): - kwds = subplot_kw.copy() - # Set sharex and sharey to None for blank/dummy axes, these can - # interfere with proper axis limits on the visible axes if - # they share axes e.g. issue #7528 - if i >= naxes: - kwds['sharex'] = None - kwds['sharey'] = None - ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) - axarr[i] = ax - - if naxes != nplots: - for ax in axarr[naxes:]: - ax.set_visible(False) - - _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) - - if squeeze: - # Reshape the array to have the final desired dimension (nrow,ncol), - # though discarding unneeded dimensions that equal 1. If we only have - # one subplot, just return it instead of a 1-element array. 
- if nplots == 1: - axes = axarr[0] - else: - axes = axarr.reshape(nrows, ncols).squeeze() - else: - # returned axis array will be always 2-d, even if nrows=ncols=1 - axes = axarr.reshape(nrows, ncols) - - return fig, axes - - -def _remove_labels_from_axis(axis): - for t in axis.get_majorticklabels(): - t.set_visible(False) - - try: - # set_visible will not be effective if - # minor axis has NullLocator and NullFormattor (default) - import matplotlib.ticker as ticker - if isinstance(axis.get_minor_locator(), ticker.NullLocator): - axis.set_minor_locator(ticker.AutoLocator()) - if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): - axis.set_minor_formatter(ticker.FormatStrFormatter('')) - for t in axis.get_minorticklabels(): - t.set_visible(False) - except Exception: # pragma no cover - raise - axis.get_label().set_visible(False) - - -def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): - if nplots > 1: - - if nrows > 1: - try: - # first find out the ax layout, - # so that we can correctly handle 'gaps" - layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) - for ax in axarr: - layout[ax.rowNum, ax.colNum] = ax.get_visible() - - for ax in axarr: - # only the last row of subplots should get x labels -> all - # other off layout handles the case that the subplot is - # the last in the column, because below is no subplot/gap. - if not layout[ax.rowNum + 1, ax.colNum]: - continue - if sharex or len(ax.get_shared_x_axes() - .get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.xaxis) - - except IndexError: - # if gridspec is used, ax.rowNum and ax.colNum may different - # from layout shape. 
in this case, use last_row logic - for ax in axarr: - if ax.is_last_row(): - continue - if sharex or len(ax.get_shared_x_axes() - .get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.xaxis) - - if ncols > 1: - for ax in axarr: - # only the first column should get y labels -> set all other to - # off as we only have labels in teh first column and we always - # have a subplot there, we can skip the layout test - if ax.is_first_col(): - continue - if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.yaxis) - - -def _flatten(axes): - if not is_list_like(axes): - return np.array([axes]) - elif isinstance(axes, (np.ndarray, Index)): - return axes.ravel() - return np.array(axes) - - -def _get_all_lines(ax): - lines = ax.get_lines() - - if hasattr(ax, 'right_ax'): - lines += ax.right_ax.get_lines() - - if hasattr(ax, 'left_ax'): - lines += ax.left_ax.get_lines() - - return lines - - -def _get_xlim(lines): - left, right = np.inf, -np.inf - for l in lines: - x = l.get_xdata(orig=False) - left = min(x[0], left) - right = max(x[-1], right) - return left, right - - -def _set_ticks_props(axes, xlabelsize=None, xrot=None, - ylabelsize=None, yrot=None): - import matplotlib.pyplot as plt - - for ax in _flatten(axes): - if xlabelsize is not None: - plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) - if xrot is not None: - plt.setp(ax.get_xticklabels(), rotation=xrot) - if ylabelsize is not None: - plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) - if yrot is not None: - plt.setp(ax.get_yticklabels(), rotation=yrot) - return axes - - class BasePlotMethods(PandasObject): def __init__(self, data): diff --git a/pandas/plotting/style.py b/pandas/plotting/style.py new file mode 100644 index 0000000000000..37af63e8bb183 --- /dev/null +++ b/pandas/plotting/style.py @@ -0,0 +1,232 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from contextlib import contextmanager + +import numpy as np 
+ +from pandas.types.common import is_list_like +from pandas.compat import range, lrange, lmap +import pandas.compat as compat + + +# Extracted from https://gist.github.com/huyng/816622 +# this is the rcParams set when setting display.with_mpl_style +# to True. +mpl_stylesheet = { + 'axes.axisbelow': True, + 'axes.color_cycle': ['#348ABD', + '#7A68A6', + '#A60628', + '#467821', + '#CF4457', + '#188487', + '#E24A33'], + 'axes.edgecolor': '#bcbcbc', + 'axes.facecolor': '#eeeeee', + 'axes.grid': True, + 'axes.labelcolor': '#555555', + 'axes.labelsize': 'large', + 'axes.linewidth': 1.0, + 'axes.titlesize': 'x-large', + 'figure.edgecolor': 'white', + 'figure.facecolor': 'white', + 'figure.figsize': (6.0, 4.0), + 'figure.subplot.hspace': 0.5, + 'font.family': 'monospace', + 'font.monospace': ['Andale Mono', + 'Nimbus Mono L', + 'Courier New', + 'Courier', + 'Fixed', + 'Terminal', + 'monospace'], + 'font.size': 10, + 'interactive': True, + 'keymap.all_axes': ['a'], + 'keymap.back': ['left', 'c', 'backspace'], + 'keymap.forward': ['right', 'v'], + 'keymap.fullscreen': ['f'], + 'keymap.grid': ['g'], + 'keymap.home': ['h', 'r', 'home'], + 'keymap.pan': ['p'], + 'keymap.save': ['s'], + 'keymap.xscale': ['L', 'k'], + 'keymap.yscale': ['l'], + 'keymap.zoom': ['o'], + 'legend.fancybox': True, + 'lines.antialiased': True, + 'lines.linewidth': 1.0, + 'patch.antialiased': True, + 'patch.edgecolor': '#EEEEEE', + 'patch.facecolor': '#348ABD', + 'patch.linewidth': 0.5, + 'toolbar': 'toolbar2', + 'xtick.color': '#555555', + 'xtick.direction': 'in', + 'xtick.major.pad': 6.0, + 'xtick.major.size': 0.0, + 'xtick.minor.pad': 6.0, + 'xtick.minor.size': 0.0, + 'ytick.color': '#555555', + 'ytick.direction': 'in', + 'ytick.major.pad': 6.0, + 'ytick.major.size': 0.0, + 'ytick.minor.pad': 6.0, + 'ytick.minor.size': 0.0 +} + + +def _get_standard_colors(num_colors=None, colormap=None, color_type='default', + color=None): + import matplotlib.pyplot as plt + + if color is None and colormap is not 
None: + if isinstance(colormap, compat.string_types): + import matplotlib.cm as cm + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError("Colormap {0} is not recognized".format(cmap)) + colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) + elif color is not None: + if colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + colors = list(color) if is_list_like(color) else color + else: + if color_type == 'default': + # need to call list() on the result to copy so we don't + # modify the global rcParams below + try: + colors = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + except KeyError: + colors = list(plt.rcParams.get('axes.color_cycle', + list('bgrcmyk'))) + if isinstance(colors, compat.string_types): + colors = list(colors) + elif color_type == 'random': + import random + + def random_color(column): + random.seed(column) + return [random.random() for _ in range(3)] + + colors = lmap(random_color, lrange(num_colors)) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + if isinstance(colors, compat.string_types): + import matplotlib.colors + conv = matplotlib.colors.ColorConverter() + + def _maybe_valid_colors(colors): + try: + [conv.to_rgba(c) for c in colors] + return True + except ValueError: + return False + + # check whether the string can be converted to a single color + maybe_single_color = _maybe_valid_colors([colors]) + # check whether each character can be converted to a color + maybe_color_cycle = _maybe_valid_colors(list(colors)) + if maybe_single_color and maybe_color_cycle and len(colors) > 1: + # Special case for single str 'CN' match and convert to hex + # for supporting matplotlib < 2.0.0 + if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): + hex_color = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + colors = [hex_color[int(colors[1])]] + else: + # this may no longer be
required + msg = ("'{0}' can be parsed as both single color and " + "color cycle. Specify each color using a list " + "like ['{0}'] or {1}") + raise ValueError(msg.format(colors, list(colors))) + elif maybe_single_color: + colors = [colors] + else: + # ``colors`` is regarded as a color cycle. + # mpl will raise an error if any of them is invalid + pass + + if len(colors) != num_colors: + try: + multiple = num_colors // len(colors) - 1 + except ZeroDivisionError: + raise ValueError("Invalid color argument: ''") + mod = num_colors % len(colors) + + colors += multiple * colors + colors += colors[:mod] + + return colors + + +class _Options(dict): + """ + Stores pandas plotting options. + Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but are stored in a canonical + format that makes it easy to break down into groups later + """ + + # alias so the names are same as plotting method parameter names + _ALIASES = {'x_compat': 'xaxis.compat'} + _DEFAULT_KEYS = ['xaxis.compat'] + + def __init__(self): + self['xaxis.compat'] = False + + def __getitem__(self, key): + key = self._get_canonical_key(key) + if key not in self: + raise ValueError('%s is not a valid pandas plotting option' % key) + return super(_Options, self).__getitem__(key) + + def __setitem__(self, key, value): + key = self._get_canonical_key(key) + return super(_Options, self).__setitem__(key, value) + + def __delitem__(self, key): + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError('Cannot remove default parameter %s' % key) + return super(_Options, self).__delitem__(key) + + def __contains__(self, key): + key = self._get_canonical_key(key) + return super(_Options, self).__contains__(key) + + def reset(self): + """ + Reset the option store to its initial state + + Returns + ------- + None + """ + self.__init__() + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + + @contextmanager + def
use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. + """ + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + +plot_params = _Options() diff --git a/pandas/plotting/timeseries.py b/pandas/plotting/timeseries.py new file mode 100644 index 0000000000000..7d3e27d6154ae --- /dev/null +++ b/pandas/plotting/timeseries.py @@ -0,0 +1,339 @@ +# TODO: Use the fact that axis can have units to simplify the process + +import numpy as np + +from matplotlib import pylab +from pandas.tseries.period import Period +from pandas.tseries.offsets import DateOffset +import pandas.tseries.frequencies as frequencies +from pandas.tseries.index import DatetimeIndex +from pandas.tseries.period import PeriodIndex +from pandas.tseries.tdi import TimedeltaIndex +from pandas.formats.printing import pprint_thing +import pandas.compat as compat + +from pandas.tseries.converter import (TimeSeries_DateLocator, + TimeSeries_DateFormatter, + TimeSeries_TimedeltaFormatter) + +# --------------------------------------------------------------------- +# Plotting functions and monkey patches + + +def tsplot(series, plotf, ax=None, **kwargs): + """ + Plots a Series on the given Matplotlib axes or the current axes + + Parameters + ---------- + axes : Axes + series : Series + + Notes + _____ + Supports same kwargs as Axes.plot + + """ + # Used inferred freq is possible, need a test case for inferred + if ax is None: + import matplotlib.pyplot as plt + ax = plt.gca() + + freq, series = _maybe_resample(series, ax, kwargs) + + # Set ax with freq info + _decorate_axes(ax, freq, kwargs) + ax._plot_data.append((series, plotf, kwargs)) + lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) + + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, series.index) + return lines + + +def _maybe_resample(series, ax, kwargs): + # resample against axes freq if necessary + 
freq, ax_freq = _get_freq(ax, series) + + if freq is None: # pragma: no cover + raise ValueError('Cannot use dynamic axis without frequency info') + + # Convert DatetimeIndex to PeriodIndex + if isinstance(series.index, DatetimeIndex): + series = series.to_period(freq=freq) + + if ax_freq is not None and freq != ax_freq: + if frequencies.is_superperiod(freq, ax_freq): # upsample input + series = series.copy() + series.index = series.index.asfreq(ax_freq, how='s') + freq = ax_freq + elif _is_sup(freq, ax_freq): # one is weekly + how = kwargs.pop('how', 'last') + series = getattr(series.resample('D'), how)().dropna() + series = getattr(series.resample(ax_freq), how)().dropna() + freq = ax_freq + elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): + _upsample_others(ax, freq, kwargs) + ax_freq = freq + else: # pragma: no cover + raise ValueError('Incompatible frequency conversion') + return freq, series + + +def _is_sub(f1, f2): + return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_subperiod(f1, 'D'))) + + +def _is_sup(f1, f2): + return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_superperiod(f1, 'D'))) + + +def _upsample_others(ax, freq, kwargs): + legend = ax.get_legend() + lines, labels = _replot_ax(ax, freq, kwargs) + _replot_ax(ax, freq, kwargs) + + other_ax = None + if hasattr(ax, 'left_ax'): + other_ax = ax.left_ax + if hasattr(ax, 'right_ax'): + other_ax = ax.right_ax + + if other_ax is not None: + rlines, rlabels = _replot_ax(other_ax, freq, kwargs) + lines.extend(rlines) + labels.extend(rlabels) + + if (legend is not None and kwargs.get('legend', True) and + len(lines) > 0): + title = legend.get_title().get_text() + if title == 'None': + title = None + ax.legend(lines, labels, loc='best', title=title) + + +def _replot_ax(ax, freq, kwargs): + data = getattr(ax, '_plot_data', None) + + # clear current axes and data + 
ax._plot_data = [] + ax.clear() + + _decorate_axes(ax, freq, kwargs) + + lines = [] + labels = [] + if data is not None: + for series, plotf, kwds in data: + series = series.copy() + idx = series.index.asfreq(freq, how='S') + series.index = idx + ax._plot_data.append((series, plotf, kwds)) + + # for tsplot + if isinstance(plotf, compat.string_types): + from pandas.tools.plotting import _plot_klass + plotf = _plot_klass[plotf]._plot + + lines.append(plotf(ax, series.index._mpl_repr(), + series.values, **kwds)[0]) + labels.append(pprint_thing(series.name)) + + return lines, labels + + +def _decorate_axes(ax, freq, kwargs): + """Initialize axes for time-series plotting""" + if not hasattr(ax, '_plot_data'): + ax._plot_data = [] + + ax.freq = freq + xaxis = ax.get_xaxis() + xaxis.freq = freq + if not hasattr(ax, 'legendlabels'): + ax.legendlabels = [kwargs.get('label', None)] + else: + ax.legendlabels.append(kwargs.get('label', None)) + ax.view_interval = None + ax.date_axis_info = None + + +def _get_ax_freq(ax): + """ + Get the freq attribute of the ax object if set. 
+ Also checks shared axes (eg when using secondary yaxis, sharex=True + or twinx) + """ + ax_freq = getattr(ax, 'freq', None) + if ax_freq is None: + # check for left/right ax in case of secondary yaxis + if hasattr(ax, 'left_ax'): + ax_freq = getattr(ax.left_ax, 'freq', None) + elif hasattr(ax, 'right_ax'): + ax_freq = getattr(ax.right_ax, 'freq', None) + if ax_freq is None: + # check if a shared ax (sharex/twinx) has already freq set + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + if len(shared_axes) > 1: + for shared_ax in shared_axes: + ax_freq = getattr(shared_ax, 'freq', None) + if ax_freq is not None: + break + return ax_freq + + +def _get_freq(ax, series): + # get frequency from data + freq = getattr(series.index, 'freq', None) + if freq is None: + freq = getattr(series.index, 'inferred_freq', None) + + ax_freq = _get_ax_freq(ax) + + # use axes freq if no data freq + if freq is None: + freq = ax_freq + + # get the period frequency + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + + freq = frequencies.get_period_alias(freq) + return freq, ax_freq + + +def _use_dynamic_x(ax, data): + freq = _get_index_freq(data) + ax_freq = _get_ax_freq(ax) + + if freq is None: # convert irregular if axes has freq info + freq = ax_freq + else: # do not use tsplot if irregular was plotted first + if (ax_freq is None) and (len(ax.get_lines()) > 0): + return False + + if freq is None: + return False + + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + if freq is None: + return False + + # hack this for 0.10.1, creating more technical debt...sigh + if isinstance(data.index, DatetimeIndex): + base = frequencies.get_freq(freq) + x = data.index + if (base <= frequencies.FreqGroup.FR_DAY): + return x[:1].is_normalized + return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] + return True + + +def 
_get_index_freq(data): + freq = getattr(data.index, 'freq', None) + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if freq == 'B': + weekdays = np.unique(data.index.dayofweek) + if (5 in weekdays) or (6 in weekdays): + freq = None + return freq + + +def _maybe_convert_index(ax, data): + # tsplot converts automatically, but don't want to convert index + # over and over for DataFrames + if isinstance(data.index, DatetimeIndex): + freq = getattr(data.index, 'freq', None) + + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if isinstance(freq, DateOffset): + freq = freq.rule_code + + if freq is None: + freq = _get_ax_freq(ax) + + if freq is None: + raise ValueError('Could not get frequency alias for plotting') + + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + data = data.to_period(freq=freq) + return data + + +# Patch methods for subplot. Only format_dateaxis is currently used. +# Do we need the rest for convenience? + +def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + +def format_dateaxis(subplot, freq, index): + """ + Pretty-formats the date axis (x-axis). + + Major and minor ticks are automatically set for the frequency of the + current underlying series. As the dynamic mode is activated by + default, changing the limits of the x axis will intelligently change + the positions of the ticks. + """ + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. 
DatetimeIndex uses matplotlib.date directly + if isinstance(index, PeriodIndex): + + majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = lambda t, y: ( + "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) + + elif isinstance(index, TimedeltaIndex): + subplot.xaxis.set_major_formatter( + TimeSeries_TimedeltaFormatter()) + else: + raise TypeError('index type not supported') + + pylab.draw_if_interactive() diff --git a/pandas/plotting/tools.py b/pandas/plotting/tools.py new file mode 100644 index 0000000000000..720f776279869 --- /dev/null +++ b/pandas/plotting/tools.py @@ -0,0 +1,383 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from math import ceil + +import numpy as np + +from pandas.types.common import is_list_like +from pandas.core.index import Index +from pandas.core.series import Series +from pandas.compat import range + + +def format_date_labels(ax, rot): + # mini version of autofmt_xdate + try: + for label in ax.get_xticklabels(): + label.set_ha('right') + label.set_rotation(rot) + fig = ax.get_figure() + fig.subplots_adjust(bottom=0.2) + except Exception: # pragma: no cover + pass + + +def table(ax, data, rowLabels=None, colLabels=None, + **kwargs): + """ + Helper function to convert DataFrame and Series to matplotlib.table + + Parameters + ---------- + `ax`: Matplotlib 
axes object + `data`: DataFrame or Series + data for table contents + `kwargs`: keywords, optional + keyword arguments which passed to matplotlib.table.table. + If `rowLabels` or `colLabels` is not specified, data index or column + name will be used. + + Returns + ------- + matplotlib table object + """ + from pandas import DataFrame + if isinstance(data, Series): + data = DataFrame(data, columns=[data.name]) + elif isinstance(data, DataFrame): + pass + else: + raise ValueError('Input data must be DataFrame or Series') + + if rowLabels is None: + rowLabels = data.index + + if colLabels is None: + colLabels = data.columns + + cellText = data.values + + import matplotlib.table + table = matplotlib.table.table(ax, cellText=cellText, + rowLabels=rowLabels, + colLabels=colLabels, **kwargs) + return table + + +def _get_layout(nplots, layout=None, layout_type='box'): + if layout is not None: + if not isinstance(layout, (tuple, list)) or len(layout) != 2: + raise ValueError('Layout must be a tuple of (rows, columns)') + + nrows, ncols = layout + + # Python 2 compat + ceil_ = lambda x: int(ceil(x)) + if nrows == -1 and ncols > 0: + layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols) + elif ncols == -1 and nrows > 0: + layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows)) + elif ncols <= 0 and nrows <= 0: + msg = "At least one dimension of layout must be positive" + raise ValueError(msg) + + if nrows * ncols < nplots: + raise ValueError('Layout of %sx%s must be larger than ' + 'required size %s' % (nrows, ncols, nplots)) + + return layout + + if layout_type == 'single': + return (1, 1) + elif layout_type == 'horizontal': + return (1, nplots) + elif layout_type == 'vertical': + return (nplots, 1) + + layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} + try: + return layouts[nplots] + except KeyError: + k = 1 + while k ** 2 < nplots: + k += 1 + + if (k - 1) * k >= nplots: + return k, (k - 1) + else: + return k, k + +# copied from matplotlib/pyplot.py and 
modified for pandas.plotting + + +def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, + subplot_kw=None, ax=None, layout=None, layout_type='box', + **fig_kw): + """Create a figure with a set of subplots already made. + + This utility wrapper makes it convenient to create common layouts of + subplots, including the enclosing figure object, in a single call. + + Keyword arguments: + + naxes : int + Number of required axes. Exceeded axes are set invisible. Default is + nrows * ncols. + + sharex : bool + If True, the X axis will be shared amongst all subplots. + + sharey : bool + If True, the Y axis will be shared amongst all subplots. + + squeeze : bool + + If True, extra dimensions are squeezed out from the returned axis object: + - if only one subplot is constructed (nrows=ncols=1), the resulting + single Axis object is returned as a scalar. + - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object + array of Axis objects are returned as numpy 1-d arrays. + - for NxM subplots with N>1 and M>1 are returned as a 2d array. + + If False, no squeezing at all is done: the returned axis object is always + a 2-d array containing Axis instances, even if it ends up being 1x1. + + subplot_kw : dict + Dict with keywords passed to the add_subplot() call used to create each + subplots. + + ax : Matplotlib axis object, optional + + layout : tuple + Number of rows and columns of the subplot grid. + If not specified, calculated from naxes and layout_type + + layout_type : {'box', 'horziontal', 'vertical'}, default 'box' + Specify how to layout the subplot grid. + + fig_kw : Other keyword arguments to be passed to the figure() call. + Note that all keywords not recognized above will be + automatically included here. + + Returns: + + fig, ax : tuple + - fig is the Matplotlib Figure object + - ax can be either a single axis object or an array of axis objects if + more than one subplot was created. 
The dimensions of the resulting array + can be controlled with the squeeze keyword, see above. + + **Examples:** + + x = np.linspace(0, 2*np.pi, 400) + y = np.sin(x**2) + + # Just a figure and one subplot + f, ax = plt.subplots() + ax.plot(x, y) + ax.set_title('Simple plot') + + # Two subplots, unpack the output array immediately + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + ax1.plot(x, y) + ax1.set_title('Sharing Y axis') + ax2.scatter(x, y) + + # Four polar axes + plt.subplots(2, 2, subplot_kw=dict(polar=True)) + """ + import matplotlib.pyplot as plt + + if subplot_kw is None: + subplot_kw = {} + + if ax is None: + fig = plt.figure(**fig_kw) + else: + if is_list_like(ax): + ax = _flatten(ax) + if layout is not None: + warnings.warn("When passing multiple axes, layout keyword is " + "ignored", UserWarning) + if sharex or sharey: + warnings.warn("When passing multiple axes, sharex and sharey " + "are ignored. These settings must be specified " + "when creating axes", UserWarning, + stacklevel=4) + if len(ax) == naxes: + fig = ax[0].get_figure() + return fig, ax + else: + raise ValueError("The number of passed axes must be {0}, the " + "same as the output plot".format(naxes)) + + fig = ax.get_figure() + # if ax is passed and a number of subplots is 1, return ax as it is + if naxes == 1: + if squeeze: + return fig, ax + else: + return fig, _flatten(ax) + else: + warnings.warn("To output multiple subplots, the figure containing " + "the passed axes is being cleared", UserWarning, + stacklevel=4) + fig.clear() + + nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) + nplots = nrows * ncols + + # Create empty object array to hold all axes. 
It's easiest to make it 1-d + # so we can just append subplots upon creation, and then + axarr = np.empty(nplots, dtype=object) + + # Create first subplot separately, so we can share it if requested + ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) + + if sharex: + subplot_kw['sharex'] = ax0 + if sharey: + subplot_kw['sharey'] = ax0 + axarr[0] = ax0 + + # Note off-by-one counting because add_subplot uses the MATLAB 1-based + # convention. + for i in range(1, nplots): + kwds = subplot_kw.copy() + # Set sharex and sharey to None for blank/dummy axes, these can + # interfere with proper axis limits on the visible axes if + # they share axes e.g. issue #7528 + if i >= naxes: + kwds['sharex'] = None + kwds['sharey'] = None + ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) + axarr[i] = ax + + if naxes != nplots: + for ax in axarr[naxes:]: + ax.set_visible(False) + + _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + + if squeeze: + # Reshape the array to have the final desired dimension (nrow,ncol), + # though discarding unneeded dimensions that equal 1. If we only have + # one subplot, just return it instead of a 1-element array. 
+ if nplots == 1: + axes = axarr[0] + else: + axes = axarr.reshape(nrows, ncols).squeeze() + else: + # returned axis array will be always 2-d, even if nrows=ncols=1 + axes = axarr.reshape(nrows, ncols) + + return fig, axes + + +def _remove_labels_from_axis(axis): + for t in axis.get_majorticklabels(): + t.set_visible(False) + + try: + # set_visible will not be effective if + # minor axis has NullLocator and NullFormattor (default) + import matplotlib.ticker as ticker + if isinstance(axis.get_minor_locator(), ticker.NullLocator): + axis.set_minor_locator(ticker.AutoLocator()) + if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): + axis.set_minor_formatter(ticker.FormatStrFormatter('')) + for t in axis.get_minorticklabels(): + t.set_visible(False) + except Exception: # pragma no cover + raise + axis.get_label().set_visible(False) + + +def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): + if nplots > 1: + + if nrows > 1: + try: + # first find out the ax layout, + # so that we can correctly handle 'gaps" + layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) + for ax in axarr: + layout[ax.rowNum, ax.colNum] = ax.get_visible() + + for ax in axarr: + # only the last row of subplots should get x labels -> all + # other off layout handles the case that the subplot is + # the last in the column, because below is no subplot/gap. + if not layout[ax.rowNum + 1, ax.colNum]: + continue + if sharex or len(ax.get_shared_x_axes() + .get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + except IndexError: + # if gridspec is used, ax.rowNum and ax.colNum may different + # from layout shape. 
in this case, use last_row logic + for ax in axarr: + if ax.is_last_row(): + continue + if sharex or len(ax.get_shared_x_axes() + .get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + if ncols > 1: + for ax in axarr: + # only the first column should get y labels -> set all other to + # off as we only have labels in teh first column and we always + # have a subplot there, we can skip the layout test + if ax.is_first_col(): + continue + if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.yaxis) + + +def _flatten(axes): + if not is_list_like(axes): + return np.array([axes]) + elif isinstance(axes, (np.ndarray, Index)): + return axes.ravel() + return np.array(axes) + + +def _get_all_lines(ax): + lines = ax.get_lines() + + if hasattr(ax, 'right_ax'): + lines += ax.right_ax.get_lines() + + if hasattr(ax, 'left_ax'): + lines += ax.left_ax.get_lines() + + return lines + + +def _get_xlim(lines): + left, right = np.inf, -np.inf + for l in lines: + x = l.get_xdata(orig=False) + left = min(x[0], left) + right = max(x[-1], right) + return left, right + + +def _set_ticks_props(axes, xlabelsize=None, xrot=None, + ylabelsize=None, yrot=None): + import matplotlib.pyplot as plt + + for ax in _flatten(axes): + if xlabelsize is not None: + plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) + if xrot is not None: + plt.setp(ax.get_xticklabels(), rotation=xrot) + if ylabelsize is not None: + plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) + if yrot is not None: + plt.setp(ax.get_yticklabels(), rotation=yrot) + return axes diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index a15d7cf26cbea..dfa8851d9bec1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -31,7 +31,7 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', - 'indexes', 'formats', 'errors', 'pandas', + 'indexes', 'formats', 'errors', 'pandas', 'plotting' 
'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 'io'] diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index c31d8b539ae6f..aa4e57c21fcfc 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -16,7 +16,7 @@ import numpy as np from numpy import random -import pandas.tools.plotting as plotting +import pandas.plotting as plotting """ @@ -48,12 +48,12 @@ def setUp(self): import matplotlib as mpl mpl.rcdefaults() - self.mpl_le_1_2_1 = plotting._mpl_le_1_2_1() - self.mpl_ge_1_3_1 = plotting._mpl_ge_1_3_1() - self.mpl_ge_1_4_0 = plotting._mpl_ge_1_4_0() - self.mpl_ge_1_5_0 = plotting._mpl_ge_1_5_0() - self.mpl_ge_2_0_0 = plotting._mpl_ge_2_0_0() - self.mpl_ge_2_0_1 = plotting._mpl_ge_2_0_1() + self.mpl_le_1_2_1 = plotting.compat._mpl_le_1_2_1() + self.mpl_ge_1_3_1 = plotting.compat._mpl_ge_1_3_1() + self.mpl_ge_1_4_0 = plotting.compat._mpl_ge_1_4_0() + self.mpl_ge_1_5_0 = plotting.compat._mpl_ge_1_5_0() + self.mpl_ge_2_0_0 = plotting.compat._mpl_ge_2_0_0() + self.mpl_ge_2_0_1 = plotting.compat._mpl_ge_2_0_1() if self.mpl_ge_1_4_0: self.bp_n_objects = 7 @@ -353,7 +353,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, self.assertTrue(len(ax.get_children()) > 0) if layout is not None: - result = self._get_axes_layout(plotting._flatten(axes)) + result = self._get_axes_layout(plotting.tools._flatten(axes)) self.assertEqual(result, layout) self.assert_numpy_array_equal( @@ -379,7 +379,7 @@ def _flatten_visible(self, axes): axes : matplotlib Axes object, or its list-like """ - axes = plotting._flatten(axes) + axes = plotting.tools._flatten(axes) axes = [ax for ax in axes if ax.get_visible()] return axes diff --git a/pandas/tests/tseries/test_converter.py b/pandas/tests/plotting/test_converter.py similarity index 100% rename from pandas/tests/tseries/test_converter.py rename to pandas/tests/plotting/test_converter.py diff --git a/pandas/tests/plotting/test_datetimelike.py 
b/pandas/tests/plotting/test_datetimelike.py index 673c34903b259..3caa1935f0943 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -144,7 +144,7 @@ def test_high_freq(self): _check_plot_works(ser.plot) def test_get_datevalue(self): - from pandas.tseries.converter import get_datevalue + from pandas.plotting.converter import get_datevalue self.assertIsNone(get_datevalue(None, 'D')) self.assertEqual(get_datevalue(1987, 'A'), 1987) self.assertEqual(get_datevalue(Period(1987, 'A'), 'M'), @@ -243,7 +243,7 @@ def test_plot_multiple_inferred_freq(self): @slow def test_uhf(self): - import pandas.tseries.converter as conv + import pandas.plotting.converter as conv import matplotlib.pyplot as plt fig = plt.gcf() plt.clf() @@ -387,7 +387,7 @@ def _test(ax): _test(ax) def test_get_finder(self): - import pandas.tseries.converter as conv + import pandas.plotting.converter as conv self.assertEqual(conv.get_finder('B'), conv._daily_finder) self.assertEqual(conv.get_finder('D'), conv._daily_finder) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 8090b9cc44ca3..ada6aa7499f8c 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -22,7 +22,7 @@ import numpy as np from numpy.random import rand, randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -1979,7 +1979,7 @@ def test_unordered_ts(self): def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting._common_kinds: + for kind in plotting.plotting._common_kinds: if not _ok_for_gaussian_kde(kind): continue df.plot(kind=kind) @@ -1990,7 +1990,7 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting._common_kinds: + for kind in 
plotting.plotting._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2001,7 +2001,7 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting._common_kinds: + for kind in plotting.plotting._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2454,7 +2454,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting._plot_klass.keys(): + for kind in plotting.plotting._plot_klass.keys(): if not _ok_for_gaussian_kde(kind): continue args = {} @@ -2653,7 +2653,7 @@ def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) + plotting.plotting._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_option_mpl_style(self): with tm.assert_produces_warning(FutureWarning, diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 8c00d606059a4..b97c34a85dc0c 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -16,7 +16,7 @@ import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -622,7 +622,9 @@ def test_boxplot_series(self): @slow def test_kind_both_ways(self): s = Series(range(3)) - for kind in plotting._common_kinds + plotting._series_kinds: + kinds = (plotting.plotting._common_kinds + + plotting.plotting._series_kinds) + for kind in kinds: if not _ok_for_gaussian_kde(kind): continue s.plot(kind=kind) @@ -631,7 +633,7 @@ def test_kind_both_ways(self): @slow def test_invalid_plot_data(self): s = 
Series(list('abcd')) - for kind in plotting._common_kinds: + for kind in plotting.plotting._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -640,14 +642,14 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) - for kind in plotting._common_kinds: + for kind in plotting.plotting._common_kinds: if not _ok_for_gaussian_kde(kind): continue _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) - for kind in plotting._common_kinds: + for kind in plotting.plotting._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -718,54 +720,57 @@ def test_table(self): def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting._series_kinds + - plotting._common_kinds) + plotting.plotting._series_kinds + + plotting.plotting._common_kinds) @slow def test_standard_colors(self): + from pandas.plotting.style import _get_standard_colors + for c in ['r', 'red', 'green', '#FF0000']: - result = plotting._get_standard_colors(1, color=c) + result = _get_standard_colors(1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(1, color=[c]) + result = _get_standard_colors(1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(3, color=c) + result = _get_standard_colors(3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(3, color=[c]) + result = _get_standard_colors(3, color=[c]) self.assertEqual(result, [c] * 3) @slow def test_standard_colors_all(self): import matplotlib.colors as colors + from pandas.plotting.style import _get_standard_colors # multiple colors like mediumaquamarine for c in colors.cnames: - result = plotting._get_standard_colors(num_colors=1, color=c) + result = 
_get_standard_colors(num_colors=1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=1, color=[c]) + result = _get_standard_colors(num_colors=1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=3, color=c) + result = _get_standard_colors(num_colors=3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(num_colors=3, color=[c]) + result = _get_standard_colors(num_colors=3, color=[c]) self.assertEqual(result, [c] * 3) # single letter colors like k for c in colors.ColorConverter.colors: - result = plotting._get_standard_colors(num_colors=1, color=c) + result = _get_standard_colors(num_colors=1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=1, color=[c]) + result = _get_standard_colors(num_colors=1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=3, color=c) + result = _get_standard_colors(num_colors=3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(num_colors=3, color=[c]) + result = _get_standard_colors(num_colors=3, color=[c]) self.assertEqual(result, [c] * 3) def test_series_plot_color_kwargs(self): diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index bc768a8bc5b58..a93515b110cf4 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,1032 +1,11 @@ -from datetime import datetime, timedelta -import datetime as pydt -import numpy as np - -from dateutil.relativedelta import relativedelta - -import matplotlib.units as units -import matplotlib.dates as dates - -from matplotlib.ticker import Formatter, AutoLocator, Locator -from matplotlib.transforms import nonsingular - - -from pandas.types.common import (is_float, is_integer, - is_integer_dtype, - is_float_dtype, - is_datetime64_ns_dtype, - is_period_arraylike, - ) - -from pandas.compat import lrange -import pandas.compat as compat 
-import pandas._libs.lib as lib -import pandas.core.common as com -from pandas.core.index import Index - -from pandas.core.series import Series -from pandas.tseries.index import date_range -import pandas.tseries.tools as tools -import pandas.tseries.frequencies as frequencies -from pandas.tseries.frequencies import FreqGroup -from pandas.tseries.period import Period, PeriodIndex - -# constants -HOURS_PER_DAY = 24. -MIN_PER_HOUR = 60. -SEC_PER_MIN = 60. - -SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR -SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY - -MUSEC_PER_DAY = 1e6 * SEC_PER_DAY - - -def _mpl_le_2_0_0(): - try: - import matplotlib - return matplotlib.compare_versions('2.0.0', matplotlib.__version__) - except ImportError: - return False - - -def register(): - units.registry[lib.Timestamp] = DatetimeConverter() - units.registry[Period] = PeriodConverter() - units.registry[pydt.datetime] = DatetimeConverter() - units.registry[pydt.date] = DatetimeConverter() - units.registry[pydt.time] = TimeConverter() - units.registry[np.datetime64] = DatetimeConverter() - - -def _to_ordinalf(tm): - tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + - float(tm.microsecond / 1e6)) - return tot_sec - - -def time2num(d): - if isinstance(d, compat.string_types): - parsed = tools.to_datetime(d) - if not isinstance(parsed, datetime): - raise ValueError('Could not parse time %s' % d) - return _to_ordinalf(parsed.time()) - if isinstance(d, pydt.time): - return _to_ordinalf(d) - return d - - -class TimeConverter(units.ConversionInterface): - - @staticmethod - def convert(value, unit, axis): - valid_types = (str, pydt.time) - if (isinstance(value, valid_types) or is_integer(value) or - is_float(value)): - return time2num(value) - if isinstance(value, Index): - return value.map(time2num) - if isinstance(value, (list, tuple, np.ndarray, Index)): - return [time2num(x) for x in value] - return value - - @staticmethod - def axisinfo(unit, axis): - if unit != 'time': - return None - - majloc = 
AutoLocator() - majfmt = TimeFormatter(majloc) - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') - - @staticmethod - def default_units(x, axis): - return 'time' - - -# time formatter -class TimeFormatter(Formatter): - - def __init__(self, locs): - self.locs = locs - - def __call__(self, x, pos=0): - fmt = '%H:%M:%S' - s = int(x) - ms = int((x - s) * 1e3) - us = int((x - s) * 1e6 - ms) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - _, h = divmod(h, 24) - if us != 0: - fmt += '.%6f' - elif ms != 0: - fmt += '.%3f' - - return pydt.time(h, m, s, us).strftime(fmt) - - -# Period Conversion - - -class PeriodConverter(dates.DateConverter): - - @staticmethod - def convert(values, units, axis): - if not hasattr(axis, 'freq'): - raise TypeError('Axis must have `freq` set to convert to Periods') - valid_types = (compat.string_types, datetime, - Period, pydt.date, pydt.time) - if (isinstance(values, valid_types) or is_integer(values) or - is_float(values)): - return get_datevalue(values, axis.freq) - if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._values - if isinstance(values, Index): - return values.map(lambda x: get_datevalue(x, axis.freq)) - if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq)._values - if isinstance(values, (list, tuple, np.ndarray, Index)): - return [get_datevalue(x, axis.freq) for x in values] - return values - - -def get_datevalue(date, freq): - if isinstance(date, Period): - return date.asfreq(freq).ordinal - elif isinstance(date, (compat.string_types, datetime, - pydt.date, pydt.time)): - return Period(date, freq).ordinal - elif (is_integer(date) or is_float(date) or - (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): - return date - elif date is None: - return None - raise ValueError("Unrecognizable date '%s'" % date) - - -def _dt_to_float_ordinal(dt): - """ - Convert :mod:`datetime` to the Gregorian date as UTC float days, - preserving hours, minutes, seconds and 
microseconds. Return value - is a :func:`float`. - """ - if (isinstance(dt, (np.ndarray, Index, Series) - ) and is_datetime64_ns_dtype(dt)): - base = dates.epoch2num(dt.asi8 / 1.0E9) - else: - base = dates.date2num(dt) - return base - - -# Datetime Conversion -class DatetimeConverter(dates.DateConverter): - - @staticmethod - def convert(values, unit, axis): - def try_parse(values): - try: - return _dt_to_float_ordinal(tools.to_datetime(values)) - except Exception: - return values - - if isinstance(values, (datetime, pydt.date)): - return _dt_to_float_ordinal(values) - elif isinstance(values, np.datetime64): - return _dt_to_float_ordinal(lib.Timestamp(values)) - elif isinstance(values, pydt.time): - return dates.date2num(values) - elif (is_integer(values) or is_float(values)): - return values - elif isinstance(values, compat.string_types): - return try_parse(values) - elif isinstance(values, (list, tuple, np.ndarray, Index)): - if isinstance(values, Index): - values = values.values - if not isinstance(values, np.ndarray): - values = com._asarray_tuplesafe(values) - - if is_integer_dtype(values) or is_float_dtype(values): - return values - - try: - values = tools.to_datetime(values) - if isinstance(values, Index): - values = _dt_to_float_ordinal(values) - else: - values = [_dt_to_float_ordinal(x) for x in values] - except Exception: - values = _dt_to_float_ordinal(values) - - return values - - @staticmethod - def axisinfo(unit, axis): - """ - Return the :class:`~matplotlib.units.AxisInfo` for *unit*. - - *unit* is a tzinfo instance or None. - The *axis* argument is required but not used. 
- """ - tz = unit - - majloc = PandasAutoDateLocator(tz=tz) - majfmt = PandasAutoDateFormatter(majloc, tz=tz) - datemin = pydt.date(2000, 1, 1) - datemax = pydt.date(2010, 1, 1) - - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', - default_limits=(datemin, datemax)) - - -class PandasAutoDateFormatter(dates.AutoDateFormatter): - - def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): - dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) - # matplotlib.dates._UTC has no _utcoffset called by pandas - if self._tz is dates.UTC: - self._tz._utcoffset = self._tz.utcoffset(None) - - # For mpl > 2.0 the format strings are controlled via rcparams - # so do not mess with them. For mpl < 2.0 change the second - # break point and add a musec break point - if _mpl_le_2_0_0(): - self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' - self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' - - -class PandasAutoDateLocator(dates.AutoDateLocator): - - def get_locator(self, dmin, dmax): - 'Pick the best locator based on a distance.' - delta = relativedelta(dmax, dmin) - - num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days - num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds - tot_sec = num_days * 86400. + num_sec - - if abs(tot_sec) < self.minticks: - self._freq = -1 - locator = MilliSecondLocator(self.tz) - locator.set_axis(self.axis) - - locator.set_view_interval(*self.axis.get_view_interval()) - locator.set_data_interval(*self.axis.get_data_interval()) - return locator - - return dates.AutoDateLocator.get_locator(self, dmin, dmax) - - def _get_unit(self): - return MilliSecondLocator.get_unit_generic(self._freq) - - -class MilliSecondLocator(dates.DateLocator): - - UNIT = 1. / (24 * 3600 * 1000) - - def __init__(self, tz): - dates.DateLocator.__init__(self, tz) - self._interval = 1. 
- - def _get_unit(self): - return self.get_unit_generic(-1) - - @staticmethod - def get_unit_generic(freq): - unit = dates.RRuleLocator.get_unit_generic(freq) - if unit < 0: - return MilliSecondLocator.UNIT - return unit - - def __call__(self): - # if no data have been set, this will tank with a ValueError - try: - dmin, dmax = self.viewlim_to_dt() - except ValueError: - return [] - - if dmin > dmax: - dmax, dmin = dmin, dmax - # We need to cap at the endpoints of valid datetime - - # TODO(wesm) unused? - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - nmax, nmin = dates.date2num((dmax, dmin)) - - num = (nmax - nmin) * 86400 * 1000 - max_millis_ticks = 6 - for interval in [1, 10, 50, 100, 200, 500]: - if num <= interval * (max_millis_ticks - 1): - self._interval = interval - break - else: - # We went through the whole loop without breaking, default to 1 - self._interval = 1000. - - estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) - - if estimate > self.MAXTICKS * 2: - raise RuntimeError(('MillisecondLocator estimated to generate %d ' - 'ticks from %s to %s: exceeds Locator.MAXTICKS' - '* 2 (%d) ') % - (estimate, dmin, dmax, self.MAXTICKS * 2)) - - freq = '%dL' % self._get_interval() - tz = self.tz.tzname(None) - st = _from_ordinal(dates.date2num(dmin)) # strip tz - ed = _from_ordinal(dates.date2num(dmax)) - all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).asobject - - try: - if len(all_dates) > 0: - locs = self.raise_if_exceeds(dates.date2num(all_dates)) - return locs - except Exception: # pragma: no cover - pass - - lims = dates.date2num([dmin, dmax]) - return lims - - def _get_interval(self): - return self._interval - - def autoscale(self): - """ - Set the view limits to include the data range. 
- """ - dmin, dmax = self.datalim_to_dt() - if dmin > dmax: - dmax, dmin = dmin, dmax - - # We need to cap at the endpoints of valid datetime - - # TODO(wesm): unused? - - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - dmin, dmax = self.datalim_to_dt() - - vmin = dates.date2num(dmin) - vmax = dates.date2num(dmax) - - return self.nonsingular(vmin, vmax) - - -def _from_ordinal(x, tz=None): - ix = int(x) - dt = datetime.fromordinal(ix) - remainder = float(x) - ix - hour, remainder = divmod(24 * remainder, 1) - minute, remainder = divmod(60 * remainder, 1) - second, remainder = divmod(60 * remainder, 1) - microsecond = int(1e6 * remainder) - if microsecond < 10: - microsecond = 0 # compensate for rounding errors - dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), - int(second), microsecond) - if tz is not None: - dt = dt.astimezone(tz) - - if microsecond > 999990: # compensate for rounding errors - dt += timedelta(microseconds=1e6 - microsecond) - - return dt - -# Fixed frequency dynamic tick locators and formatters - -# ------------------------------------------------------------------------- -# --- Locators --- -# ------------------------------------------------------------------------- - - -def _get_default_annual_spacing(nyears): - """ - Returns a default spacing between consecutive ticks for annual data. 
- """ - if nyears < 11: - (min_spacing, maj_spacing) = (1, 1) - elif nyears < 20: - (min_spacing, maj_spacing) = (1, 2) - elif nyears < 50: - (min_spacing, maj_spacing) = (1, 5) - elif nyears < 100: - (min_spacing, maj_spacing) = (5, 10) - elif nyears < 200: - (min_spacing, maj_spacing) = (5, 25) - elif nyears < 600: - (min_spacing, maj_spacing) = (10, 50) - else: - factor = nyears // 1000 + 1 - (min_spacing, maj_spacing) = (factor * 20, factor * 100) - return (min_spacing, maj_spacing) - - -def period_break(dates, period): - """ - Returns the indices where the given period changes. - - Parameters - ---------- - dates : PeriodIndex - Array of intervals to monitor. - period : string - Name of the period to monitor. - """ - current = getattr(dates, period) - previous = getattr(dates - 1, period) - return np.nonzero(current - previous)[0] - - -def has_level_label(label_flags, vmin): - """ - Returns true if the ``label_flags`` indicate there is at least one label - for this level. - - if the minimum view limit is not an exact integer, then the first tick - label won't be shown, so we must adjust for that. 
- """ - if label_flags.size == 0 or (label_flags.size == 1 and - label_flags[0] == 0 and - vmin % 1 > 0.0): - return False - else: - return True - - -def _daily_finder(vmin, vmax, freq): - periodsperday = -1 - - if freq >= FreqGroup.FR_HR: - if freq == FreqGroup.FR_NS: - periodsperday = 24 * 60 * 60 * 1000000000 - elif freq == FreqGroup.FR_US: - periodsperday = 24 * 60 * 60 * 1000000 - elif freq == FreqGroup.FR_MS: - periodsperday = 24 * 60 * 60 * 1000 - elif freq == FreqGroup.FR_SEC: - periodsperday = 24 * 60 * 60 - elif freq == FreqGroup.FR_MIN: - periodsperday = 24 * 60 - elif freq == FreqGroup.FR_HR: - periodsperday = 24 - else: # pragma: no cover - raise ValueError("unexpected frequency: %s" % freq) - periodsperyear = 365 * periodsperday - periodspermonth = 28 * periodsperday - - elif freq == FreqGroup.FR_BUS: - periodsperyear = 261 - periodspermonth = 19 - elif freq == FreqGroup.FR_DAY: - periodsperyear = 365 - periodspermonth = 28 - elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: - periodsperyear = 52 - periodspermonth = 3 - else: # pragma: no cover - raise ValueError("unexpected frequency") - - # save this for later usage - vmin_orig = vmin - - (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), - Period(ordinal=int(vmax), freq=freq)) - span = vmax.ordinal - vmin.ordinal + 1 - dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) - # Initialize the output - info = np.zeros(span, - dtype=[('val', np.int64), ('maj', bool), - ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_._values - info['fmt'][:] = '' - info['maj'][[0, -1]] = True - # .. and set some shortcuts - info_maj = info['maj'] - info_min = info['min'] - info_fmt = info['fmt'] - - def first_label(label_flags): - if (label_flags[0] == 0) and (label_flags.size > 1) and \ - ((vmin_orig % 1) > 0.0): - return label_flags[1] - else: - return label_flags[0] - - # Case 1. 
Less than a month - if span <= periodspermonth: - day_start = period_break(dates_, 'day') - month_start = period_break(dates_, 'month') - - def _hour_finder(label_interval, force_year_start): - _hour = dates_.hour - _prev_hour = (dates_ - 1).hour - hour_start = (_hour - _prev_hour) != 0 - info_maj[day_start] = True - info_min[hour_start & (_hour % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - if force_year_start and not has_level_label(year_start, vmin_orig): - info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' - - def _minute_finder(label_interval): - hour_start = period_break(dates_, 'hour') - _minute = dates_.minute - _prev_minute = (dates_ - 1).minute - minute_start = (_minute - _prev_minute) != 0 - info_maj[hour_start] = True - info_min[minute_start & (_minute % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - - def _second_finder(label_interval): - minute_start = period_break(dates_, 'minute') - _second = dates_.second - _prev_second = (dates_ - 1).second - second_start = (_second - _prev_second) != 0 - info['maj'][minute_start] = True - info['min'][second_start & (_second % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[second_start & (_second % - label_interval == 0)] = '%H:%M:%S' - info_fmt[day_start] = '%H:%M:%S\n%d-%b' - info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' - - if span < periodsperday / 12000.0: - _second_finder(1) - elif span < periodsperday / 6000.0: - _second_finder(2) - elif span < periodsperday / 2400.0: - _second_finder(5) - elif span < periodsperday / 1200.0: - _second_finder(10) - elif span < 
periodsperday / 800.0: - _second_finder(15) - elif span < periodsperday / 400.0: - _second_finder(30) - elif span < periodsperday / 150.0: - _minute_finder(1) - elif span < periodsperday / 70.0: - _minute_finder(2) - elif span < periodsperday / 24.0: - _minute_finder(5) - elif span < periodsperday / 12.0: - _minute_finder(15) - elif span < periodsperday / 6.0: - _minute_finder(30) - elif span < periodsperday / 2.5: - _hour_finder(1, False) - elif span < periodsperday / 1.5: - _hour_finder(2, False) - elif span < periodsperday * 1.25: - _hour_finder(3, False) - elif span < periodsperday * 2.5: - _hour_finder(6, True) - elif span < periodsperday * 4: - _hour_finder(12, True) - else: - info_maj[month_start] = True - info_min[day_start] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[day_start] = '%d' - info_fmt[month_start] = '%d\n%b' - info_fmt[year_start] = '%d\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(day_start)] = '%d\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '%d\n%b\n%Y' - - # Case 2. Less than three months - elif span <= periodsperyear // 4: - month_start = period_break(dates_, 'month') - info_maj[month_start] = True - if freq < FreqGroup.FR_HR: - info['min'] = True - else: - day_start = period_break(dates_, 'day') - info['min'][day_start] = True - week_start = period_break(dates_, 'week') - year_start = period_break(dates_, 'year') - info_fmt[week_start] = '%d' - info_fmt[month_start] = '\n\n%b' - info_fmt[year_start] = '\n\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(week_start)] = '\n\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '\n\n%b\n%Y' - # Case 3. Less than 14 months ............... 
- elif span <= 1.15 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - week_start = period_break(dates_, 'week') - info_maj[month_start] = True - info_min[week_start] = True - info_min[year_start] = False - info_min[month_start] = False - info_fmt[month_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - if not has_level_label(year_start, vmin_orig): - info_fmt[first_label(month_start)] = '%b\n%Y' - # Case 4. Less than 2.5 years ............... - elif span <= 2.5 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - month_start = period_break(dates_, 'month') - info_maj[quarter_start] = True - info_min[month_start] = True - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 4. Less than 4 years ................. - elif span <= 4 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - info_maj[year_start] = True - info_min[month_start] = True - info_min[year_start] = False - - month_break = dates_[month_start].month - jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 5. Less than 11 years ................ - elif span <= 11 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - info_maj[year_start] = True - info_min[quarter_start] = True - info_min[year_start] = False - info_fmt[year_start] = '%Y' - # Case 6. More than 12 years ................ 
- else: - year_start = period_break(dates_, 'year') - year_break = dates_[year_start].year - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(year_break % maj_anndef == 0)] - info_maj[major_idx] = True - minor_idx = year_start[(year_break % min_anndef == 0)] - info_min[minor_idx] = True - info_fmt[major_idx] = '%Y' - - return info - - -def _monthly_finder(vmin, vmax, freq): - periodsperyear = 12 - - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - # Initialize the output - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - dates_ = info['val'] - info['fmt'] = '' - year_start = (dates_ % 12 == 0).nonzero()[0] - info_maj = info['maj'] - info_fmt = info['fmt'] - - if span <= 1.15 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = '%b' - info_fmt[year_start] = '%b\n%Y' - - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = '%b\n%Y' - - elif span <= 2.5 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - # TODO: Check the following : is it really info['fmt'] ? 
- info['fmt'][quarter_start] = True - info['min'] = True - - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 4 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 11 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - info['min'][quarter_start] = True - - info_fmt[year_start] = '%Y' - - else: - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - years = dates_[year_start] // 12 + 1 - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - - info_fmt[major_idx] = '%Y' - - return info - - -def _quarterly_finder(vmin, vmax, freq): - periodsperyear = 4 - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - info_maj = info['maj'] - info_fmt = info['fmt'] - year_start = (dates_ % 4 == 0).nonzero()[0] - - if span <= 3.5 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = 'Q%q' - info_fmt[year_start] = 'Q%q\n%F' - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = 'Q%q\n%F' - - elif span <= 11 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - info_fmt[year_start] = '%F' - - else: - years = dates_[year_start] // 4 + 1 - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - info_fmt[major_idx] = '%F' - - 
return info - - -def _annual_finder(vmin, vmax, freq): - (vmin, vmax) = (int(vmin), int(vmax + 1)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - - (min_anndef, maj_anndef) = _get_default_annual_spacing(span) - major_idx = dates_ % maj_anndef == 0 - info['maj'][major_idx] = True - info['min'][(dates_ % min_anndef == 0)] = True - info['fmt'][major_idx] = '%Y' - - return info - - -def get_finder(freq): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - fgroup = frequencies.get_freq_group(freq) - - if fgroup == FreqGroup.FR_ANN: - return _annual_finder - elif fgroup == FreqGroup.FR_QTR: - return _quarterly_finder - elif freq == FreqGroup.FR_MTH: - return _monthly_finder - elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): - return _daily_finder - else: # pragma: no cover - errmsg = "Unsupported frequency: %s" % (freq) - raise NotImplementedError(errmsg) - - -class TimeSeries_DateLocator(Locator): - """ - Locates the ticks along an axis controlled by a :class:`Series`. - - Parameters - ---------- - freq : {var} - Valid frequency specifier. - minor_locator : {False, True}, optional - Whether the locator is for minor ticks (True) or not. - dynamic_mode : {True, False}, optional - Whether the locator should work in dynamic mode. 
- base : {int}, optional - quarter : {int}, optional - month : {int}, optional - day : {int}, optional - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.freq = freq - self.base = base - (self.quarter, self.month, self.day) = (quarter, month, day) - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _get_default_locs(self, vmin, vmax): - "Returns the default locations of ticks." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - - locator = self.plot_obj.date_axis_info - - if self.isminor: - return np.compress(locator['min'], locator['val']) - return np.compress(locator['maj'], locator['val']) - - def __call__(self): - 'Return the locations of the ticks.' - # axis calls Locator.set_axis inside set_m_formatter - vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - vmin, vmax = vi - if vmax < vmin: - vmin, vmax = vmax, vmin - if self.isdynamic: - locs = self._get_default_locs(vmin, vmax) - else: # pragma: no cover - base = self.base - (d, m) = divmod(vmin, base) - vmin = (d + 1) * base - locs = lrange(vmin, vmax + 1, base) - return locs - - def autoscale(self): - """ - Sets the view limits to the nearest multiples of base that contain the - data. 
- """ - # requires matplotlib >= 0.98.0 - (vmin, vmax) = self.axis.get_data_interval() - - locs = self._get_default_locs(vmin, vmax) - (vmin, vmax) = locs[[0, -1]] - if vmin == vmax: - vmin -= 1 - vmax += 1 - return nonsingular(vmin, vmax) - -# ------------------------------------------------------------------------- -# --- Formatter --- -# ------------------------------------------------------------------------- - - -class TimeSeries_DateFormatter(Formatter): - """ - Formats the ticks along an axis controlled by a :class:`PeriodIndex`. - - Parameters - ---------- - freq : {int, string} - Valid frequency specifier. - minor_locator : {False, True} - Whether the current formatter should apply to minor ticks (True) or - major ticks (False). - dynamic_mode : {True, False} - Whether the formatter works in dynamic mode or not. - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.format = None - self.freq = freq - self.locs = [] - self.formatdict = None - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _set_default_format(self, vmin, vmax): - "Returns the default ticks spacing." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - info = self.plot_obj.date_axis_info - - if self.isminor: - format = np.compress(info['min'] & np.logical_not(info['maj']), - info) - else: - format = np.compress(info['maj'], info) - self.formatdict = dict([(x, f) for (x, _, _, f) in format]) - return self.formatdict - - def set_locs(self, locs): - 'Sets the locations of the ticks' - # don't actually use the locs. This is just needed to work with - # matplotlib. 
Force to use vmin, vmax - self.locs = locs - - (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - if vmax < vmin: - (vmin, vmax) = (vmax, vmin) - self._set_default_format(vmin, vmax) - - def __call__(self, x, pos=0): - if self.formatdict is None: - return '' - else: - fmt = self.formatdict.pop(x, '') - return Period(ordinal=int(x), freq=self.freq).strftime(fmt) - - -class TimeSeries_TimedeltaFormatter(Formatter): - """ - Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. - """ - - @staticmethod - def format_timedelta_ticks(x, pos, n_decimals): - """ - Convert seconds to 'D days HH:MM:SS.F' - """ - s, ns = divmod(x, 1e9) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - d, h = divmod(h, 24) - decimals = int(ns * 10**(n_decimals - 9)) - s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) - if n_decimals > 0: - s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) - if d != 0: - s = '{:d} days '.format(int(d)) + s - return s - - def __call__(self, x, pos=0): - (vmin, vmax) = tuple(self.axis.get_view_interval()) - n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) - if n_decimals > 9: - n_decimals = 9 - return self.format_timedelta_ticks(x, pos, n_decimals) +# flake8: noqa + +from pandas.plotting.converter import (register, time2num, + TimeConverter, TimeFormatter, + PeriodConverter, get_datevalue, + DatetimeConverter, + PandasAutoDateFormatter, + PandasAutoDateLocator, + MilliSecondLocator, get_finder, + TimeSeries_DateLocator, + TimeSeries_DateFormatter) diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 4eddf54701889..6ecada90665cd 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -1,344 +1,3 @@ -""" -Period formatters and locators adapted from scikits.timeseries by -Pierre GF Gerard-Marchant & Matt Knox -""" +# flake8: noqa -# TODO: Use the fact that axis 
can have units to simplify the process - -import numpy as np - -from matplotlib import pylab -from pandas.tseries.period import Period -from pandas.tseries.offsets import DateOffset -import pandas.tseries.frequencies as frequencies -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.period import PeriodIndex -from pandas.tseries.tdi import TimedeltaIndex -from pandas.formats.printing import pprint_thing -import pandas.compat as compat - -from pandas.tseries.converter import (TimeSeries_DateLocator, - TimeSeries_DateFormatter, - TimeSeries_TimedeltaFormatter) - -# --------------------------------------------------------------------- -# Plotting functions and monkey patches - - -def tsplot(series, plotf, ax=None, **kwargs): - """ - Plots a Series on the given Matplotlib axes or the current axes - - Parameters - ---------- - axes : Axes - series : Series - - Notes - _____ - Supports same kwargs as Axes.plot - - """ - # Used inferred freq is possible, need a test case for inferred - if ax is None: - import matplotlib.pyplot as plt - ax = plt.gca() - - freq, series = _maybe_resample(series, ax, kwargs) - - # Set ax with freq info - _decorate_axes(ax, freq, kwargs) - ax._plot_data.append((series, plotf, kwargs)) - lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) - - # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq, series.index) - return lines - - -def _maybe_resample(series, ax, kwargs): - # resample against axes freq if necessary - freq, ax_freq = _get_freq(ax, series) - - if freq is None: # pragma: no cover - raise ValueError('Cannot use dynamic axis without frequency info') - - # Convert DatetimeIndex to PeriodIndex - if isinstance(series.index, DatetimeIndex): - series = series.to_period(freq=freq) - - if ax_freq is not None and freq != ax_freq: - if frequencies.is_superperiod(freq, ax_freq): # upsample input - series = series.copy() - series.index = series.index.asfreq(ax_freq, how='s') - freq = 
ax_freq - elif _is_sup(freq, ax_freq): # one is weekly - how = kwargs.pop('how', 'last') - series = getattr(series.resample('D'), how)().dropna() - series = getattr(series.resample(ax_freq), how)().dropna() - freq = ax_freq - elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): - _upsample_others(ax, freq, kwargs) - ax_freq = freq - else: # pragma: no cover - raise ValueError('Incompatible frequency conversion') - return freq, series - - -def _is_sub(f1, f2): - return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or - (f2.startswith('W') and frequencies.is_subperiod(f1, 'D'))) - - -def _is_sup(f1, f2): - return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or - (f2.startswith('W') and frequencies.is_superperiod(f1, 'D'))) - - -def _upsample_others(ax, freq, kwargs): - legend = ax.get_legend() - lines, labels = _replot_ax(ax, freq, kwargs) - _replot_ax(ax, freq, kwargs) - - other_ax = None - if hasattr(ax, 'left_ax'): - other_ax = ax.left_ax - if hasattr(ax, 'right_ax'): - other_ax = ax.right_ax - - if other_ax is not None: - rlines, rlabels = _replot_ax(other_ax, freq, kwargs) - lines.extend(rlines) - labels.extend(rlabels) - - if (legend is not None and kwargs.get('legend', True) and - len(lines) > 0): - title = legend.get_title().get_text() - if title == 'None': - title = None - ax.legend(lines, labels, loc='best', title=title) - - -def _replot_ax(ax, freq, kwargs): - data = getattr(ax, '_plot_data', None) - - # clear current axes and data - ax._plot_data = [] - ax.clear() - - _decorate_axes(ax, freq, kwargs) - - lines = [] - labels = [] - if data is not None: - for series, plotf, kwds in data: - series = series.copy() - idx = series.index.asfreq(freq, how='S') - series.index = idx - ax._plot_data.append((series, plotf, kwds)) - - # for tsplot - if isinstance(plotf, compat.string_types): - from pandas.tools.plotting import _plot_klass - plotf = _plot_klass[plotf]._plot - - lines.append(plotf(ax, 
series.index._mpl_repr(), - series.values, **kwds)[0]) - labels.append(pprint_thing(series.name)) - - return lines, labels - - -def _decorate_axes(ax, freq, kwargs): - """Initialize axes for time-series plotting""" - if not hasattr(ax, '_plot_data'): - ax._plot_data = [] - - ax.freq = freq - xaxis = ax.get_xaxis() - xaxis.freq = freq - if not hasattr(ax, 'legendlabels'): - ax.legendlabels = [kwargs.get('label', None)] - else: - ax.legendlabels.append(kwargs.get('label', None)) - ax.view_interval = None - ax.date_axis_info = None - - -def _get_ax_freq(ax): - """ - Get the freq attribute of the ax object if set. - Also checks shared axes (eg when using secondary yaxis, sharex=True - or twinx) - """ - ax_freq = getattr(ax, 'freq', None) - if ax_freq is None: - # check for left/right ax in case of secondary yaxis - if hasattr(ax, 'left_ax'): - ax_freq = getattr(ax.left_ax, 'freq', None) - elif hasattr(ax, 'right_ax'): - ax_freq = getattr(ax.right_ax, 'freq', None) - if ax_freq is None: - # check if a shared ax (sharex/twinx) has already freq set - shared_axes = ax.get_shared_x_axes().get_siblings(ax) - if len(shared_axes) > 1: - for shared_ax in shared_axes: - ax_freq = getattr(shared_ax, 'freq', None) - if ax_freq is not None: - break - return ax_freq - - -def _get_freq(ax, series): - # get frequency from data - freq = getattr(series.index, 'freq', None) - if freq is None: - freq = getattr(series.index, 'inferred_freq', None) - - ax_freq = _get_ax_freq(ax) - - # use axes freq if no data freq - if freq is None: - freq = ax_freq - - # get the period frequency - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - - freq = frequencies.get_period_alias(freq) - return freq, ax_freq - - -def _use_dynamic_x(ax, data): - freq = _get_index_freq(data) - ax_freq = _get_ax_freq(ax) - - if freq is None: # convert irregular if axes has freq info - freq = ax_freq - else: # do not use tsplot if irregular was plotted first - if 
(ax_freq is None) and (len(ax.get_lines()) > 0): - return False - - if freq is None: - return False - - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - if freq is None: - return False - - # hack this for 0.10.1, creating more technical debt...sigh - if isinstance(data.index, DatetimeIndex): - base = frequencies.get_freq(freq) - x = data.index - if (base <= frequencies.FreqGroup.FR_DAY): - return x[:1].is_normalized - return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] - return True - - -def _get_index_freq(data): - freq = getattr(data.index, 'freq', None) - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if freq == 'B': - weekdays = np.unique(data.index.dayofweek) - if (5 in weekdays) or (6 in weekdays): - freq = None - return freq - - -def _maybe_convert_index(ax, data): - # tsplot converts automatically, but don't want to convert index - # over and over for DataFrames - if isinstance(data.index, DatetimeIndex): - freq = getattr(data.index, 'freq', None) - - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if isinstance(freq, DateOffset): - freq = freq.rule_code - - if freq is None: - freq = _get_ax_freq(ax) - - if freq is None: - raise ValueError('Could not get frequency alias for plotting') - - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - data = data.to_period(freq=freq) - return data - - -# Patch methods for subplot. Only format_dateaxis is currently used. -# Do we need the rest for convenience? 
- -def format_timedelta_ticks(x, pos, n_decimals): - """ - Convert seconds to 'D days HH:MM:SS.F' - """ - s, ns = divmod(x, 1e9) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - d, h = divmod(h, 24) - decimals = int(ns * 10**(n_decimals - 9)) - s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) - if n_decimals > 0: - s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) - if d != 0: - s = '{:d} days '.format(int(d)) + s - return s - - -def format_dateaxis(subplot, freq, index): - """ - Pretty-formats the date axis (x-axis). - - Major and minor ticks are automatically set for the frequency of the - current underlying series. As the dynamic mode is activated by - default, changing the limits of the x axis will intelligently change - the positions of the ticks. - """ - - # handle index specific formatting - # Note: DatetimeIndex does not use this - # interface. DatetimeIndex uses matplotlib.date directly - if isinstance(index, PeriodIndex): - - majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_locator(majlocator) - subplot.xaxis.set_minor_locator(minlocator) - - majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_formatter(majformatter) - subplot.xaxis.set_minor_formatter(minformatter) - - # x and y coord info - subplot.format_coord = lambda t, y: ( - "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) - - elif isinstance(index, TimedeltaIndex): - subplot.xaxis.set_major_formatter( - TimeSeries_TimedeltaFormatter()) - else: - raise TypeError('index type not supported') - - pylab.draw_if_interactive() +from pandas.plotting.timeseries import tsplot diff --git a/setup.py 
b/setup.py index 6707af7eb0908..eef3df7bbe6da 100755 --- a/setup.py +++ b/setup.py @@ -649,6 +649,7 @@ def pxd(name): 'pandas.io.msgpack', 'pandas._libs', 'pandas.formats', + 'pandas.plotting' 'pandas.sparse', 'pandas.stats', 'pandas.util', From 1fba0c67ee3cbc437c59811e6c2552405bcce55f Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 14 Jan 2017 17:50:05 +0900 Subject: [PATCH 02/12] Add deprecation wrapper --- pandas/__init__.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 529750cd97076..7b6a63842598e 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -49,7 +49,10 @@ from pandas.tools.merge import (merge, ordered_merge, merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab -from pandas.tools.plotting import scatter_matrix, plot_params + +# deprecated +import pandas.tools.plotting +from pandas.plotting import scatter_matrix, plot_params from pandas.tools.tile import cut, qcut from pandas.tools.util import to_numeric from pandas.core.reshape import melt From 859ad243cd9e639dc34317a6aec95c542beac621 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 21 Jan 2017 10:23:57 +0900 Subject: [PATCH 03/12] Move tests under plotting and other corrections --- pandas/__init__.py | 8 ++- pandas/core/frame.py | 2 +- pandas/core/groupby.py | 2 +- pandas/core/series.py | 2 +- pandas/plotting/api.py | 10 +-- pandas/plotting/{plotting.py => core.py} | 0 .../plotting => plotting/tests}/__init__.py | 0 .../plotting => plotting/tests}/common.py | 3 +- .../tests}/test_boxplot_method.py | 4 +- .../tests}/test_converter.py | 0 .../tests}/test_datetimelike.py | 2 +- pandas/plotting/tests/test_deprecated.py | 68 +++++++++++++++++++ .../plotting => plotting/tests}/test_frame.py | 12 ++-- .../tests}/test_groupby.py | 2 +- .../tests}/test_hist_method.py | 4 +- .../plotting => plotting/tests}/test_misc.py | 16 ++--- .../tests}/test_series.py | 16 ++--- pandas/plotting/timeseries.py | 2 +-
pandas/tools/plotting.py | 21 ++++++ pandas/util/doctools.py | 2 +- setup.py | 1 + 21 files changed, 136 insertions(+), 41 deletions(-) rename pandas/plotting/{plotting.py => core.py} (100%) rename pandas/{tests/plotting => plotting/tests}/__init__.py (100%) rename pandas/{tests/plotting => plotting/tests}/common.py (99%) rename pandas/{tests/plotting => plotting/tests}/test_boxplot_method.py (99%) rename pandas/{tests/plotting => plotting/tests}/test_converter.py (100%) rename pandas/{tests/plotting => plotting/tests}/test_datetimelike.py (99%) create mode 100644 pandas/plotting/tests/test_deprecated.py rename pandas/{tests/plotting => plotting/tests}/test_frame.py (99%) rename pandas/{tests/plotting => plotting/tests}/test_groupby.py (97%) rename pandas/{tests/plotting => plotting/tests}/test_hist_method.py (99%) rename pandas/{tests/plotting => plotting/tests}/test_misc.py (96%) rename pandas/{tests/plotting => plotting/tests}/test_series.py (98%) create mode 100644 pandas/tools/plotting.py diff --git a/pandas/__init__.py b/pandas/__init__.py index 7b6a63842598e..15db65ce76b75 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -50,9 +50,13 @@ merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab -# deprecated +# deprecate tools.plotting, and directly imported scatter_matrix import pandas.tools.plotting -from pandas.plotting import scatter_matrix, plot_params +from pandas.plotting import plot_params +from pandas.util.decorators import deprecate +scatter_matrix = deprecate('pandas.scatter_matrix', pandas.plotting.scatter_matrix, + 'pandas.plotting.scatter_matrix') + from pandas.tools.tile import cut, qcut from pandas.tools.util import to_numeric from pandas.core.reshape import melt diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3cd9bd2c8aae9..46f33f270c3c2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,7 +90,7 @@ import pandas.core.ops as ops import pandas.formats.format as fmt from 
pandas.formats.printing import pprint_thing -import pandas.plotting.plotting as gfx +import pandas.plotting.core as gfx from pandas._libs import lib, algos as libalgos diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ad24d76cbe2d3..68e37d4998f51 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -4159,7 +4159,7 @@ def groupby_series(obj, col=None): return results -from pandas.plotting.plotting import boxplot_frame_groupby # noqa +from pandas.plotting.core import boxplot_frame_groupby # noqa DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/core/series.py b/pandas/core/series.py index 411861f20d97c..f7571d882047f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3001,7 +3001,7 @@ def create_from_value(value, index, dtype): # ---------------------------------------------------------------------- # Add plotting methods to Series -import pandas.plotting.plotting as _gfx # noqa +import pandas.plotting.core as _gfx # noqa Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, _gfx.SeriesPlotMethods) diff --git a/pandas/plotting/api.py b/pandas/plotting/api.py index a64792e406357..f1df2e4426151 100644 --- a/pandas/plotting/api.py +++ b/pandas/plotting/api.py @@ -5,8 +5,8 @@ # flake8: noqa try: # mpl optional - from pandas.plotting import converter as conv - conv.register() # needs to override so set_xlim works with str/number + from pandas.plotting import converter + converter.register() # needs to override so set_xlim works with str/number except ImportError: pass @@ -14,7 +14,7 @@ andrews_curves, bootstrap_plot, parallel_coordinates, lag_plot, autocorrelation_plot) -from pandas.plotting.plotting import (boxplot, scatter_plot, grouped_hist, - hist_frame, hist_series) +from pandas.plotting.core import (boxplot, scatter_plot, grouped_hist, + hist_frame, hist_series) from pandas.plotting.style import plot_params -from pandas.plotting.tools import table \ No newline at end of file +from 
pandas.plotting.tools import table diff --git a/pandas/plotting/plotting.py b/pandas/plotting/core.py similarity index 100% rename from pandas/plotting/plotting.py rename to pandas/plotting/core.py diff --git a/pandas/tests/plotting/__init__.py b/pandas/plotting/tests/__init__.py similarity index 100% rename from pandas/tests/plotting/__init__.py rename to pandas/plotting/tests/__init__.py diff --git a/pandas/tests/plotting/common.py b/pandas/plotting/tests/common.py similarity index 99% rename from pandas/tests/plotting/common.py rename to pandas/plotting/tests/common.py index aa4e57c21fcfc..91372ba25fb59 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/plotting/tests/common.py @@ -73,7 +73,8 @@ def setUp(self): self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' # common test data from pandas import read_csv - path = os.path.join(os.path.dirname(curpath()), 'data', 'iris.csv') + base = os.path.join(os.path.dirname(curpath()), os.pardir) + path = os.path.join(base, 'tests', 'data', 'iris.csv') self.iris = read_csv(path) n = 100 diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/plotting/tests/test_boxplot_method.py similarity index 99% rename from pandas/tests/plotting/test_boxplot_method.py rename to pandas/plotting/tests/test_boxplot_method.py index 31c150bc1e64f..e8dfde25d9769 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/plotting/tests/test_boxplot_method.py @@ -14,9 +14,9 @@ from numpy import random from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) +from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works) """ Test cases for .boxplot method """ diff --git a/pandas/tests/plotting/test_converter.py b/pandas/plotting/tests/test_converter.py similarity index 100% rename from pandas/tests/plotting/test_converter.py rename to 
pandas/plotting/tests/test_converter.py diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/plotting/tests/test_datetimelike.py similarity index 99% rename from pandas/tests/plotting/test_datetimelike.py rename to pandas/plotting/tests/test_datetimelike.py index 3caa1935f0943..eedbf5c5bbda4 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/plotting/tests/test_datetimelike.py @@ -17,7 +17,7 @@ from pandas.util.testing import assert_series_equal, ensure_clean, slow import pandas.util.testing as tm -from pandas.tests.plotting.common import (TestPlotBase, +from pandas.plotting.tests.common import (TestPlotBase, _skip_if_no_scipy_gaussian_kde) diff --git a/pandas/plotting/tests/test_deprecated.py b/pandas/plotting/tests/test_deprecated.py new file mode 100644 index 0000000000000..59434ec813f2b --- /dev/null +++ b/pandas/plotting/tests/test_deprecated.py @@ -0,0 +1,68 @@ +# coding: utf-8 + +import nose +import string + +import pandas as pd +import pandas.util.testing as tm +from pandas.util.testing import slow + +import numpy as np +from numpy.random import randn + +import pandas.tools.plotting as plotting + +from pandas.plotting.tests.common import TestPlotBase + + +""" +Test cases for plot functions imported from deprecated +pandas.tools.plotting +""" + + +@tm.mplskip +class TestDeprecatedNameSpace(TestPlotBase): + + @slow + def test_scatter_plot_legacy(self): + tm._skip_if_no_scipy() + + df = pd.DataFrame(randn(100, 2)) + + with tm.assert_produces_warning(FutureWarning): + plotting.scatter_matrix(df) + + with tm.assert_produces_warning(FutureWarning): + pd.scatter_matrix(df) + + @slow + def test_boxplot_deprecated(self): + df = pd.DataFrame(randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=['one', 'two', 'three', 'four']) + df['indic'] = ['foo', 'bar'] * 3 + + with tm.assert_produces_warning(FutureWarning): + plotting.boxplot(df, column=['one', 'two'], + by='indic') + + @slow + def test_grouped_hist_legacy(self): + df = 
pd.DataFrame(randn(500, 2), columns=['A', 'B']) + df['C'] = np.random.randint(0, 4, 500) + df['D'] = ['X'] * 500 + + with tm.assert_produces_warning(FutureWarning): + plotting.grouped_hist(df.A, by=df.C) + + @slow + def test_radviz_deprecated(self): + df = self.iris + with tm.assert_produces_warning(FutureWarning): + plotting.radviz(frame=df, class_column='Name') + + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + exit=False) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/plotting/tests/test_frame.py similarity index 99% rename from pandas/tests/plotting/test_frame.py rename to pandas/plotting/tests/test_frame.py index ada6aa7499f8c..1b49a4b4a1a78 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/plotting/tests/test_frame.py @@ -23,7 +23,7 @@ from numpy.random import rand, randn import pandas.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, +from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -1979,7 +1979,7 @@ def test_unordered_ts(self): def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting.plotting._common_kinds: + for kind in plotting.core._common_kinds: if not _ok_for_gaussian_kde(kind): continue df.plot(kind=kind) @@ -1990,7 +1990,7 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting.plotting._common_kinds: + for kind in plotting.core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2001,7 +2001,7 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting.plotting._common_kinds: + for kind in plotting.core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with 
tm.assertRaises(TypeError): @@ -2454,7 +2454,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting.plotting._plot_klass.keys(): + for kind in plotting.core._plot_klass.keys(): if not _ok_for_gaussian_kde(kind): continue args = {} @@ -2653,7 +2653,7 @@ def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting.plotting._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) + plotting.core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_option_mpl_style(self): with tm.assert_produces_warning(FutureWarning, diff --git a/pandas/tests/plotting/test_groupby.py b/pandas/plotting/tests/test_groupby.py similarity index 97% rename from pandas/tests/plotting/test_groupby.py rename to pandas/plotting/tests/test_groupby.py index 93efb3f994c38..d670bb7746b80 100644 --- a/pandas/tests/plotting/test_groupby.py +++ b/pandas/plotting/tests/test_groupby.py @@ -8,7 +8,7 @@ import numpy as np -from pandas.tests.plotting.common import TestPlotBase +from pandas.plotting.tests.common import TestPlotBase @tm.mplskip diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/plotting/tests/test_hist_method.py similarity index 99% rename from pandas/tests/plotting/test_hist_method.py rename to pandas/plotting/tests/test_hist_method.py index 380bdc12abce4..bd50a6d67a287 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/plotting/tests/test_hist_method.py @@ -9,8 +9,8 @@ import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) +import pandas.plotting as plotting +from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works) @tm.mplskip diff --git a/pandas/tests/plotting/test_misc.py b/pandas/plotting/tests/test_misc.py similarity index 96% rename from pandas/tests/plotting/test_misc.py rename to 
pandas/plotting/tests/test_misc.py index 504c55bcfcfd0..d7e1e825d8e79 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/plotting/tests/test_misc.py @@ -11,8 +11,8 @@ from numpy import random from numpy.random import randn -import pandas.tools.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, +import pandas.plotting as plotting +from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works, _ok_for_gaussian_kde) @@ -29,7 +29,7 @@ def setUp(self): @slow def test_autocorrelation_plot(self): - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot _check_plot_works(autocorrelation_plot, series=self.ts) _check_plot_works(autocorrelation_plot, series=self.ts.values) @@ -38,13 +38,13 @@ def test_autocorrelation_plot(self): @slow def test_lag_plot(self): - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot _check_plot_works(lag_plot, series=self.ts) _check_plot_works(lag_plot, series=self.ts, lag=5) @slow def test_bootstrap_plot(self): - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, series=self.ts, size=10) @@ -130,7 +130,7 @@ def test_scatter_matrix_axis(self): @slow def test_andrews_curves(self): - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves from matplotlib import cm df = self.iris @@ -195,7 +195,7 @@ def test_andrews_curves(self): @slow def test_parallel_coordinates(self): - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates from matplotlib import cm df = self.iris @@ -263,7 +263,7 @@ def test_parallel_coordinates_with_sorted_labels(self): @slow def test_radviz(self): - from pandas.tools.plotting import radviz + from pandas.plotting import radviz from matplotlib import cm df = self.iris diff --git a/pandas/tests/plotting/test_series.py 
b/pandas/plotting/tests/test_series.py similarity index 98% rename from pandas/tests/plotting/test_series.py rename to pandas/plotting/tests/test_series.py index b97c34a85dc0c..be8cb4e2ec2cb 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/plotting/tests/test_series.py @@ -17,7 +17,7 @@ from numpy.random import randn import pandas.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, +from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -622,8 +622,8 @@ def test_boxplot_series(self): @slow def test_kind_both_ways(self): s = Series(range(3)) - kinds = (plotting.plotting._common_kinds + - plotting.plotting._series_kinds) + kinds = (plotting.core._common_kinds + + plotting.core._series_kinds) for kind in kinds: if not _ok_for_gaussian_kde(kind): continue @@ -633,7 +633,7 @@ def test_kind_both_ways(self): @slow def test_invalid_plot_data(self): s = Series(list('abcd')) - for kind in plotting.plotting._common_kinds: + for kind in plotting.core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -642,14 +642,14 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) - for kind in plotting.plotting._common_kinds: + for kind in plotting.core._common_kinds: if not _ok_for_gaussian_kde(kind): continue _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) - for kind in plotting.plotting._common_kinds: + for kind in plotting.core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -720,8 +720,8 @@ def test_table(self): def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting.plotting._series_kinds + - plotting.plotting._common_kinds) + 
plotting.core._series_kinds + + plotting.core._common_kinds) @slow def test_standard_colors(self): diff --git a/pandas/plotting/timeseries.py b/pandas/plotting/timeseries.py index 7d3e27d6154ae..259143beb6e44 100644 --- a/pandas/plotting/timeseries.py +++ b/pandas/plotting/timeseries.py @@ -134,7 +134,7 @@ def _replot_ax(ax, freq, kwargs): # for tsplot if isinstance(plotf, compat.string_types): - from pandas.tools.plotting import _plot_klass + from pandas.plotting.core import _plot_klass plotf = _plot_klass[plotf]._plot lines.append(plotf(ax, series.index._mpl_repr(), diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py new file mode 100644 index 0000000000000..c7d17760bbdbb --- /dev/null +++ b/pandas/tools/plotting.py @@ -0,0 +1,21 @@ +import sys +import warnings + +import pandas.plotting.api as api + +# back-compat of public API +# deprecate these functions +m = sys.modules['pandas.tools.plotting'] +for t in [t for t in dir(api) if not t.startswith('_')]: + + def outer(t=t): + + def wrapper(*args, **kwargs): + warnings.warn("pandas.tools.plotting.{t} is deprecated. 
" + "import from the " + "pandas.plotting.{t} instead".format(t=t), + FutureWarning, stacklevel=2) + return getattr(api, t)(*args, **kwargs) + return wrapper + + setattr(m, t, outer(t)) diff --git a/pandas/util/doctools.py b/pandas/util/doctools.py index 6df6444aeafab..cbc9518b96416 100644 --- a/pandas/util/doctools.py +++ b/pandas/util/doctools.py @@ -131,7 +131,7 @@ def _make_table(self, ax, df, title, height=None): ax.set_visible(False) return - import pandas.tools.plotting as plotting + import pandas.plotting as plotting idx_nlevels = df.index.nlevels col_nlevels = df.columns.nlevels diff --git a/setup.py b/setup.py index eef3df7bbe6da..bd1e4dcd08224 100755 --- a/setup.py +++ b/setup.py @@ -675,6 +675,7 @@ def pxd(name): 'pandas.tests.tools', 'pandas.tests.types', 'pandas.tests.plotting', + 'pandas.tests.test_msgpack', 'pandas.tools', 'pandas.tseries', 'pandas.types', From ea757d44304b2d0cc26d5fe31a112af7b13cb616 Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 21 Jan 2017 12:28:04 +0900 Subject: [PATCH 04/12] Do not import deprecate decorator on the top namespace --- pandas/__init__.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/__init__.py b/pandas/__init__.py index 15db65ce76b75..7bf3adc39bf2d 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -50,12 +50,13 @@ merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab -# deprecate tools.plotting, and directly imported scatter_matrix +# deprecate tools.plotting, and scatter_matrix on the top namespace import pandas.tools.plotting from pandas.plotting import plot_params -from pandas.util.decorators import deprecate -scatter_matrix = deprecate('pandas.scatter_matrix', pandas.plotting.scatter_matrix, - 'pandas.plotting.scatter_matrix') +# do not import deprecate to top namespace +scatter_matrix = pandas.util.decorators.deprecate( + 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, + 'pandas.plotting.scatter_matrix') from 
pandas.tools.tile import cut, qcut from pandas.tools.util import to_numeric From b5328a2e316cc6b1d031771e9b9ed5d005433c8d Mon Sep 17 00:00:00 2001 From: sinhrks Date: Sat, 21 Jan 2017 14:28:32 +0900 Subject: [PATCH 05/12] add whatsnew --- doc/source/whatsnew/v0.20.0.txt | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 133757b131312..944612b014c56 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -21,6 +21,7 @@ Highlights include: - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` - Switched the test framework to use `pytest `__ (:issue:`13097`) +- The ``pandas.tools.plotting`` module has been deprecated, moved to ``pandas.plotting``. See :ref:`here ` (:issue:`12548`) Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -557,6 +558,32 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi df.iloc[[0, 2], df.columns.get_loc('A')] +.. _whatsnew_0200.api_breaking.deprecate_plotting + +Deprecate .plotting +^^^^^^^^^^^^^^^^^^^ + +``pandas.tools.plotting`` module has been deprecated, moving directory under the +top namespace ``pandas.plotting``. All the public plotting functions should be available +from ``pandas.plotting``. + +Also, ``scatter_matrix`` function imported directly under ``pandas`` namespace is also deprecated. +Users shoud use ``pandas.plotting.scatter_matrix`` instead. + +Previous script: + +.. code-block:: python + + pd.tools.plotting.scatter_matrix(df) + pd.scatter_matrix(df) + +Should be changed to: + +.. code-block:: python + + pd.plotting.scatter_matrix(df) + + ..
_whatsnew_0200.api_breaking.deprecate_panel: Deprecate Panel From a1f272a0f1e8a9f4e5c63a2cdbd7aa8192a1a5da Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 00:11:57 +0200 Subject: [PATCH 06/12] Move test to tests/plotting --- pandas/plotting/compat.py | 8 ++++++++ pandas/plotting/core.py | 9 --------- pandas/{plotting/tests => tests/plotting}/__init__.py | 0 pandas/{plotting/tests => tests/plotting}/common.py | 0 .../tests => tests/plotting}/test_boxplot_method.py | 2 +- .../{plotting/tests => tests/plotting}/test_converter.py | 0 .../tests => tests/plotting}/test_datetimelike.py | 2 +- .../tests => tests/plotting}/test_deprecated.py | 8 +++++++- pandas/{plotting/tests => tests/plotting}/test_frame.py | 8 ++++---- .../{plotting/tests => tests/plotting}/test_groupby.py | 2 +- .../tests => tests/plotting}/test_hist_method.py | 2 +- pandas/{plotting/tests => tests/plotting}/test_misc.py | 2 +- pandas/{plotting/tests => tests/plotting}/test_series.py | 2 +- setup.py | 1 - 14 files changed, 25 insertions(+), 21 deletions(-) rename pandas/{plotting/tests => tests/plotting}/__init__.py (100%) rename pandas/{plotting/tests => tests/plotting}/common.py (100%) rename pandas/{plotting/tests => tests/plotting}/test_boxplot_method.py (99%) rename pandas/{plotting/tests => tests/plotting}/test_converter.py (100%) rename pandas/{plotting/tests => tests/plotting}/test_datetimelike.py (99%) rename pandas/{plotting/tests => tests/plotting}/test_deprecated.py (89%) rename pandas/{plotting/tests => tests/plotting}/test_frame.py (99%) rename pandas/{plotting/tests => tests/plotting}/test_groupby.py (97%) rename pandas/{plotting/tests => tests/plotting}/test_hist_method.py (99%) rename pandas/{plotting/tests => tests/plotting}/test_misc.py (99%) rename pandas/{plotting/tests => tests/plotting}/test_series.py (99%) diff --git a/pandas/plotting/compat.py b/pandas/plotting/compat.py index 3191972d78dee..c24a8d247dd8c 100644 --- a/pandas/plotting/compat.py +++ 
b/pandas/plotting/compat.py @@ -49,3 +49,11 @@ def _mpl_ge_2_0_0(): return matplotlib.__version__ >= LooseVersion('2.0') except ImportError: return False + + +def _mpl_ge_2_0_1(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0.1') + except ImportError: + return False diff --git a/pandas/plotting/core.py b/pandas/plotting/core.py index 6ca00a5035592..110d89b2b1004 100644 --- a/pandas/plotting/core.py +++ b/pandas/plotting/core.py @@ -36,15 +36,6 @@ format_date_labels) - -def _mpl_ge_2_0_1(): - try: - import matplotlib - return matplotlib.__version__ >= LooseVersion('2.0.1') - except ImportError: - return False - - if _mpl_ge_1_5_0(): # Compat with mp 1.5, which uses cycler. import cycler diff --git a/pandas/plotting/tests/__init__.py b/pandas/tests/plotting/__init__.py similarity index 100% rename from pandas/plotting/tests/__init__.py rename to pandas/tests/plotting/__init__.py diff --git a/pandas/plotting/tests/common.py b/pandas/tests/plotting/common.py similarity index 100% rename from pandas/plotting/tests/common.py rename to pandas/tests/plotting/common.py diff --git a/pandas/plotting/tests/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py similarity index 99% rename from pandas/plotting/tests/test_boxplot_method.py rename to pandas/tests/plotting/test_boxplot_method.py index e8dfde25d9769..ef4237492b050 100644 --- a/pandas/plotting/tests/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -16,7 +16,7 @@ import pandas.plotting as plotting -from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works) +from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) """ Test cases for .boxplot method """ diff --git a/pandas/plotting/tests/test_converter.py b/pandas/tests/plotting/test_converter.py similarity index 100% rename from pandas/plotting/tests/test_converter.py rename to pandas/tests/plotting/test_converter.py diff --git 
a/pandas/plotting/tests/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py similarity index 99% rename from pandas/plotting/tests/test_datetimelike.py rename to pandas/tests/plotting/test_datetimelike.py index eedbf5c5bbda4..3caa1935f0943 100644 --- a/pandas/plotting/tests/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -17,7 +17,7 @@ from pandas.util.testing import assert_series_equal, ensure_clean, slow import pandas.util.testing as tm -from pandas.plotting.tests.common import (TestPlotBase, +from pandas.tests.plotting.common import (TestPlotBase, _skip_if_no_scipy_gaussian_kde) diff --git a/pandas/plotting/tests/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py similarity index 89% rename from pandas/plotting/tests/test_deprecated.py rename to pandas/tests/plotting/test_deprecated.py index 59434ec813f2b..19ccbbc7d6042 100644 --- a/pandas/plotting/tests/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -12,7 +12,7 @@ import pandas.tools.plotting as plotting -from pandas.plotting.tests.common import TestPlotBase +from pandas.tests.plotting.common import TestPlotBase """ @@ -62,6 +62,12 @@ def test_radviz_deprecated(self): with tm.assert_produces_warning(FutureWarning): plotting.radviz(frame=df, class_column='Name') + @slow + def test_plot_params(self): + + with tm.assert_produces_warning(FutureWarning): + pd.plot_params['xaxis.compat'] = True + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/plotting/tests/test_frame.py b/pandas/tests/plotting/test_frame.py similarity index 99% rename from pandas/plotting/tests/test_frame.py rename to pandas/tests/plotting/test_frame.py index 1b49a4b4a1a78..2e2adcb9e7be2 100644 --- a/pandas/plotting/tests/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -23,7 +23,7 @@ from numpy.random import rand, randn import pandas.plotting as plotting -from pandas.plotting.tests.common import 
(TestPlotBase, _check_plot_works, +from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -240,13 +240,13 @@ def test_xcompat(self): self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['xaxis.compat'] = True + pd.plotting.plot_params['xaxis.compat'] = True ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['x_compat'] = False + pd.plotting.plot_params['x_compat'] = False ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) @@ -254,7 +254,7 @@ def test_xcompat(self): tm.close() # useful if you're plotting a bunch together - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) diff --git a/pandas/plotting/tests/test_groupby.py b/pandas/tests/plotting/test_groupby.py similarity index 97% rename from pandas/plotting/tests/test_groupby.py rename to pandas/tests/plotting/test_groupby.py index d670bb7746b80..93efb3f994c38 100644 --- a/pandas/plotting/tests/test_groupby.py +++ b/pandas/tests/plotting/test_groupby.py @@ -8,7 +8,7 @@ import numpy as np -from pandas.plotting.tests.common import TestPlotBase +from pandas.tests.plotting.common import TestPlotBase @tm.mplskip diff --git a/pandas/plotting/tests/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py similarity index 99% rename from pandas/plotting/tests/test_hist_method.py rename to pandas/tests/plotting/test_hist_method.py index bd50a6d67a287..2986af1c8e177 100644 --- a/pandas/plotting/tests/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -10,7 +10,7 @@ from numpy.random import randn import pandas.plotting as plotting -from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works) +from 
pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @tm.mplskip diff --git a/pandas/plotting/tests/test_misc.py b/pandas/tests/plotting/test_misc.py similarity index 99% rename from pandas/plotting/tests/test_misc.py rename to pandas/tests/plotting/test_misc.py index d7e1e825d8e79..adc8c021b7c3d 100644 --- a/pandas/plotting/tests/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -12,7 +12,7 @@ from numpy.random import randn import pandas.plotting as plotting -from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works, +from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _ok_for_gaussian_kde) diff --git a/pandas/plotting/tests/test_series.py b/pandas/tests/plotting/test_series.py similarity index 99% rename from pandas/plotting/tests/test_series.py rename to pandas/tests/plotting/test_series.py index be8cb4e2ec2cb..6c04d1b705719 100644 --- a/pandas/plotting/tests/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -17,7 +17,7 @@ from numpy.random import randn import pandas.plotting as plotting -from pandas.plotting.tests.common import (TestPlotBase, _check_plot_works, +from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) diff --git a/setup.py b/setup.py index bd1e4dcd08224..eef3df7bbe6da 100755 --- a/setup.py +++ b/setup.py @@ -675,7 +675,6 @@ def pxd(name): 'pandas.tests.tools', 'pandas.tests.types', 'pandas.tests.plotting', - 'pandas.tests.test_msgpack', 'pandas.tools', 'pandas.tseries', 'pandas.types', From c6452b359547c4ebb62a316172f414720006e038 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 00:56:49 +0200 Subject: [PATCH 07/12] also move plot_params --- doc/source/visualization.rst | 4 ++-- doc/source/whatsnew/v0.20.0.txt | 7 +++---- pandas/__init__.py | 4 ++-- pandas/plotting/style.py | 16 ++++++++++++++-- pandas/tests/api/test_api.py | 4 ++-- 5 files changed, 23 insertions(+), 12 
deletions(-) diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 4f655e4c6f476..fb799c642131d 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -1228,14 +1228,14 @@ Using the ``x_compat`` parameter, you can suppress this behavior: plt.close('all') If you have more than one plot that needs to be suppressed, the ``use`` method -in ``pandas.plot_params`` can be used in a `with statement`: +in ``pandas.plotting.plot_params`` can be used in a `with statement`: .. ipython:: python plt.figure() @savefig ser_plot_suppress_context.png - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): df.A.plot(color='r') df.B.plot(color='g') df.C.plot(color='b') diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 944612b014c56..869197166885c 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -563,12 +563,11 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi Deprecate .plotting ^^^^^^^^^^^^^^^^^^^ -``pandas.tools.plotting`` module has been deprecated, moving directory under the -top namespace ``pandas.plotting``. All the public plotting functions should be available +The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available from ``pandas.plotting``. -Also, ``scatter_matrix`` function imported directly under ``pandas`` namespace is also deprecated. -Users shoud use ``pandas.plotting.scatter_matrix`` instead. +Further, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are also deprecated. +Users can import these from ``pandas.plotting`` as well. 
Previous script: diff --git a/pandas/__init__.py b/pandas/__init__.py index 7bf3adc39bf2d..1bcfbbc6eb350 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -50,9 +50,9 @@ merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab -# deprecate tools.plotting, and scatter_matrix on the top namespace +# deprecate tools.plotting, plot_params and scatter_matrix on the top namespace import pandas.tools.plotting -from pandas.plotting import plot_params +plot_params = pandas.plotting.style._Options(deprecated=True) # do not import deprecate to top namespace scatter_matrix = pandas.util.decorators.deprecate( 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, diff --git a/pandas/plotting/style.py b/pandas/plotting/style.py index 37af63e8bb183..21c6ad3ce4cb5 100644 --- a/pandas/plotting/style.py +++ b/pandas/plotting/style.py @@ -179,16 +179,26 @@ class _Options(dict): _ALIASES = {'x_compat': 'xaxis.compat'} _DEFAULT_KEYS = ['xaxis.compat'] - def __init__(self): - self['xaxis.compat'] = False + def __init__(self, deprecated=False): + self._deprecated = deprecated + # self['xaxis.compat'] = False + super(_Options, self).__setitem__('xaxis.compat', False) + + def _warn_if_deprecated(self): + if self._deprecated: + warnings.warn("'pandas.plot_params' is deprecated. 
Use " + "'pandas.plotting.plot_params' instead", + FutureWarning, stacklevel=3) def __getitem__(self, key): + self._warn_if_deprecated() key = self._get_canonical_key(key) if key not in self: raise ValueError('%s is not a valid pandas plotting option' % key) return super(_Options, self).__getitem__(key) def __setitem__(self, key, value): + self._warn_if_deprecated() key = self._get_canonical_key(key) return super(_Options, self).__setitem__(key, value) @@ -210,6 +220,7 @@ def reset(self): ------- None """ + self._warn_if_deprecated() self.__init__() def _get_canonical_key(self, key): @@ -221,6 +232,7 @@ def use(self, key, value): Temporarily set a parameter value using the with statement. Aliasing allowed. """ + self._warn_if_deprecated() old_value = self[key] try: self[key] = value diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index dfa8851d9bec1..fd6984bbf1dc7 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -69,7 +69,7 @@ class TestPDApi(Base, tm.TestCase): 'melt', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', 'period_range', - 'pivot', 'pivot_table', 'plot_params', 'qcut', + 'pivot', 'pivot_table', 'qcut', 'scatter_matrix', 'show_versions', 'timedelta_range', 'unique', 'value_counts', 'wide_to_long'] @@ -103,7 +103,7 @@ class TestPDApi(Base, tm.TestCase): 'rolling_median', 'rolling_min', 'rolling_quantile', 'rolling_skew', 'rolling_std', 'rolling_sum', 'rolling_var', 'rolling_window', 'ordered_merge', - 'pnow', 'match', 'groupby', 'get_store'] + 'pnow', 'match', 'groupby', 'get_store', 'plot_params'] def test_api(self): From d4bdb0a6aa39f5dfb01814090c0d2e1c0b8b73b3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 01:00:02 +0200 Subject: [PATCH 08/12] remaining feedback + reorg --- pandas/__init__.py | 2 +- pandas/core/config_init.py | 2 +- pandas/core/frame.py | 10 +++---- pandas/core/groupby.py | 2 +- pandas/core/series.py | 2 +- pandas/plotting/__init__.py | 
20 +++++++++++++- pandas/plotting/{compat.py => _compat.py} | 0 .../plotting/{converter.py => _converter.py} | 0 pandas/plotting/{core.py => _core.py} | 26 +++++++++---------- pandas/plotting/{misc.py => _misc.py} | 4 +-- pandas/plotting/{style.py => _style.py} | 0 .../{timeseries.py => _timeseries.py} | 2 +- pandas/plotting/{tools.py => _tools.py} | 0 pandas/plotting/api.py | 20 -------------- pandas/tests/plotting/common.py | 17 ++++++------ pandas/tests/plotting/test_boxplot_method.py | 3 ++- pandas/tests/plotting/test_datetimelike.py | 6 ++--- pandas/tests/plotting/test_deprecated.py | 9 ------- pandas/tests/plotting/test_frame.py | 10 +++---- pandas/tests/plotting/test_hist_method.py | 15 +++++------ pandas/tests/plotting/test_misc.py | 2 +- pandas/tests/plotting/test_series.py | 18 ++++++------- pandas/tools/plotting.py | 11 ++++---- pandas/tseries/converter.py | 18 ++++++------- pandas/tseries/plotting.py | 2 +- 25 files changed, 95 insertions(+), 106 deletions(-) rename pandas/plotting/{compat.py => _compat.py} (100%) rename pandas/plotting/{converter.py => _converter.py} (100%) rename pandas/plotting/{core.py => _core.py} (99%) rename pandas/plotting/{misc.py => _misc.py} (99%) rename pandas/plotting/{style.py => _style.py} (100%) rename pandas/plotting/{timeseries.py => _timeseries.py} (99%) rename pandas/plotting/{tools.py => _tools.py} (100%) delete mode 100644 pandas/plotting/api.py diff --git a/pandas/__init__.py b/pandas/__init__.py index 1bcfbbc6eb350..bc38919f2c78c 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -52,7 +52,7 @@ # deprecate tools.plotting, plot_params and scatter_matrix on the top namespace import pandas.tools.plotting -plot_params = pandas.plotting.style._Options(deprecated=True) +plot_params = pandas.plotting._style._Options(deprecated=True) # do not import deprecate to top namespace scatter_matrix = pandas.util.decorators.deprecate( 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, diff --git 
a/pandas/core/config_init.py b/pandas/core/config_init.py index a7003c66024e8..cf2a653638e90 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -285,7 +285,7 @@ def mpl_style_cb(key): stacklevel=5) import sys - from pandas.plotting.style import mpl_stylesheet + from pandas.plotting._style import mpl_stylesheet global style_backup val = cf.get_option(key) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 46f33f270c3c2..a5256868ce419 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,7 +90,7 @@ import pandas.core.ops as ops import pandas.formats.format as fmt from pandas.formats.printing import pprint_thing -import pandas.plotting.core as gfx +import pandas.plotting._core as gfx from pandas._libs import lib, algos as libalgos @@ -5909,11 +5909,11 @@ def _put_str(s, space): @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): - import pandas.plotting as plots + from pandas.plotting._core import boxplot import matplotlib.pyplot as plt - ax = plots.boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, - grid=grid, rot=rot, figsize=figsize, layout=layout, - return_type=return_type, **kwds) + ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, + grid=grid, rot=rot, figsize=figsize, layout=layout, + return_type=return_type, **kwds) plt.draw_if_interactive() return ax diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 68e37d4998f51..27e256a8eb572 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -4159,7 +4159,7 @@ def groupby_series(obj, col=None): return results -from pandas.plotting.core import boxplot_frame_groupby # noqa +from pandas.plotting._core import boxplot_frame_groupby # noqa DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/core/series.py b/pandas/core/series.py index f7571d882047f..e5dc92592addd 
100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3001,7 +3001,7 @@ def create_from_value(value, index, dtype): # ---------------------------------------------------------------------- # Add plotting methods to Series -import pandas.plotting.core as _gfx # noqa +import pandas.plotting._core as _gfx # noqa Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, _gfx.SeriesPlotMethods) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py index 374276ddc8b56..c3cbedb0fc28c 100644 --- a/pandas/plotting/__init__.py +++ b/pandas/plotting/__init__.py @@ -1 +1,19 @@ -from pandas.plotting.api import * # noqa +""" +Plotting api +""" + +# flake8: noqa + +try: # mpl optional + from pandas.plotting import _converter + _converter.register() # needs to override so set_xlim works with str/number +except ImportError: + pass + +from pandas.plotting._misc import (scatter_matrix, radviz, + andrews_curves, bootstrap_plot, + parallel_coordinates, lag_plot, + autocorrelation_plot) +from pandas.plotting._core import boxplot +from pandas.plotting._style import plot_params +from pandas.plotting._tools import table diff --git a/pandas/plotting/compat.py b/pandas/plotting/_compat.py similarity index 100% rename from pandas/plotting/compat.py rename to pandas/plotting/_compat.py diff --git a/pandas/plotting/converter.py b/pandas/plotting/_converter.py similarity index 100% rename from pandas/plotting/converter.py rename to pandas/plotting/_converter.py diff --git a/pandas/plotting/core.py b/pandas/plotting/_core.py similarity index 99% rename from pandas/plotting/core.py rename to pandas/plotting/_core.py index 110d89b2b1004..3980f5e7f2f61 100644 --- a/pandas/plotting/core.py +++ b/pandas/plotting/_core.py @@ -26,14 +26,14 @@ from pandas.formats.printing import pprint_thing from pandas.util.decorators import Appender -from pandas.plotting.compat import (_mpl_ge_1_3_1, - _mpl_ge_1_5_0) -from pandas.plotting.style import (mpl_stylesheet, plot_params, - 
_get_standard_colors) -from pandas.plotting.tools import (_subplots, _flatten, table, - _handle_shared_axes, _get_all_lines, - _get_xlim, _set_ticks_props, - format_date_labels) +from pandas.plotting._compat import (_mpl_ge_1_3_1, + _mpl_ge_1_5_0) +from pandas.plotting._style import (mpl_stylesheet, plot_params, + _get_standard_colors) +from pandas.plotting._tools import (_subplots, _flatten, table, + _handle_shared_axes, _get_all_lines, + _get_xlim, _set_ticks_props, + format_date_labels) if _mpl_ge_1_5_0(): @@ -900,12 +900,12 @@ def _is_ts_plot(self): return not self.x_compat and self.use_index and self._use_dynamic_x() def _use_dynamic_x(self): - from pandas.plotting.timeseries import _use_dynamic_x + from pandas.plotting._timeseries import _use_dynamic_x return _use_dynamic_x(self._get_ax(0), self.data) def _make_plot(self): if self._is_ts_plot(): - from pandas.plotting.timeseries import _maybe_convert_index + from pandas.plotting._timeseries import _maybe_convert_index data = _maybe_convert_index(self._get_ax(0), self.data) x = data.index # dummy, not used @@ -955,9 +955,9 @@ def _plot(cls, ax, x, y, style=None, column_num=None, @classmethod def _ts_plot(cls, ax, x, data, style=None, **kwds): - from pandas.plotting.timeseries import (_maybe_resample, - _decorate_axes, - format_dateaxis) + from pandas.plotting._timeseries import (_maybe_resample, + _decorate_axes, + format_dateaxis) # accept x to be consistent with normal plot func, # x is not passed to tsplot as it uses data.index as x coordinate # column_num must be in kwds for stacking purpose diff --git a/pandas/plotting/misc.py b/pandas/plotting/_misc.py similarity index 99% rename from pandas/plotting/misc.py rename to pandas/plotting/_misc.py index 57306ab77f1e1..2c32a532dd2e2 100644 --- a/pandas/plotting/misc.py +++ b/pandas/plotting/_misc.py @@ -10,8 +10,8 @@ from pandas.formats.printing import pprint_thing -from pandas.plotting.style import _get_standard_colors -from pandas.plotting.tools import 
_subplots, _set_ticks_props +from pandas.plotting._style import _get_standard_colors +from pandas.plotting._tools import _subplots, _set_ticks_props def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, diff --git a/pandas/plotting/style.py b/pandas/plotting/_style.py similarity index 100% rename from pandas/plotting/style.py rename to pandas/plotting/_style.py diff --git a/pandas/plotting/timeseries.py b/pandas/plotting/_timeseries.py similarity index 99% rename from pandas/plotting/timeseries.py rename to pandas/plotting/_timeseries.py index 259143beb6e44..be683a78a409d 100644 --- a/pandas/plotting/timeseries.py +++ b/pandas/plotting/_timeseries.py @@ -134,7 +134,7 @@ def _replot_ax(ax, freq, kwargs): # for tsplot if isinstance(plotf, compat.string_types): - from pandas.plotting.core import _plot_klass + from pandas.plotting._core import _plot_klass plotf = _plot_klass[plotf]._plot lines.append(plotf(ax, series.index._mpl_repr(), diff --git a/pandas/plotting/tools.py b/pandas/plotting/_tools.py similarity index 100% rename from pandas/plotting/tools.py rename to pandas/plotting/_tools.py diff --git a/pandas/plotting/api.py b/pandas/plotting/api.py deleted file mode 100644 index f1df2e4426151..0000000000000 --- a/pandas/plotting/api.py +++ /dev/null @@ -1,20 +0,0 @@ -""" -Plotting api -""" - -# flake8: noqa - -try: # mpl optional - from pandas.plotting import converter - converter.register() # needs to override so set_xlim works with str/number -except ImportError: - pass - -from pandas.plotting.misc import (scatter_matrix, radviz, - andrews_curves, bootstrap_plot, - parallel_coordinates, lag_plot, - autocorrelation_plot) -from pandas.plotting.core import (boxplot, scatter_plot, grouped_hist, - hist_frame, hist_series) -from pandas.plotting.style import plot_params -from pandas.plotting.tools import table diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 91372ba25fb59..d81f73e73ae69 100644 --- 
a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -17,6 +17,7 @@ from numpy import random import pandas.plotting as plotting +from pandas.plotting._tools import _flatten """ @@ -48,12 +49,12 @@ def setUp(self): import matplotlib as mpl mpl.rcdefaults() - self.mpl_le_1_2_1 = plotting.compat._mpl_le_1_2_1() - self.mpl_ge_1_3_1 = plotting.compat._mpl_ge_1_3_1() - self.mpl_ge_1_4_0 = plotting.compat._mpl_ge_1_4_0() - self.mpl_ge_1_5_0 = plotting.compat._mpl_ge_1_5_0() - self.mpl_ge_2_0_0 = plotting.compat._mpl_ge_2_0_0() - self.mpl_ge_2_0_1 = plotting.compat._mpl_ge_2_0_1() + self.mpl_le_1_2_1 = plotting._compat._mpl_le_1_2_1() + self.mpl_ge_1_3_1 = plotting._compat._mpl_ge_1_3_1() + self.mpl_ge_1_4_0 = plotting._compat._mpl_ge_1_4_0() + self.mpl_ge_1_5_0 = plotting._compat._mpl_ge_1_5_0() + self.mpl_ge_2_0_0 = plotting._compat._mpl_ge_2_0_0() + self.mpl_ge_2_0_1 = plotting._compat._mpl_ge_2_0_1() if self.mpl_ge_1_4_0: self.bp_n_objects = 7 @@ -354,7 +355,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, self.assertTrue(len(ax.get_children()) > 0) if layout is not None: - result = self._get_axes_layout(plotting.tools._flatten(axes)) + result = self._get_axes_layout(_flatten(axes)) self.assertEqual(result, layout) self.assert_numpy_array_equal( @@ -380,7 +381,7 @@ def _flatten_visible(self, axes): axes : matplotlib Axes object, or its list-like """ - axes = plotting.tools._flatten(axes) + axes = _flatten(axes) axes = [ax for ax in axes if ax.get_visible()] return axes diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index ef4237492b050..5b9c13bd26708 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -54,7 +54,8 @@ def test_boxplot_legacy(self): _check_plot_works(df.boxplot, by='indic') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by=['indic', 'indic2']) - 
_check_plot_works(plotting.boxplot, data=df['one'], return_type='dict') + _check_plot_works(plotting._core.boxplot, data=df['one'], + return_type='dict') _check_plot_works(df.boxplot, notch=1, return_type='dict') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='indic', notch=1) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 3caa1935f0943..b3692c5a8d2d2 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -144,7 +144,7 @@ def test_high_freq(self): _check_plot_works(ser.plot) def test_get_datevalue(self): - from pandas.plotting.converter import get_datevalue + from pandas.plotting._converter import get_datevalue self.assertIsNone(get_datevalue(None, 'D')) self.assertEqual(get_datevalue(1987, 'A'), 1987) self.assertEqual(get_datevalue(Period(1987, 'A'), 'M'), @@ -243,7 +243,7 @@ def test_plot_multiple_inferred_freq(self): @slow def test_uhf(self): - import pandas.plotting.converter as conv + import pandas.plotting._converter as conv import matplotlib.pyplot as plt fig = plt.gcf() plt.clf() @@ -387,7 +387,7 @@ def _test(ax): _test(ax) def test_get_finder(self): - import pandas.plotting.converter as conv + import pandas.plotting._converter as conv self.assertEqual(conv.get_finder('B'), conv._daily_finder) self.assertEqual(conv.get_finder('D'), conv._daily_finder) diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index 19ccbbc7d6042..e1c57dd464333 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -47,15 +47,6 @@ def test_boxplot_deprecated(self): plotting.boxplot(df, column=['one', 'two'], by='indic') - @slow - def test_grouped_hist_legacy(self): - df = pd.DataFrame(randn(500, 2), columns=['A', 'B']) - df['C'] = np.random.randint(0, 4, 500) - df['D'] = ['X'] * 500 - - with tm.assert_produces_warning(FutureWarning): - 
plotting.grouped_hist(df.A, by=df.C) - @slow def test_radviz_deprecated(self): df = self.iris diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 2e2adcb9e7be2..404752b567f63 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -1979,7 +1979,7 @@ def test_unordered_ts(self): def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting.core._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue df.plot(kind=kind) @@ -1990,7 +1990,7 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting.core._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2001,7 +2001,7 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting.core._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2454,7 +2454,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting.core._plot_klass.keys(): + for kind in plotting._core._plot_klass.keys(): if not _ok_for_gaussian_kde(kind): continue args = {} @@ -2653,7 +2653,7 @@ def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting.core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) + plotting._core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_option_mpl_style(self): with tm.assert_produces_warning(FutureWarning, diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 2986af1c8e177..0a13a6e9893a8 100644 --- 
a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -9,7 +9,7 @@ import numpy as np from numpy.random import randn -import pandas.plotting as plotting +from pandas.plotting._core import grouped_hist from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @@ -260,7 +260,7 @@ def test_grouped_hist_legacy(self): df['C'] = np.random.randint(0, 4, 500) df['D'] = ['X'] * 500 - axes = plotting.grouped_hist(df.A, by=df.C) + axes = grouped_hist(df.A, by=df.C) self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) tm.close() @@ -277,10 +277,9 @@ def test_grouped_hist_legacy(self): # make sure kwargs to hist are handled xf, yf = 20, 18 xrot, yrot = 30, 40 - axes = plotting.grouped_hist(df.A, by=df.C, normed=True, - cumulative=True, bins=4, - xlabelsize=xf, xrot=xrot, - ylabelsize=yf, yrot=yrot) + axes = grouped_hist(df.A, by=df.C, normed=True, cumulative=True, + bins=4, xlabelsize=xf, xrot=xrot, + ylabelsize=yf, yrot=yrot) # height of last bin (index 5) must be 1.0 for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -290,14 +289,14 @@ def test_grouped_hist_legacy(self): ylabelsize=yf, yrot=yrot) tm.close() - axes = plotting.grouped_hist(df.A, by=df.C, log=True) + axes = grouped_hist(df.A, by=df.C, log=True) # scale of y must be 'log' self._check_ax_scales(axes, yaxis='log') tm.close() # propagate attr exception from matplotlib.Axes.hist with tm.assertRaises(AttributeError): - plotting.grouped_hist(df.A, by=df.C, foo='bar') + grouped_hist(df.A, by=df.C, foo='bar') with tm.assert_produces_warning(FutureWarning): df.hist(by='C', figsize='default') diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index adc8c021b7c3d..fe0b6c103a0e1 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -84,7 +84,7 @@ def scat(**kwds): _check_plot_works(scat, facecolor='rgb') def scat2(x, y, by=None, ax=None, figsize=None): 
- return plotting.scatter_plot(df, x, y, by, ax, figsize=None) + return plotting._core.scatter_plot(df, x, y, by, ax, figsize=None) _check_plot_works(scat2, x=0, y=1) grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 6c04d1b705719..c3bc3ca6bf414 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -622,8 +622,8 @@ def test_boxplot_series(self): @slow def test_kind_both_ways(self): s = Series(range(3)) - kinds = (plotting.core._common_kinds + - plotting.core._series_kinds) + kinds = (plotting._core._common_kinds + + plotting._core._series_kinds) for kind in kinds: if not _ok_for_gaussian_kde(kind): continue @@ -633,7 +633,7 @@ def test_kind_both_ways(self): @slow def test_invalid_plot_data(self): s = Series(list('abcd')) - for kind in plotting.core._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -642,14 +642,14 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) - for kind in plotting.core._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) - for kind in plotting.core._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -720,12 +720,12 @@ def test_table(self): def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting.core._series_kinds + - plotting.core._common_kinds) + plotting._core._series_kinds + + plotting._core._common_kinds) @slow def test_standard_colors(self): - from pandas.plotting.style import _get_standard_colors + 
from pandas.plotting._style import _get_standard_colors for c in ['r', 'red', 'green', '#FF0000']: result = _get_standard_colors(1, color=c) @@ -743,7 +743,7 @@ def test_standard_colors(self): @slow def test_standard_colors_all(self): import matplotlib.colors as colors - from pandas.plotting.style import _get_standard_colors + from pandas.plotting._style import _get_standard_colors # multiple colors like mediumaquamarine for c in colors.cnames: diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index c7d17760bbdbb..a68da67a219e2 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1,21 +1,20 @@ import sys import warnings -import pandas.plotting.api as api +import pandas.plotting as _plotting # back-compat of public API # deprecate these functions m = sys.modules['pandas.tools.plotting'] -for t in [t for t in dir(api) if not t.startswith('_')]: +for t in [t for t in dir(_plotting) if not t.startswith('_')]: def outer(t=t): def wrapper(*args, **kwargs): - warnings.warn("pandas.tools.plotting.{t} is deprecated. 
" - "import from the " - "pandas.plotting.{t} instead".format(t=t), + warnings.warn("'pandas.tools.plotting.{t}' is deprecated, " + "import 'pandas.plotting.{t}' instead.".format(t=t), FutureWarning, stacklevel=2) - return getattr(api, t)(*args, **kwargs) + return getattr(_plotting, t)(*args, **kwargs) return wrapper setattr(m, t, outer(t)) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index a93515b110cf4..df603c4d880d8 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,11 +1,11 @@ # flake8: noqa -from pandas.plotting.converter import (register, time2num, - TimeConverter, TimeFormatter, - PeriodConverter, get_datevalue, - DatetimeConverter, - PandasAutoDateFormatter, - PandasAutoDateLocator, - MilliSecondLocator, get_finder, - TimeSeries_DateLocator, - TimeSeries_DateFormatter) +from pandas.plotting._converter import (register, time2num, + TimeConverter, TimeFormatter, + PeriodConverter, get_datevalue, + DatetimeConverter, + PandasAutoDateFormatter, + PandasAutoDateLocator, + MilliSecondLocator, get_finder, + TimeSeries_DateLocator, + TimeSeries_DateFormatter) diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 6ecada90665cd..302016907635d 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -1,3 +1,3 @@ # flake8: noqa -from pandas.plotting.timeseries import tsplot +from pandas.plotting._timeseries import tsplot From 72ffe974eb644437699957e96b7f9e8311f95c4c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 01:48:13 +0200 Subject: [PATCH 09/12] fixups from rebase --- pandas/plotting/_timeseries.py | 6 +++--- pandas/tests/plotting/test_converter.py | 13 +++++++------ setup.py | 2 +- 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_timeseries.py index be683a78a409d..7533e417b0026 100644 --- a/pandas/plotting/_timeseries.py +++ b/pandas/plotting/_timeseries.py @@ -12,9 +12,9 @@ 
from pandas.formats.printing import pprint_thing import pandas.compat as compat -from pandas.tseries.converter import (TimeSeries_DateLocator, - TimeSeries_DateFormatter, - TimeSeries_TimedeltaFormatter) +from pandas.plotting._converter import (TimeSeries_DateLocator, + TimeSeries_DateFormatter, + TimeSeries_TimedeltaFormatter) # --------------------------------------------------------------------- # Plotting functions and monkey patches diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 5351e26f0e62b..c21d46328c3d9 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -2,6 +2,7 @@ from datetime import datetime, date import numpy as np +import matplotlib.dates as dates from pandas import Timestamp, Period, Index from pandas.compat import u import pandas.util.testing as tm @@ -83,7 +84,7 @@ def test_conversion_float(self): rs = self.dtc.convert( Timestamp('2012-1-1 01:02:03', tz='UTC'), None, None) - xp = converter.dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC')) + xp = dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC')) tm.assert_almost_equal(rs, xp, decimals) rs = self.dtc.convert( @@ -97,18 +98,18 @@ def test_conversion_outofbounds_datetime(self): # 2579 values = [date(1677, 1, 1), date(1677, 1, 2)] rs = self.dtc.convert(values, None, None) - xp = converter.dates.date2num(values) + xp = dates.date2num(values) tm.assert_numpy_array_equal(rs, xp) rs = self.dtc.convert(values[0], None, None) - xp = converter.dates.date2num(values[0]) + xp = dates.date2num(values[0]) self.assertEqual(rs, xp) values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] rs = self.dtc.convert(values, None, None) - xp = converter.dates.date2num(values) + xp = dates.date2num(values) tm.assert_numpy_array_equal(rs, xp) rs = self.dtc.convert(values[0], None, None) - xp = converter.dates.date2num(values[0]) + xp = dates.date2num(values[0]) self.assertEqual(rs, xp) def 
test_time_formatter(self): @@ -120,7 +121,7 @@ def test_dateindex_conversion(self): for freq in ('B', 'L', 'S'): dateindex = tm.makeDateIndex(k=10, freq=freq) rs = self.dtc.convert(dateindex, None, None) - xp = converter.dates.date2num(dateindex._mpl_repr()) + xp = dates.date2num(dateindex._mpl_repr()) tm.assert_almost_equal(rs, xp, decimals) def test_resolution(self): diff --git a/setup.py b/setup.py index eef3df7bbe6da..f8882e748927b 100755 --- a/setup.py +++ b/setup.py @@ -649,7 +649,7 @@ def pxd(name): 'pandas.io.msgpack', 'pandas._libs', 'pandas.formats', - 'pandas.plotting' + 'pandas.plotting', 'pandas.sparse', 'pandas.stats', 'pandas.util', From 09291e251318395fc9c53cb7d0a1a465bf51e761 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 02:38:53 +0200 Subject: [PATCH 10/12] further fixups --- pandas/plotting/_style.py | 2 ++ pandas/tests/api/test_api.py | 2 +- pandas/tests/plotting/test_deprecated.py | 7 ------- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py index 21c6ad3ce4cb5..5d6dc7cbcdfc6 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_style.py @@ -4,12 +4,14 @@ import warnings from contextlib import contextmanager +import re import numpy as np from pandas.types.common import is_list_like from pandas.compat import range, lrange, lmap import pandas.compat as compat +from pandas.plotting._compat import _mpl_ge_2_0_0 # Extracted from https://gist.github.com/huyng/816622 diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index fd6984bbf1dc7..478f2f35e631f 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -31,7 +31,7 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', - 'indexes', 'formats', 'errors', 'pandas', 'plotting' + 'indexes', 'formats', 'errors', 'pandas', 'plotting', 'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 
'io'] diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py index e1c57dd464333..d7eaa69460a3a 100644 --- a/pandas/tests/plotting/test_deprecated.py +++ b/pandas/tests/plotting/test_deprecated.py @@ -1,13 +1,11 @@ # coding: utf-8 -import nose import string import pandas as pd import pandas.util.testing as tm from pandas.util.testing import slow -import numpy as np from numpy.random import randn import pandas.tools.plotting as plotting @@ -58,8 +56,3 @@ def test_plot_params(self): with tm.assert_produces_warning(FutureWarning): pd.plot_params['xaxis.compat'] = True - - -if __name__ == '__main__': - nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], - exit=False) From 468313aad49dfce5a5b603a276fbb8036b65ae97 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 12:17:01 +0200 Subject: [PATCH 11/12] fix converter import --- pandas/tests/plotting/test_converter.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index c21d46328c3d9..4629103d033f5 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -2,14 +2,13 @@ from datetime import datetime, date import numpy as np -import matplotlib.dates as dates from pandas import Timestamp, Period, Index from pandas.compat import u import pandas.util.testing as tm from pandas.tseries.offsets import Second, Milli, Micro, Day from pandas.compat.numpy import np_datetime64_compat -converter = pytest.importorskip('pandas.tseries.converter') +converter = pytest.importorskip('pandas.plotting._converter') def test_timtetonum_accepts_unicode(): @@ -84,7 +83,7 @@ def test_conversion_float(self): rs = self.dtc.convert( Timestamp('2012-1-1 01:02:03', tz='UTC'), None, None) - xp = dates.date2num(Timestamp('2012-1-1 01:02:03', tz='UTC')) + xp = converter.dates.date2num(Timestamp('2012-1-1 
01:02:03', tz='UTC')) tm.assert_almost_equal(rs, xp, decimals) rs = self.dtc.convert( @@ -98,18 +97,18 @@ def test_conversion_outofbounds_datetime(self): # 2579 values = [date(1677, 1, 1), date(1677, 1, 2)] rs = self.dtc.convert(values, None, None) - xp = dates.date2num(values) + xp = converter.dates.date2num(values) tm.assert_numpy_array_equal(rs, xp) rs = self.dtc.convert(values[0], None, None) - xp = dates.date2num(values[0]) + xp = converter.dates.date2num(values[0]) self.assertEqual(rs, xp) values = [datetime(1677, 1, 1, 12), datetime(1677, 1, 2, 12)] rs = self.dtc.convert(values, None, None) - xp = dates.date2num(values) + xp = converter.dates.date2num(values) tm.assert_numpy_array_equal(rs, xp) rs = self.dtc.convert(values[0], None, None) - xp = dates.date2num(values[0]) + xp = converter.dates.date2num(values[0]) self.assertEqual(rs, xp) def test_time_formatter(self): @@ -121,7 +120,7 @@ def test_dateindex_conversion(self): for freq in ('B', 'L', 'S'): dateindex = tm.makeDateIndex(k=10, freq=freq) rs = self.dtc.convert(dateindex, None, None) - xp = dates.date2num(dateindex._mpl_repr()) + xp = converter.dates.date2num(dateindex._mpl_repr()) tm.assert_almost_equal(rs, xp, decimals) def test_resolution(self): From 7f895cb3ec4d33c4ed12e05684067c2ff555ef2f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 15 Apr 2017 16:12:47 +0200 Subject: [PATCH 12/12] feedback --- doc/source/whatsnew/v0.20.0.txt | 6 +++--- pandas/plotting/_compat.py | 8 ++++++++ pandas/plotting/_converter.py | 10 ++-------- pandas/tests/api/test_api.py | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 869197166885c..463ccfc1a3e46 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -21,7 +21,7 @@ Highlights include: - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here 
` - Switched the test framework to use `pytest `__ (:issue:`13097`) -- The ``pandas.tools.plotting`` module has been deprecated, moved to ``pandas.plotting``. See :ref:`here ` (:issue:`12548`) +- The ``pandas.tools.plotting`` module has been deprecated, moved to ``pandas.plotting``. See :ref:`here ` Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -564,9 +564,9 @@ Deprecate .plotting ^^^^^^^^^^^^^^^^^^^ The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available -from ``pandas.plotting``. +from ``pandas.plotting`` (:issue:`12548`). -Further, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are also deprecated. +Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated. Users can import these from ``pandas.plotting`` as well. Previous script: diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_compat.py index c24a8d247dd8c..7b04b9e1171ec 100644 --- a/pandas/plotting/_compat.py +++ b/pandas/plotting/_compat.py @@ -51,6 +51,14 @@ def _mpl_ge_2_0_0(): return False +def _mpl_le_2_0_0(): + try: + import matplotlib + return matplotlib.compare_versions('2.0.0', matplotlib.__version__) + except ImportError: + return False + + def _mpl_ge_2_0_1(): try: import matplotlib diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index bc768a8bc5b58..0aa8cc31646c5 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -31,6 +31,8 @@ from pandas.tseries.frequencies import FreqGroup from pandas.tseries.period import Period, PeriodIndex +from pandas.plotting._compat import _mpl_le_2_0_0 + # constants HOURS_PER_DAY = 24. MIN_PER_HOUR = 60. 
@@ -42,14 +44,6 @@ MUSEC_PER_DAY = 1e6 * SEC_PER_DAY -def _mpl_le_2_0_0(): - try: - import matplotlib - return matplotlib.compare_versions('2.0.0', matplotlib.__version__) - except ImportError: - return False - - def register(): units.registry[lib.Timestamp] = DatetimeConverter() units.registry[Period] = PeriodConverter() diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 478f2f35e631f..638c9b05839e1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -70,7 +70,6 @@ class TestPDApi(Base, tm.TestCase): 'merge', 'merge_ordered', 'merge_asof', 'period_range', 'pivot', 'pivot_table', 'qcut', - 'scatter_matrix', 'show_versions', 'timedelta_range', 'unique', 'value_counts', 'wide_to_long'] @@ -103,7 +102,8 @@ class TestPDApi(Base, tm.TestCase): 'rolling_median', 'rolling_min', 'rolling_quantile', 'rolling_skew', 'rolling_std', 'rolling_sum', 'rolling_var', 'rolling_window', 'ordered_merge', - 'pnow', 'match', 'groupby', 'get_store', 'plot_params'] + 'pnow', 'match', 'groupby', 'get_store', + 'plot_params', 'scatter_matrix'] def test_api(self):