diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index e8998bf6f6f5c..fb799c642131d 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -152,7 +152,7 @@ You can also create these other plots using the methods ``DataFrame.plot.` In addition to these ``kind`` s, there are the :ref:`DataFrame.hist() `, and :ref:`DataFrame.boxplot() ` methods, which use a separate interface. -Finally, there are several :ref:`plotting functions ` in ``pandas.tools.plotting`` +Finally, there are several :ref:`plotting functions ` in ``pandas.plotting`` that take a :class:`Series` or :class:`DataFrame` as an argument. These include @@ -823,7 +823,7 @@ before plotting. Plotting Tools -------------- -These functions can be imported from ``pandas.tools.plotting`` +These functions can be imported from ``pandas.plotting`` and take a :class:`Series` or :class:`DataFrame` as an argument. .. _visualization.scatter_matrix: @@ -834,7 +834,7 @@ Scatter Matrix Plot .. versionadded:: 0.7.3 You can create a scatter plot matrix using the -``scatter_matrix`` method in ``pandas.tools.plotting``: +``scatter_matrix`` method in ``pandas.plotting``: .. ipython:: python :suppress: @@ -843,7 +843,7 @@ You can create a scatter plot matrix using the .. ipython:: python - from pandas.tools.plotting import scatter_matrix + from pandas.plotting import scatter_matrix df = pd.DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd']) @savefig scatter_matrix_kde.png @@ -896,7 +896,7 @@ of the same class will usually be closer together and form larger structures. .. ipython:: python - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves data = pd.read_csv('data/iris.data') @@ -918,7 +918,7 @@ represents one data point. Points that tend to cluster will appear closer togeth .. ipython:: python - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates data = pd.read_csv('data/iris.data') @@ -948,7 +948,7 @@ implies that the underlying data are not random. .. ipython:: python - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot plt.figure() @@ -983,7 +983,7 @@ confidence band. .. ipython:: python - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot plt.figure() @@ -1016,7 +1016,7 @@ are what constitutes the bootstrap plot. .. ipython:: python - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot data = pd.Series(np.random.rand(1000)) @@ -1048,7 +1048,7 @@ be colored differently. .. ipython:: python - from pandas.tools.plotting import radviz + from pandas.plotting import radviz data = pd.read_csv('data/iris.data') @@ -1228,14 +1228,14 @@ Using the ``x_compat`` parameter, you can suppress this behavior: plt.close('all') If you have more than one plot that needs to be suppressed, the ``use`` method -in ``pandas.plot_params`` can be used in a `with statement`: +in ``pandas.plotting.plot_params`` can be used in a `with statement`: .. ipython:: python plt.figure() @savefig ser_plot_suppress_context.png - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): df.A.plot(color='r') df.B.plot(color='g') df.C.plot(color='b') @@ -1450,11 +1450,11 @@ Also, you can pass different :class:`DataFrame` or :class:`Series` for ``table`` plt.close('all') -Finally, there is a helper function ``pandas.tools.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. +Finally, there is a helper function ``pandas.plotting.table`` to create a table from :class:`DataFrame` and :class:`Series`, and add it to an ``matplotlib.Axes``. This function can accept keywords which matplotlib table has. .. ipython:: python - from pandas.tools.plotting import table + from pandas.plotting import table fig, ax = plt.subplots(1, 1) table(ax, np.round(df.describe(), 2), diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 133757b131312..463ccfc1a3e46 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -21,6 +21,7 @@ Highlights include: - Support for S3 handling now uses ``s3fs``, see :ref:`here ` - Google BigQuery support now uses the ``pandas-gbq`` library, see :ref:`here ` - Switched the test framework to use `pytest `__ (:issue:`13097`) +- The ``pandas.tools.plotting`` module has been deprecated, moved to ``pandas.plotting``. See :ref:`here ` Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -557,6 +558,31 @@ Using ``.iloc``. Here we will get the location of the 'A' column, then use *posi df.iloc[[0, 2], df.columns.get_loc('A')] +.. _whatsnew_0200.api_breaking.deprecate_plotting + +Deprecate .plotting +^^^^^^^^^^^^^^^^^^^ + +The ``pandas.tools.plotting`` module has been deprecated, in favor of the top level ``pandas.plotting`` module. All the public plotting functions are now available +from ``pandas.plotting`` (:issue:`12548`). + +Furthermore, the top-level ``pandas.scatter_matrix`` and ``pandas.plot_params`` are deprecated. +Users can import these from ``pandas.plotting`` as well. + +Previous script: + +.. code-block:: python + + pd.tools.plotting.scatter_matrix(df) + pd.scatter_matrix(df) + +Should be changed to: + +.. code-block:: python + + pd.plotting.scatter_matrix(df) + + .. _whatsnew_0200.api_breaking.deprecate_panel: Deprecate Panel diff --git a/pandas/__init__.py b/pandas/__init__.py index 529750cd97076..bc38919f2c78c 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -49,7 +49,15 @@ from pandas.tools.merge import (merge, ordered_merge, merge_ordered, merge_asof) from pandas.tools.pivot import pivot_table, crosstab -from pandas.tools.plotting import scatter_matrix, plot_params + +# deprecate tools.plotting, plot_params and scatter_matrix on the top namespace +import pandas.tools.plotting +plot_params = pandas.plotting._style._Options(deprecated=True) +# do not import deprecate to top namespace +scatter_matrix = pandas.util.decorators.deprecate( + 'pandas.scatter_matrix', pandas.plotting.scatter_matrix, + 'pandas.plotting.scatter_matrix') + from pandas.tools.tile import cut, qcut from pandas.tools.util import to_numeric from pandas.core.reshape import melt diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 931fe0661818d..cf2a653638e90 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -285,7 +285,7 @@ def mpl_style_cb(key): stacklevel=5) import sys - from pandas.tools.plotting import mpl_stylesheet + from pandas.plotting._style import mpl_stylesheet global style_backup val = cf.get_option(key) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4565250c78387..a5256868ce419 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,7 +90,7 @@ import pandas.core.ops as ops import pandas.formats.format as fmt from pandas.formats.printing import pprint_thing -import pandas.tools.plotting as gfx +import pandas.plotting._core as gfx from pandas._libs import lib, algos as libalgos @@ -5909,11 +5909,11 @@ def _put_str(s, space): @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) def boxplot(self, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, **kwds): - import pandas.tools.plotting as plots + from pandas.plotting._core import boxplot import matplotlib.pyplot as plt - ax = plots.boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, - grid=grid, rot=rot, figsize=figsize, layout=layout, - return_type=return_type, **kwds) + ax = boxplot(self, column=column, by=by, ax=ax, fontsize=fontsize, + grid=grid, rot=rot, figsize=figsize, layout=layout, + return_type=return_type, **kwds) plt.draw_if_interactive() return ax diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 45a9577c8d8b2..27e256a8eb572 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -4159,7 +4159,7 @@ def groupby_series(obj, col=None): return results -from pandas.tools.plotting import boxplot_frame_groupby # noqa +from pandas.plotting._core import boxplot_frame_groupby # noqa DataFrameGroupBy.boxplot = boxplot_frame_groupby diff --git a/pandas/core/series.py b/pandas/core/series.py index 3305f0b6c439e..e5dc92592addd 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3001,7 +3001,7 @@ def create_from_value(value, index, dtype): # ---------------------------------------------------------------------- # Add plotting methods to Series -import pandas.tools.plotting as _gfx # noqa +import pandas.plotting._core as _gfx # noqa Series.plot = base.AccessorProperty(_gfx.SeriesPlotMethods, _gfx.SeriesPlotMethods) diff --git a/pandas/plotting/__init__.py b/pandas/plotting/__init__.py new file mode 100644 index 0000000000000..c3cbedb0fc28c --- /dev/null +++ b/pandas/plotting/__init__.py @@ -0,0 +1,19 @@ +""" +Plotting api +""" + +# flake8: noqa + +try: # mpl optional + from pandas.plotting import _converter + _converter.register() # needs to override so set_xlim works with str/number +except ImportError: + pass + +from pandas.plotting._misc import (scatter_matrix, radviz, + andrews_curves, bootstrap_plot, + parallel_coordinates, lag_plot, + autocorrelation_plot) +from pandas.plotting._core import boxplot +from pandas.plotting._style import plot_params +from pandas.plotting._tools import table diff --git a/pandas/plotting/_compat.py b/pandas/plotting/_compat.py new file mode 100644 index 0000000000000..7b04b9e1171ec --- /dev/null +++ b/pandas/plotting/_compat.py @@ -0,0 +1,67 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +from distutils.version import LooseVersion + + +def _mpl_le_1_2_1(): + try: + import matplotlib as mpl + return (str(mpl.__version__) <= LooseVersion('1.2.1') and + str(mpl.__version__)[0] != '0') + except ImportError: + return False + + +def _mpl_ge_1_3_1(): + try: + import matplotlib + # The or v[0] == '0' is because their versioneer is + # messed up on dev + return (matplotlib.__version__ >= LooseVersion('1.3.1') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_4_0(): + try: + import matplotlib + return (matplotlib.__version__ >= LooseVersion('1.4') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_1_5_0(): + try: + import matplotlib + return (matplotlib.__version__ >= LooseVersion('1.5') or + matplotlib.__version__[0] == '0') + except ImportError: + return False + + +def _mpl_ge_2_0_0(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0') + except ImportError: + return False + + +def _mpl_le_2_0_0(): + try: + import matplotlib + return matplotlib.compare_versions('2.0.0', matplotlib.__version__) + except ImportError: + return False + + +def _mpl_ge_2_0_1(): + try: + import matplotlib + return matplotlib.__version__ >= LooseVersion('2.0.1') + except ImportError: + return False diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py new file mode 100644 index 0000000000000..0aa8cc31646c5 --- /dev/null +++ b/pandas/plotting/_converter.py @@ -0,0 +1,1026 @@ +from datetime import datetime, timedelta +import datetime as pydt +import numpy as np + +from dateutil.relativedelta import relativedelta + +import matplotlib.units as units +import matplotlib.dates as dates + +from matplotlib.ticker import Formatter, AutoLocator, Locator +from matplotlib.transforms import nonsingular + + +from pandas.types.common import (is_float, is_integer, + is_integer_dtype, + is_float_dtype, + is_datetime64_ns_dtype, + is_period_arraylike, + ) + +from pandas.compat import lrange +import pandas.compat as compat +import pandas._libs.lib as lib +import pandas.core.common as com +from pandas.core.index import Index + +from pandas.core.series import Series +from pandas.tseries.index import date_range +import pandas.tseries.tools as tools +import pandas.tseries.frequencies as frequencies +from pandas.tseries.frequencies import FreqGroup +from pandas.tseries.period import Period, PeriodIndex + +from pandas.plotting._compat import _mpl_le_2_0_0 + +# constants +HOURS_PER_DAY = 24. +MIN_PER_HOUR = 60. +SEC_PER_MIN = 60. + +SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR +SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY + +MUSEC_PER_DAY = 1e6 * SEC_PER_DAY + + +def register(): + units.registry[lib.Timestamp] = DatetimeConverter() + units.registry[Period] = PeriodConverter() + units.registry[pydt.datetime] = DatetimeConverter() + units.registry[pydt.date] = DatetimeConverter() + units.registry[pydt.time] = TimeConverter() + units.registry[np.datetime64] = DatetimeConverter() + + +def _to_ordinalf(tm): + tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + + float(tm.microsecond / 1e6)) + return tot_sec + + +def time2num(d): + if isinstance(d, compat.string_types): + parsed = tools.to_datetime(d) + if not isinstance(parsed, datetime): + raise ValueError('Could not parse time %s' % d) + return _to_ordinalf(parsed.time()) + if isinstance(d, pydt.time): + return _to_ordinalf(d) + return d + + +class TimeConverter(units.ConversionInterface): + + @staticmethod + def convert(value, unit, axis): + valid_types = (str, pydt.time) + if (isinstance(value, valid_types) or is_integer(value) or + is_float(value)): + return time2num(value) + if isinstance(value, Index): + return value.map(time2num) + if isinstance(value, (list, tuple, np.ndarray, Index)): + return [time2num(x) for x in value] + return value + + @staticmethod + def axisinfo(unit, axis): + if unit != 'time': + return None + + majloc = AutoLocator() + majfmt = TimeFormatter(majloc) + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') + + @staticmethod + def default_units(x, axis): + return 'time' + + +# time formatter +class TimeFormatter(Formatter): + + def __init__(self, locs): + self.locs = locs + + def __call__(self, x, pos=0): + fmt = '%H:%M:%S' + s = int(x) + ms = int((x - s) * 1e3) + us = int((x - s) * 1e6 - ms) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + _, h = divmod(h, 24) + if us != 0: + fmt += '.%6f' + elif ms != 0: + fmt += '.%3f' + + return pydt.time(h, m, s, us).strftime(fmt) + + +# Period Conversion + + +class PeriodConverter(dates.DateConverter): + + @staticmethod + def convert(values, units, axis): + if not hasattr(axis, 'freq'): + raise TypeError('Axis must have `freq` set to convert to Periods') + valid_types = (compat.string_types, datetime, + Period, pydt.date, pydt.time) + if (isinstance(values, valid_types) or is_integer(values) or + is_float(values)): + return get_datevalue(values, axis.freq) + if isinstance(values, PeriodIndex): + return values.asfreq(axis.freq)._values + if isinstance(values, Index): + return values.map(lambda x: get_datevalue(x, axis.freq)) + if is_period_arraylike(values): + return PeriodIndex(values, freq=axis.freq)._values + if isinstance(values, (list, tuple, np.ndarray, Index)): + return [get_datevalue(x, axis.freq) for x in values] + return values + + +def get_datevalue(date, freq): + if isinstance(date, Period): + return date.asfreq(freq).ordinal + elif isinstance(date, (compat.string_types, datetime, + pydt.date, pydt.time)): + return Period(date, freq).ordinal + elif (is_integer(date) or is_float(date) or + (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): + return date + elif date is None: + return None + raise ValueError("Unrecognizable date '%s'" % date) + + +def _dt_to_float_ordinal(dt): + """ + Convert :mod:`datetime` to the Gregorian date as UTC float days, + preserving hours, minutes, seconds and microseconds. Return value + is a :func:`float`. + """ + if (isinstance(dt, (np.ndarray, Index, Series) + ) and is_datetime64_ns_dtype(dt)): + base = dates.epoch2num(dt.asi8 / 1.0E9) + else: + base = dates.date2num(dt) + return base + + +# Datetime Conversion +class DatetimeConverter(dates.DateConverter): + + @staticmethod + def convert(values, unit, axis): + def try_parse(values): + try: + return _dt_to_float_ordinal(tools.to_datetime(values)) + except Exception: + return values + + if isinstance(values, (datetime, pydt.date)): + return _dt_to_float_ordinal(values) + elif isinstance(values, np.datetime64): + return _dt_to_float_ordinal(lib.Timestamp(values)) + elif isinstance(values, pydt.time): + return dates.date2num(values) + elif (is_integer(values) or is_float(values)): + return values + elif isinstance(values, compat.string_types): + return try_parse(values) + elif isinstance(values, (list, tuple, np.ndarray, Index)): + if isinstance(values, Index): + values = values.values + if not isinstance(values, np.ndarray): + values = com._asarray_tuplesafe(values) + + if is_integer_dtype(values) or is_float_dtype(values): + return values + + try: + values = tools.to_datetime(values) + if isinstance(values, Index): + values = _dt_to_float_ordinal(values) + else: + values = [_dt_to_float_ordinal(x) for x in values] + except Exception: + values = _dt_to_float_ordinal(values) + + return values + + @staticmethod + def axisinfo(unit, axis): + """ + Return the :class:`~matplotlib.units.AxisInfo` for *unit*. + + *unit* is a tzinfo instance or None. + The *axis* argument is required but not used. + """ + tz = unit + + majloc = PandasAutoDateLocator(tz=tz) + majfmt = PandasAutoDateFormatter(majloc, tz=tz) + datemin = pydt.date(2000, 1, 1) + datemax = pydt.date(2010, 1, 1) + + return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', + default_limits=(datemin, datemax)) + + +class PandasAutoDateFormatter(dates.AutoDateFormatter): + + def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): + dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) + # matplotlib.dates._UTC has no _utcoffset called by pandas + if self._tz is dates.UTC: + self._tz._utcoffset = self._tz.utcoffset(None) + + # For mpl > 2.0 the format strings are controlled via rcparams + # so do not mess with them. For mpl < 2.0 change the second + # break point and add a musec break point + if _mpl_le_2_0_0(): + self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' + self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' + + +class PandasAutoDateLocator(dates.AutoDateLocator): + + def get_locator(self, dmin, dmax): + 'Pick the best locator based on a distance.' + delta = relativedelta(dmax, dmin) + + num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days + num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds + tot_sec = num_days * 86400. + num_sec + + if abs(tot_sec) < self.minticks: + self._freq = -1 + locator = MilliSecondLocator(self.tz) + locator.set_axis(self.axis) + + locator.set_view_interval(*self.axis.get_view_interval()) + locator.set_data_interval(*self.axis.get_data_interval()) + return locator + + return dates.AutoDateLocator.get_locator(self, dmin, dmax) + + def _get_unit(self): + return MilliSecondLocator.get_unit_generic(self._freq) + + +class MilliSecondLocator(dates.DateLocator): + + UNIT = 1. / (24 * 3600 * 1000) + + def __init__(self, tz): + dates.DateLocator.__init__(self, tz) + self._interval = 1. + + def _get_unit(self): + return self.get_unit_generic(-1) + + @staticmethod + def get_unit_generic(freq): + unit = dates.RRuleLocator.get_unit_generic(freq) + if unit < 0: + return MilliSecondLocator.UNIT + return unit + + def __call__(self): + # if no data have been set, this will tank with a ValueError + try: + dmin, dmax = self.viewlim_to_dt() + except ValueError: + return [] + + if dmin > dmax: + dmax, dmin = dmin, dmax + # We need to cap at the endpoints of valid datetime + + # TODO(wesm) unused? + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! + # stop = _from_ordinal(3652059.9999999) + + nmax, nmin = dates.date2num((dmax, dmin)) + + num = (nmax - nmin) * 86400 * 1000 + max_millis_ticks = 6 + for interval in [1, 10, 50, 100, 200, 500]: + if num <= interval * (max_millis_ticks - 1): + self._interval = interval + break + else: + # We went through the whole loop without breaking, default to 1 + self._interval = 1000. + + estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) + + if estimate > self.MAXTICKS * 2: + raise RuntimeError(('MillisecondLocator estimated to generate %d ' + 'ticks from %s to %s: exceeds Locator.MAXTICKS' + '* 2 (%d) ') % + (estimate, dmin, dmax, self.MAXTICKS * 2)) + + freq = '%dL' % self._get_interval() + tz = self.tz.tzname(None) + st = _from_ordinal(dates.date2num(dmin)) # strip tz + ed = _from_ordinal(dates.date2num(dmax)) + all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).asobject + + try: + if len(all_dates) > 0: + locs = self.raise_if_exceeds(dates.date2num(all_dates)) + return locs + except Exception: # pragma: no cover + pass + + lims = dates.date2num([dmin, dmax]) + return lims + + def _get_interval(self): + return self._interval + + def autoscale(self): + """ + Set the view limits to include the data range. + """ + dmin, dmax = self.datalim_to_dt() + if dmin > dmax: + dmax, dmin = dmin, dmax + + # We need to cap at the endpoints of valid datetime + + # TODO(wesm): unused? + + # delta = relativedelta(dmax, dmin) + # try: + # start = dmin - delta + # except ValueError: + # start = _from_ordinal(1.0) + + # try: + # stop = dmax + delta + # except ValueError: + # # The magic number! + # stop = _from_ordinal(3652059.9999999) + + dmin, dmax = self.datalim_to_dt() + + vmin = dates.date2num(dmin) + vmax = dates.date2num(dmax) + + return self.nonsingular(vmin, vmax) + + +def _from_ordinal(x, tz=None): + ix = int(x) + dt = datetime.fromordinal(ix) + remainder = float(x) - ix + hour, remainder = divmod(24 * remainder, 1) + minute, remainder = divmod(60 * remainder, 1) + second, remainder = divmod(60 * remainder, 1) + microsecond = int(1e6 * remainder) + if microsecond < 10: + microsecond = 0 # compensate for rounding errors + dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), + int(second), microsecond) + if tz is not None: + dt = dt.astimezone(tz) + + if microsecond > 999990: # compensate for rounding errors + dt += timedelta(microseconds=1e6 - microsecond) + + return dt + +# Fixed frequency dynamic tick locators and formatters + +# ------------------------------------------------------------------------- +# --- Locators --- +# ------------------------------------------------------------------------- + + +def _get_default_annual_spacing(nyears): + """ + Returns a default spacing between consecutive ticks for annual data. + """ + if nyears < 11: + (min_spacing, maj_spacing) = (1, 1) + elif nyears < 20: + (min_spacing, maj_spacing) = (1, 2) + elif nyears < 50: + (min_spacing, maj_spacing) = (1, 5) + elif nyears < 100: + (min_spacing, maj_spacing) = (5, 10) + elif nyears < 200: + (min_spacing, maj_spacing) = (5, 25) + elif nyears < 600: + (min_spacing, maj_spacing) = (10, 50) + else: + factor = nyears // 1000 + 1 + (min_spacing, maj_spacing) = (factor * 20, factor * 100) + return (min_spacing, maj_spacing) + + +def period_break(dates, period): + """ + Returns the indices where the given period changes. + + Parameters + ---------- + dates : PeriodIndex + Array of intervals to monitor. + period : string + Name of the period to monitor. + """ + current = getattr(dates, period) + previous = getattr(dates - 1, period) + return np.nonzero(current - previous)[0] + + +def has_level_label(label_flags, vmin): + """ + Returns true if the ``label_flags`` indicate there is at least one label + for this level. + + if the minimum view limit is not an exact integer, then the first tick + label won't be shown, so we must adjust for that. + """ + if label_flags.size == 0 or (label_flags.size == 1 and + label_flags[0] == 0 and + vmin % 1 > 0.0): + return False + else: + return True + + +def _daily_finder(vmin, vmax, freq): + periodsperday = -1 + + if freq >= FreqGroup.FR_HR: + if freq == FreqGroup.FR_NS: + periodsperday = 24 * 60 * 60 * 1000000000 + elif freq == FreqGroup.FR_US: + periodsperday = 24 * 60 * 60 * 1000000 + elif freq == FreqGroup.FR_MS: + periodsperday = 24 * 60 * 60 * 1000 + elif freq == FreqGroup.FR_SEC: + periodsperday = 24 * 60 * 60 + elif freq == FreqGroup.FR_MIN: + periodsperday = 24 * 60 + elif freq == FreqGroup.FR_HR: + periodsperday = 24 + else: # pragma: no cover + raise ValueError("unexpected frequency: %s" % freq) + periodsperyear = 365 * periodsperday + periodspermonth = 28 * periodsperday + + elif freq == FreqGroup.FR_BUS: + periodsperyear = 261 + periodspermonth = 19 + elif freq == FreqGroup.FR_DAY: + periodsperyear = 365 + periodspermonth = 28 + elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: + periodsperyear = 52 + periodspermonth = 3 + else: # pragma: no cover + raise ValueError("unexpected frequency") + + # save this for later usage + vmin_orig = vmin + + (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), + Period(ordinal=int(vmax), freq=freq)) + span = vmax.ordinal - vmin.ordinal + 1 + dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) + # Initialize the output + info = np.zeros(span, + dtype=[('val', np.int64), ('maj', bool), + ('min', bool), ('fmt', '|S20')]) + info['val'][:] = dates_._values + info['fmt'][:] = '' + info['maj'][[0, -1]] = True + # .. and set some shortcuts + info_maj = info['maj'] + info_min = info['min'] + info_fmt = info['fmt'] + + def first_label(label_flags): + if (label_flags[0] == 0) and (label_flags.size > 1) and \ + ((vmin_orig % 1) > 0.0): + return label_flags[1] + else: + return label_flags[0] + + # Case 1. Less than a month + if span <= periodspermonth: + day_start = period_break(dates_, 'day') + month_start = period_break(dates_, 'month') + + def _hour_finder(label_interval, force_year_start): + _hour = dates_.hour + _prev_hour = (dates_ - 1).hour + hour_start = (_hour - _prev_hour) != 0 + info_maj[day_start] = True + info_min[hour_start & (_hour % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + if force_year_start and not has_level_label(year_start, vmin_orig): + info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' + + def _minute_finder(label_interval): + hour_start = period_break(dates_, 'hour') + _minute = dates_.minute + _prev_minute = (dates_ - 1).minute + minute_start = (_minute - _prev_minute) != 0 + info_maj[hour_start] = True + info_min[minute_start & (_minute % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' + info_fmt[day_start] = '%H:%M\n%d-%b' + info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' + + def _second_finder(label_interval): + minute_start = period_break(dates_, 'minute') + _second = dates_.second + _prev_second = (dates_ - 1).second + second_start = (_second - _prev_second) != 0 + info['maj'][minute_start] = True + info['min'][second_start & (_second % label_interval == 0)] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[second_start & (_second % + label_interval == 0)] = '%H:%M:%S' + info_fmt[day_start] = '%H:%M:%S\n%d-%b' + info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' + + if span < periodsperday / 12000.0: + _second_finder(1) + elif span < periodsperday / 6000.0: + _second_finder(2) + elif span < periodsperday / 2400.0: + _second_finder(5) + elif span < periodsperday / 1200.0: + _second_finder(10) + elif span < periodsperday / 800.0: + _second_finder(15) + elif span < periodsperday / 400.0: + _second_finder(30) + elif span < periodsperday / 150.0: + _minute_finder(1) + elif span < periodsperday / 70.0: + _minute_finder(2) + elif span < periodsperday / 24.0: + _minute_finder(5) + elif span < periodsperday / 12.0: + _minute_finder(15) + elif span < periodsperday / 6.0: + _minute_finder(30) + elif span < periodsperday / 2.5: + _hour_finder(1, False) + elif span < periodsperday / 1.5: + _hour_finder(2, False) + elif span < periodsperday * 1.25: + _hour_finder(3, False) + elif span < periodsperday * 2.5: + _hour_finder(6, True) + elif span < periodsperday * 4: + _hour_finder(12, True) + else: + info_maj[month_start] = True + info_min[day_start] = True + year_start = period_break(dates_, 'year') + info_fmt = info['fmt'] + info_fmt[day_start] = '%d' + info_fmt[month_start] = '%d\n%b' + info_fmt[year_start] = '%d\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(day_start)] = '%d\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '%d\n%b\n%Y' + + # Case 2. Less than three months + elif span <= periodsperyear // 4: + month_start = period_break(dates_, 'month') + info_maj[month_start] = True + if freq < FreqGroup.FR_HR: + info['min'] = True + else: + day_start = period_break(dates_, 'day') + info['min'][day_start] = True + week_start = period_break(dates_, 'week') + year_start = period_break(dates_, 'year') + info_fmt[week_start] = '%d' + info_fmt[month_start] = '\n\n%b' + info_fmt[year_start] = '\n\n%b\n%Y' + if not has_level_label(year_start, vmin_orig): + if not has_level_label(month_start, vmin_orig): + info_fmt[first_label(week_start)] = '\n\n%b\n%Y' + else: + info_fmt[first_label(month_start)] = '\n\n%b\n%Y' + # Case 3. Less than 14 months ............... + elif span <= 1.15 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + week_start = period_break(dates_, 'week') + info_maj[month_start] = True + info_min[week_start] = True + info_min[year_start] = False + info_min[month_start] = False + info_fmt[month_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + if not has_level_label(year_start, vmin_orig): + info_fmt[first_label(month_start)] = '%b\n%Y' + # Case 4. Less than 2.5 years ............... + elif span <= 2.5 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + month_start = period_break(dates_, 'month') + info_maj[quarter_start] = True + info_min[month_start] = True + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 4. Less than 4 years ................. + elif span <= 4 * periodsperyear: + year_start = period_break(dates_, 'year') + month_start = period_break(dates_, 'month') + info_maj[year_start] = True + info_min[month_start] = True + info_min[year_start] = False + + month_break = dates_[month_start].month + jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + # Case 5. Less than 11 years ................ + elif span <= 11 * periodsperyear: + year_start = period_break(dates_, 'year') + quarter_start = period_break(dates_, 'quarter') + info_maj[year_start] = True + info_min[quarter_start] = True + info_min[year_start] = False + info_fmt[year_start] = '%Y' + # Case 6. More than 12 years ................ + else: + year_start = period_break(dates_, 'year') + year_break = dates_[year_start].year + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(year_break % maj_anndef == 0)] + info_maj[major_idx] = True + minor_idx = year_start[(year_break % min_anndef == 0)] + info_min[minor_idx] = True + info_fmt[major_idx] = '%Y' + + return info + + +def _monthly_finder(vmin, vmax, freq): + periodsperyear = 12 + + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + # Initialize the output + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + dates_ = info['val'] + info['fmt'] = '' + year_start = (dates_ % 12 == 0).nonzero()[0] + info_maj = info['maj'] + info_fmt = info['fmt'] + + if span <= 1.15 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = '%b' + info_fmt[year_start] = '%b\n%Y' + + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = '%b\n%Y' + + elif span <= 2.5 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + # TODO: Check the following : is it really info['fmt'] ? + info['fmt'][quarter_start] = True + info['min'] = True + + info_fmt[quarter_start] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 4 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) + info_fmt[jan_or_jul] = '%b' + info_fmt[year_start] = '%b\n%Y' + + elif span <= 11 * periodsperyear: + quarter_start = (dates_ % 3 == 0).nonzero() + info_maj[year_start] = True + info['min'][quarter_start] = True + + info_fmt[year_start] = '%Y' + + else: + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + years = dates_[year_start] // 12 + 1 + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + + info_fmt[major_idx] = '%Y' + + return info + + +def _quarterly_finder(vmin, vmax, freq): + periodsperyear = 4 + vmin_orig = vmin + (vmin, vmax) = (int(vmin), int(vmax)) + span = vmax - vmin + 1 + + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + info['fmt'] = '' + dates_ = info['val'] + info_maj = info['maj'] + info_fmt = info['fmt'] + year_start = (dates_ % 4 == 0).nonzero()[0] + + if span <= 3.5 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + + info_fmt[:] = 'Q%q' + info_fmt[year_start] = 'Q%q\n%F' + if not has_level_label(year_start, vmin_orig): + if dates_.size > 1: + idx = 1 + else: + idx = 0 + info_fmt[idx] = 'Q%q\n%F' + + elif span <= 11 * periodsperyear: + info_maj[year_start] = True + info['min'] = True + info_fmt[year_start] = '%F' + + else: + years = dates_[year_start] // 4 + 1 + nyears = span / periodsperyear + (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) + major_idx = year_start[(years % maj_anndef == 0)] + info_maj[major_idx] = True + info['min'][year_start[(years % min_anndef == 0)]] = True + info_fmt[major_idx] = '%F' + + return info + + +def _annual_finder(vmin, vmax, freq): + (vmin, vmax) = (int(vmin), int(vmax + 1)) + span = vmax - vmin + 1 + + info = np.zeros(span, + dtype=[('val', int), ('maj', bool), ('min', bool), + ('fmt', '|S8')]) + info['val'] = np.arange(vmin, vmax + 1) + info['fmt'] = '' + dates_ = info['val'] + + (min_anndef, maj_anndef) = _get_default_annual_spacing(span) + major_idx = dates_ % maj_anndef == 0 + info['maj'][major_idx] = True + info['min'][(dates_ % min_anndef == 0)] = True + info['fmt'][major_idx] = '%Y' + + return info + + +def get_finder(freq): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + fgroup = frequencies.get_freq_group(freq) + + if fgroup == FreqGroup.FR_ANN: + return _annual_finder + elif fgroup == FreqGroup.FR_QTR: + return _quarterly_finder + elif freq == FreqGroup.FR_MTH: + return _monthly_finder + elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): + return _daily_finder + else: # pragma: no cover + errmsg = "Unsupported frequency: %s" % (freq) + raise NotImplementedError(errmsg) + + +class TimeSeries_DateLocator(Locator): + """ + Locates the ticks along an axis controlled by a :class:`Series`. + + Parameters + ---------- + freq : {var} + Valid frequency specifier. + minor_locator : {False, True}, optional + Whether the locator is for minor ticks (True) or not. + dynamic_mode : {True, False}, optional + Whether the locator should work in dynamic mode. + base : {int}, optional + quarter : {int}, optional + month : {int}, optional + day : {int}, optional + """ + + def __init__(self, freq, minor_locator=False, dynamic_mode=True, + base=1, quarter=1, month=1, day=1, plot_obj=None): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + self.freq = freq + self.base = base + (self.quarter, self.month, self.day) = (quarter, month, day) + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _get_default_locs(self, vmin, vmax): + "Returns the default locations of ticks." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + + locator = self.plot_obj.date_axis_info + + if self.isminor: + return np.compress(locator['min'], locator['val']) + return np.compress(locator['maj'], locator['val']) + + def __call__(self): + 'Return the locations of the ticks.' + # axis calls Locator.set_axis inside set_m_formatter + vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + vmin, vmax = vi + if vmax < vmin: + vmin, vmax = vmax, vmin + if self.isdynamic: + locs = self._get_default_locs(vmin, vmax) + else: # pragma: no cover + base = self.base + (d, m) = divmod(vmin, base) + vmin = (d + 1) * base + locs = lrange(vmin, vmax + 1, base) + return locs + + def autoscale(self): + """ + Sets the view limits to the nearest multiples of base that contain the + data. + """ + # requires matplotlib >= 0.98.0 + (vmin, vmax) = self.axis.get_data_interval() + + locs = self._get_default_locs(vmin, vmax) + (vmin, vmax) = locs[[0, -1]] + if vmin == vmax: + vmin -= 1 + vmax += 1 + return nonsingular(vmin, vmax) + +# ------------------------------------------------------------------------- +# --- Formatter --- +# ------------------------------------------------------------------------- + + +class TimeSeries_DateFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`PeriodIndex`. + + Parameters + ---------- + freq : {int, string} + Valid frequency specifier. + minor_locator : {False, True} + Whether the current formatter should apply to minor ticks (True) or + major ticks (False). + dynamic_mode : {True, False} + Whether the formatter works in dynamic mode or not. + """ + + def __init__(self, freq, minor_locator=False, dynamic_mode=True, + plot_obj=None): + if isinstance(freq, compat.string_types): + freq = frequencies.get_freq(freq) + self.format = None + self.freq = freq + self.locs = [] + self.formatdict = None + self.isminor = minor_locator + self.isdynamic = dynamic_mode + self.offset = 0 + self.plot_obj = plot_obj + self.finder = get_finder(freq) + + def _set_default_format(self, vmin, vmax): + "Returns the default ticks spacing." + + if self.plot_obj.date_axis_info is None: + self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) + info = self.plot_obj.date_axis_info + + if self.isminor: + format = np.compress(info['min'] & np.logical_not(info['maj']), + info) + else: + format = np.compress(info['maj'], info) + self.formatdict = dict([(x, f) for (x, _, _, f) in format]) + return self.formatdict + + def set_locs(self, locs): + 'Sets the locations of the ticks' + # don't actually use the locs. This is just needed to work with + # matplotlib. Force to use vmin, vmax + self.locs = locs + + (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) + if vi != self.plot_obj.view_interval: + self.plot_obj.date_axis_info = None + self.plot_obj.view_interval = vi + if vmax < vmin: + (vmin, vmax) = (vmax, vmin) + self._set_default_format(vmin, vmax) + + def __call__(self, x, pos=0): + if self.formatdict is None: + return '' + else: + fmt = self.formatdict.pop(x, '') + return Period(ordinal=int(x), freq=self.freq).strftime(fmt) + + +class TimeSeries_TimedeltaFormatter(Formatter): + """ + Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. + """ + + @staticmethod + def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + def __call__(self, x, pos=0): + (vmin, vmax) = tuple(self.axis.get_view_interval()) + n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) + if n_decimals > 9: + n_decimals = 9 + return self.format_timedelta_ticks(x, pos, n_decimals) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py new file mode 100644 index 0000000000000..3980f5e7f2f61 --- /dev/null +++ b/pandas/plotting/_core.py @@ -0,0 +1,2828 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +import re +from collections import namedtuple +from distutils.version import LooseVersion + +import numpy as np + +from pandas.util.decorators import cache_readonly +from pandas.core.base import PandasObject +from pandas.types.common import (is_list_like, + is_integer, + is_number, + is_hashable, + is_iterator) +from pandas.core.common import AbstractMethodError, isnull, _try_sort +from pandas.core.generic import _shared_docs, _shared_doc_kwargs +from pandas.core.index import Index, MultiIndex +from pandas.core.series import Series, remove_na +from pandas.tseries.period import PeriodIndex +from pandas.compat import range, lrange, map, zip, string_types +import pandas.compat as compat +from pandas.formats.printing import pprint_thing +from pandas.util.decorators import Appender + +from pandas.plotting._compat import (_mpl_ge_1_3_1, + _mpl_ge_1_5_0) +from pandas.plotting._style import (mpl_stylesheet, plot_params, + _get_standard_colors) +from pandas.plotting._tools import (_subplots, _flatten, table, + _handle_shared_axes, _get_all_lines, + _get_xlim, _set_ticks_props, + format_date_labels) + + +if _mpl_ge_1_5_0(): + # Compat with mp 1.5, which uses cycler. + import cycler + colors = mpl_stylesheet.pop('axes.color_cycle') + mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) + + +def _get_standard_kind(kind): + return {'density': 'kde'}.get(kind, kind) + + +def _gca(): + import matplotlib.pyplot as plt + return plt.gca() + + +def _gcf(): + import matplotlib.pyplot as plt + return plt.gcf() + + +class MPLPlot(object): + """ + Base class for assembling a pandas plot using matplotlib + + Parameters + ---------- + data : + + """ + + @property + def _kind(self): + """Specify kind str. Must be overridden in child class""" + raise NotImplementedError + + _layout_type = 'vertical' + _default_rot = 0 + orientation = None + _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', + 'mark_right', 'stacked'] + _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, + 'mark_right': True, 'stacked': False} + + def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, + sharey=False, use_index=True, + figsize=None, grid=None, legend=True, rot=None, + ax=None, fig=None, title=None, xlim=None, ylim=None, + xticks=None, yticks=None, + sort_columns=False, fontsize=None, + secondary_y=False, colormap=None, + table=False, layout=None, **kwds): + + self.data = data + self.by = by + + self.kind = kind + + self.sort_columns = sort_columns + + self.subplots = subplots + + if sharex is None: + if ax is None: + self.sharex = True + else: + # if we get an axis, the users should do the visibility + # setting... + self.sharex = False + else: + self.sharex = sharex + + self.sharey = sharey + self.figsize = figsize + self.layout = layout + + self.xticks = xticks + self.yticks = yticks + self.xlim = xlim + self.ylim = ylim + self.title = title + self.use_index = use_index + + self.fontsize = fontsize + + if rot is not None: + self.rot = rot + # need to know for format_date_labels since it's rotated to 30 by + # default + self._rot_set = True + else: + self._rot_set = False + self.rot = self._default_rot + + if grid is None: + grid = False if secondary_y else self.plt.rcParams['axes.grid'] + + self.grid = grid + self.legend = legend + self.legend_handles = [] + self.legend_labels = [] + + for attr in self._pop_attributes: + value = kwds.pop(attr, self._attr_defaults.get(attr, None)) + setattr(self, attr, value) + + self.ax = ax + self.fig = fig + self.axes = None + + # parse errorbar input if given + xerr = kwds.pop('xerr', None) + yerr = kwds.pop('yerr', None) + self.errors = {} + for kw, err in zip(['xerr', 'yerr'], [xerr, yerr]): + self.errors[kw] = self._parse_errorbars(kw, err) + + if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, Index)): + secondary_y = [secondary_y] + self.secondary_y = secondary_y + + # ugly TypeError if user passes matplotlib's `cmap` name. + # Probably better to accept either. + if 'cmap' in kwds and colormap: + raise TypeError("Only specify one of `cmap` and `colormap`.") + elif 'cmap' in kwds: + self.colormap = kwds.pop('cmap') + else: + self.colormap = colormap + + self.table = table + + self.kwds = kwds + + self._validate_color_args() + + def _validate_color_args(self): + if 'color' not in self.kwds and 'colors' in self.kwds: + warnings.warn(("'colors' is being deprecated. Please use 'color'" + "instead of 'colors'")) + colors = self.kwds.pop('colors') + self.kwds['color'] = colors + + if ('color' in self.kwds and self.nseries == 1): + # support series.plot(color='green') + self.kwds['color'] = [self.kwds['color']] + + if ('color' in self.kwds or 'colors' in self.kwds) and \ + self.colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + + if 'color' in self.kwds and self.style is not None: + if is_list_like(self.style): + styles = self.style + else: + styles = [self.style] + # need only a single match + for s in styles: + if re.match('^[a-z]+?', s) is not None: + raise ValueError( + "Cannot pass 'style' string with a color " + "symbol and 'color' keyword argument. Please" + " use one or the other or pass 'style' " + "without a color symbol") + + def _iter_data(self, data=None, keep_index=False, fillna=None): + if data is None: + data = self.data + if fillna is not None: + data = data.fillna(fillna) + + # TODO: unused? + # if self.sort_columns: + # columns = _try_sort(data.columns) + # else: + # columns = data.columns + + for col, values in data.iteritems(): + if keep_index is True: + yield col, values + else: + yield col, values.values + + @property + def nseries(self): + if self.data.ndim == 1: + return 1 + else: + return self.data.shape[1] + + def draw(self): + self.plt.draw_if_interactive() + + def generate(self): + self._args_adjust() + self._compute_plot_data() + self._setup_subplots() + self._make_plot() + self._add_table() + self._make_legend() + self._adorn_subplots() + + for ax in self.axes: + self._post_plot_logic_common(ax, self.data) + self._post_plot_logic(ax, self.data) + + def _args_adjust(self): + pass + + def _has_plotted_object(self, ax): + """check whether ax has data""" + return (len(ax.lines) != 0 or + len(ax.artists) != 0 or + len(ax.containers) != 0) + + def _maybe_right_yaxis(self, ax, axes_num): + if not self.on_right(axes_num): + # secondary axes may be passed via ax kw + return self._get_ax_layer(ax) + + if hasattr(ax, 'right_ax'): + # if it has right_ax proparty, ``ax`` must be left axes + return ax.right_ax + elif hasattr(ax, 'left_ax'): + # if it has left_ax proparty, ``ax`` must be right axes + return ax + else: + # otherwise, create twin axes + orig_ax, new_ax = ax, ax.twinx() + # TODO: use Matplotlib public API when available + new_ax._get_lines = orig_ax._get_lines + new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill + orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax + + if not self._has_plotted_object(orig_ax): # no data on left y + orig_ax.get_yaxis().set_visible(False) + return new_ax + + def _setup_subplots(self): + if self.subplots: + fig, axes = _subplots(naxes=self.nseries, + sharex=self.sharex, sharey=self.sharey, + figsize=self.figsize, ax=self.ax, + layout=self.layout, + layout_type=self._layout_type) + else: + if self.ax is None: + fig = self.plt.figure(figsize=self.figsize) + axes = fig.add_subplot(111) + else: + fig = self.ax.get_figure() + if self.figsize is not None: + fig.set_size_inches(self.figsize) + axes = self.ax + + axes = _flatten(axes) + + if self.logx or self.loglog: + [a.set_xscale('log') for a in axes] + if self.logy or self.loglog: + [a.set_yscale('log') for a in axes] + + self.fig = fig + self.axes = axes + + @property + def result(self): + """ + Return result axes + """ + if self.subplots: + if self.layout is not None and not is_list_like(self.ax): + return self.axes.reshape(*self.layout) + else: + return self.axes + else: + sec_true = isinstance(self.secondary_y, bool) and self.secondary_y + all_sec = (is_list_like(self.secondary_y) and + len(self.secondary_y) == self.nseries) + if (sec_true or all_sec): + # if all data is plotted on secondary, return right axes + return self._get_ax_layer(self.axes[0], primary=False) + else: + return self.axes[0] + + def _compute_plot_data(self): + data = self.data + + if isinstance(data, Series): + label = self.label + if label is None and data.name is None: + label = 'None' + data = data.to_frame(name=label) + + numeric_data = data._convert(datetime=True)._get_numeric_data() + + try: + is_empty = numeric_data.empty + except AttributeError: + is_empty = not len(numeric_data) + + # no empty frames or series allowed + if is_empty: + raise TypeError('Empty {0!r}: no numeric data to ' + 'plot'.format(numeric_data.__class__.__name__)) + + self.data = numeric_data + + def _make_plot(self): + raise AbstractMethodError(self) + + def _add_table(self): + if self.table is False: + return + elif self.table is True: + data = self.data.transpose() + else: + data = self.table + ax = self._get_ax(0) + table(ax, data) + + def _post_plot_logic_common(self, ax, data): + """Common post process for each axes""" + labels = [pprint_thing(key) for key in data.index] + labels = dict(zip(range(len(data.index)), labels)) + + if self.orientation == 'vertical' or self.orientation is None: + if self._need_to_set_index: + xticklabels = [labels.get(x, '') for x in ax.get_xticks()] + ax.set_xticklabels(xticklabels) + self._apply_axis_properties(ax.xaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) + elif self.orientation == 'horizontal': + if self._need_to_set_index: + yticklabels = [labels.get(y, '') for y in ax.get_yticks()] + ax.set_yticklabels(yticklabels) + self._apply_axis_properties(ax.yaxis, rot=self.rot, + fontsize=self.fontsize) + self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) + else: # pragma no cover + raise ValueError + + def _post_plot_logic(self, ax, data): + """Post process for each axes. Overridden in child classes""" + pass + + def _adorn_subplots(self): + """Common post process unrelated to data""" + if len(self.axes) > 0: + all_axes = self._get_subplots() + nrows, ncols = self._get_axes_layout() + _handle_shared_axes(axarr=all_axes, nplots=len(all_axes), + naxes=nrows * ncols, nrows=nrows, + ncols=ncols, sharex=self.sharex, + sharey=self.sharey) + + for ax in self.axes: + if self.yticks is not None: + ax.set_yticks(self.yticks) + + if self.xticks is not None: + ax.set_xticks(self.xticks) + + if self.ylim is not None: + ax.set_ylim(self.ylim) + + if self.xlim is not None: + ax.set_xlim(self.xlim) + + ax.grid(self.grid) + + if self.title: + if self.subplots: + if is_list_like(self.title): + if len(self.title) != self.nseries: + msg = ('The length of `title` must equal the number ' + 'of columns if using `title` of type `list` ' + 'and `subplots=True`.\n' + 'length of title = {}\n' + 'number of columns = {}').format( + len(self.title), self.nseries) + raise ValueError(msg) + + for (ax, title) in zip(self.axes, self.title): + ax.set_title(title) + else: + self.fig.suptitle(self.title) + else: + if is_list_like(self.title): + msg = ('Using `title` of type `list` is not supported ' + 'unless `subplots=True` is passed') + raise ValueError(msg) + self.axes[0].set_title(self.title) + + def _apply_axis_properties(self, axis, rot=None, fontsize=None): + labels = axis.get_majorticklabels() + axis.get_minorticklabels() + for label in labels: + if rot is not None: + label.set_rotation(rot) + if fontsize is not None: + label.set_fontsize(fontsize) + + @property + def legend_title(self): + if not isinstance(self.data.columns, MultiIndex): + name = self.data.columns.name + if name is not None: + name = pprint_thing(name) + return name + else: + stringified = map(pprint_thing, + self.data.columns.names) + return ','.join(stringified) + + def _add_legend_handle(self, handle, label, index=None): + if label is not None: + if self.mark_right and index is not None: + if self.on_right(index): + label = label + ' (right)' + self.legend_handles.append(handle) + self.legend_labels.append(label) + + def _make_legend(self): + ax, leg = self._get_ax_legend(self.axes[0]) + + handles = [] + labels = [] + title = '' + + if not self.subplots: + if leg is not None: + title = leg.get_title().get_text() + handles = leg.legendHandles + labels = [x.get_text() for x in leg.get_texts()] + + if self.legend: + if self.legend == 'reverse': + self.legend_handles = reversed(self.legend_handles) + self.legend_labels = reversed(self.legend_labels) + + handles += self.legend_handles + labels += self.legend_labels + if self.legend_title is not None: + title = self.legend_title + + if len(handles) > 0: + ax.legend(handles, labels, loc='best', title=title) + + elif self.subplots and self.legend: + for ax in self.axes: + if ax.get_visible(): + ax.legend(loc='best') + + def _get_ax_legend(self, ax): + leg = ax.get_legend() + other_ax = (getattr(ax, 'left_ax', None) or + getattr(ax, 'right_ax', None)) + other_leg = None + if other_ax is not None: + other_leg = other_ax.get_legend() + if leg is None and other_leg is not None: + leg = other_leg + ax = other_ax + return ax, leg + + @cache_readonly + def plt(self): + import matplotlib.pyplot as plt + return plt + + @staticmethod + def mpl_ge_1_3_1(): + return _mpl_ge_1_3_1() + + @staticmethod + def mpl_ge_1_5_0(): + return _mpl_ge_1_5_0() + + _need_to_set_index = False + + def _get_xticks(self, convert_period=False): + index = self.data.index + is_datetype = index.inferred_type in ('datetime', 'date', + 'datetime64', 'time') + + if self.use_index: + if convert_period and isinstance(index, PeriodIndex): + self.data = self.data.reindex(index=index.sort_values()) + x = self.data.index.to_timestamp()._mpl_repr() + elif index.is_numeric(): + """ + Matplotlib supports numeric values or datetime objects as + xaxis values. Taking LBYL approach here, by the time + matplotlib raises exception when using non numeric/datetime + values for xaxis, several actions are already taken by plt. + """ + x = index._mpl_repr() + elif is_datetype: + self.data = self.data.sort_index() + x = self.data.index._mpl_repr() + else: + self._need_to_set_index = True + x = lrange(len(index)) + else: + x = lrange(len(index)) + + return x + + @classmethod + def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): + mask = isnull(y) + if mask.any(): + y = np.ma.array(y) + y = np.ma.masked_where(mask, y) + + if isinstance(x, Index): + x = x._mpl_repr() + + if is_errorbar: + if 'xerr' in kwds: + kwds['xerr'] = np.array(kwds.get('xerr')) + if 'yerr' in kwds: + kwds['yerr'] = np.array(kwds.get('yerr')) + return ax.errorbar(x, y, **kwds) + else: + # prevent style kwarg from going to errorbar, where it is + # unsupported + if style is not None: + args = (x, y, style) + else: + args = (x, y) + return ax.plot(*args, **kwds) + + def _get_index_name(self): + if isinstance(self.data.index, MultiIndex): + name = self.data.index.names + if any(x is not None for x in name): + name = ','.join([pprint_thing(x) for x in name]) + else: + name = None + else: + name = self.data.index.name + if name is not None: + name = pprint_thing(name) + + return name + + @classmethod + def _get_ax_layer(cls, ax, primary=True): + """get left (primary) or right (secondary) axes""" + if primary: + return getattr(ax, 'left_ax', ax) + else: + return getattr(ax, 'right_ax', ax) + + def _get_ax(self, i): + # get the twinx ax if appropriate + if self.subplots: + ax = self.axes[i] + ax = self._maybe_right_yaxis(ax, i) + self.axes[i] = ax + else: + ax = self.axes[0] + ax = self._maybe_right_yaxis(ax, i) + + ax.get_yaxis().set_visible(True) + return ax + + def on_right(self, i): + if isinstance(self.secondary_y, bool): + return self.secondary_y + + if isinstance(self.secondary_y, (tuple, list, np.ndarray, Index)): + return self.data.columns[i] in self.secondary_y + + def _apply_style_colors(self, colors, kwds, col_num, label): + """ + Manage style and color based on column number and its label. + Returns tuple of appropriate style and kwds which "color" may be added. + """ + style = None + if self.style is not None: + if isinstance(self.style, list): + try: + style = self.style[col_num] + except IndexError: + pass + elif isinstance(self.style, dict): + style = self.style.get(label, style) + else: + style = self.style + + has_color = 'color' in kwds or self.colormap is not None + nocolor_style = style is None or re.match('[a-z]+', style) is None + if (has_color or self.subplots) and nocolor_style: + kwds['color'] = colors[col_num % len(colors)] + return style, kwds + + def _get_colors(self, num_colors=None, color_kwds='color'): + if num_colors is None: + num_colors = self.nseries + + return _get_standard_colors(num_colors=num_colors, + colormap=self.colormap, + color=self.kwds.get(color_kwds)) + + def _parse_errorbars(self, label, err): + """ + Look for error keyword arguments and return the actual errorbar data + or return the error DataFrame/dict + + Error bars can be specified in several ways: + Series: the user provides a pandas.Series object of the same + length as the data + ndarray: provides a np.ndarray of the same length as the data + DataFrame/dict: error values are paired with keys matching the + key in the plotted DataFrame + str: the name of the column within the plotted DataFrame + """ + + if err is None: + return None + + from pandas import DataFrame, Series + + def match_labels(data, e): + e = e.reindex_axis(data.index) + return e + + # key-matched DataFrame + if isinstance(err, DataFrame): + + err = match_labels(self.data, err) + # key-matched dict + elif isinstance(err, dict): + pass + + # Series of error values + elif isinstance(err, Series): + # broadcast error series across data + err = match_labels(self.data, err) + err = np.atleast_2d(err) + err = np.tile(err, (self.nseries, 1)) + + # errors are a column in the dataframe + elif isinstance(err, string_types): + evalues = self.data[err].values + self.data = self.data[self.data.columns.drop(err)] + err = np.atleast_2d(evalues) + err = np.tile(err, (self.nseries, 1)) + + elif is_list_like(err): + if is_iterator(err): + err = np.atleast_2d(list(err)) + else: + # raw error values + err = np.atleast_2d(err) + + err_shape = err.shape + + # asymmetrical error bars + if err.ndim == 3: + if (err_shape[0] != self.nseries) or \ + (err_shape[1] != 2) or \ + (err_shape[2] != len(self.data)): + msg = "Asymmetrical error bars should be provided " + \ + "with the shape (%u, 2, %u)" % \ + (self.nseries, len(self.data)) + raise ValueError(msg) + + # broadcast errors to each data series + if len(err) == 1: + err = np.tile(err, (self.nseries, 1)) + + elif is_number(err): + err = np.tile([err], (self.nseries, len(self.data))) + + else: + msg = "No valid %s detected" % label + raise ValueError(msg) + + return err + + def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): + from pandas import DataFrame + errors = {} + + for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): + if flag: + err = self.errors[kw] + # user provided label-matched dataframe of errors + if isinstance(err, (DataFrame, dict)): + if label is not None and label in err.keys(): + err = err[label] + else: + err = None + elif index is not None and err is not None: + err = err[index] + + if err is not None: + errors[kw] = err + return errors + + def _get_subplots(self): + from matplotlib.axes import Subplot + return [ax for ax in self.axes[0].get_figure().get_axes() + if isinstance(ax, Subplot)] + + def _get_axes_layout(self): + axes = self._get_subplots() + x_set = set() + y_set = set() + for ax in axes: + # check axes coordinates to estimate layout + points = ax.get_position().get_points() + x_set.add(points[0][0]) + y_set.add(points[0][1]) + return (len(y_set), len(x_set)) + + +class PlanePlot(MPLPlot): + """ + Abstract class for plotting on plane, currently scatter and hexbin. + """ + + _layout_type = 'single' + + def __init__(self, data, x, y, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + if x is None or y is None: + raise ValueError(self._kind + ' requires and x and y column') + if is_integer(x) and not self.data.columns.holds_integer(): + x = self.data.columns[x] + if is_integer(y) and not self.data.columns.holds_integer(): + y = self.data.columns[y] + self.x = x + self.y = y + + @property + def nseries(self): + return 1 + + def _post_plot_logic(self, ax, data): + x, y = self.x, self.y + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) + + +class ScatterPlot(PlanePlot): + _kind = 'scatter' + + def __init__(self, data, x, y, s=None, c=None, **kwargs): + if s is None: + # hide the matplotlib default for size, in case we want to change + # the handling of this argument later + s = 20 + super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs) + if is_integer(c) and not self.data.columns.holds_integer(): + c = self.data.columns[c] + self.c = c + + def _make_plot(self): + x, y, c, data = self.x, self.y, self.c, self.data + ax = self.axes[0] + + c_is_column = is_hashable(c) and c in self.data.columns + + # plot a colorbar only if a colormap is provided or necessary + cb = self.kwds.pop('colorbar', self.colormap or c_is_column) + + # pandas uses colormap, matplotlib uses cmap. + cmap = self.colormap or 'Greys' + cmap = self.plt.cm.get_cmap(cmap) + color = self.kwds.pop("color", None) + if c is not None and color is not None: + raise TypeError('Specify exactly one of `c` and `color`') + elif c is None and color is None: + c_values = self.plt.rcParams['patch.facecolor'] + elif color is not None: + c_values = color + elif c_is_column: + c_values = self.data[c].values + else: + c_values = c + + if self.legend and hasattr(self, 'label'): + label = self.label + else: + label = None + scatter = ax.scatter(data[x].values, data[y].values, c=c_values, + label=label, cmap=cmap, **self.kwds) + if cb: + img = ax.collections[0] + kws = dict(ax=ax) + if self.mpl_ge_1_3_1(): + kws['label'] = c if c_is_column else '' + self.fig.colorbar(img, **kws) + + if label is not None: + self._add_legend_handle(scatter, label) + else: + self.legend = False + + errors_x = self._get_errorbars(label=x, index=0, yerr=False) + errors_y = self._get_errorbars(label=y, index=0, xerr=False) + if len(errors_x) > 0 or len(errors_y) > 0: + err_kwds = dict(errors_x, **errors_y) + err_kwds['ecolor'] = scatter.get_facecolor()[0] + ax.errorbar(data[x].values, data[y].values, + linestyle='none', **err_kwds) + + +class HexBinPlot(PlanePlot): + _kind = 'hexbin' + + def __init__(self, data, x, y, C=None, **kwargs): + super(HexBinPlot, self).__init__(data, x, y, **kwargs) + if is_integer(C) and not self.data.columns.holds_integer(): + C = self.data.columns[C] + self.C = C + + def _make_plot(self): + x, y, data, C = self.x, self.y, self.data, self.C + ax = self.axes[0] + # pandas uses colormap, matplotlib uses cmap. + cmap = self.colormap or 'BuGn' + cmap = self.plt.cm.get_cmap(cmap) + cb = self.kwds.pop('colorbar', True) + + if C is None: + c_values = None + else: + c_values = data[C].values + + ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, + **self.kwds) + if cb: + img = ax.collections[0] + self.fig.colorbar(img, ax=ax) + + def _make_legend(self): + pass + + +class LinePlot(MPLPlot): + _kind = 'line' + _default_rot = 0 + orientation = 'vertical' + + def __init__(self, data, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + if self.stacked: + self.data = self.data.fillna(value=0) + self.x_compat = plot_params['x_compat'] + if 'x_compat' in self.kwds: + self.x_compat = bool(self.kwds.pop('x_compat')) + + def _is_ts_plot(self): + # this is slightly deceptive + return not self.x_compat and self.use_index and self._use_dynamic_x() + + def _use_dynamic_x(self): + from pandas.plotting._timeseries import _use_dynamic_x + return _use_dynamic_x(self._get_ax(0), self.data) + + def _make_plot(self): + if self._is_ts_plot(): + from pandas.plotting._timeseries import _maybe_convert_index + data = _maybe_convert_index(self._get_ax(0), self.data) + + x = data.index # dummy, not used + plotf = self._ts_plot + it = self._iter_data(data=data, keep_index=True) + else: + x = self._get_xticks(convert_period=True) + plotf = self._plot + it = self._iter_data() + + stacking_id = self._get_stacking_id() + is_errorbar = any(e is not None for e in self.errors.values()) + + colors = self._get_colors() + for i, (label, y) in enumerate(it): + ax = self._get_ax(i) + kwds = self.kwds.copy() + style, kwds = self._apply_style_colors(colors, kwds, i, label) + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) # .encode('utf-8') + kwds['label'] = label + + newlines = plotf(ax, x, y, style=style, column_num=i, + stacking_id=stacking_id, + is_errorbar=is_errorbar, + **kwds) + self._add_legend_handle(newlines[0], label, index=i) + + lines = _get_all_lines(ax) + left, right = _get_xlim(lines) + ax.set_xlim(left, right) + + @classmethod + def _plot(cls, ax, x, y, style=None, column_num=None, + stacking_id=None, **kwds): + # column_num is used to get the target column from protf in line and + # area plots + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) + lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) + cls._update_stacker(ax, stacking_id, y) + return lines + + @classmethod + def _ts_plot(cls, ax, x, data, style=None, **kwds): + from pandas.plotting._timeseries import (_maybe_resample, + _decorate_axes, + format_dateaxis) + # accept x to be consistent with normal plot func, + # x is not passed to tsplot as it uses data.index as x coordinate + # column_num must be in kwds for stacking purpose + freq, data = _maybe_resample(data, ax, kwds) + + # Set ax with freq info + _decorate_axes(ax, freq, kwds) + # digging deeper + if hasattr(ax, 'left_ax'): + _decorate_axes(ax.left_ax, freq, kwds) + if hasattr(ax, 'right_ax'): + _decorate_axes(ax.right_ax, freq, kwds) + ax._plot_data.append((data, cls._kind, kwds)) + + lines = cls._plot(ax, data.index, data.values, style=style, **kwds) + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, data.index) + return lines + + def _get_stacking_id(self): + if self.stacked: + return id(self.data) + else: + return None + + @classmethod + def _initialize_stacker(cls, ax, stacking_id, n): + if stacking_id is None: + return + if not hasattr(ax, '_stacker_pos_prior'): + ax._stacker_pos_prior = {} + if not hasattr(ax, '_stacker_neg_prior'): + ax._stacker_neg_prior = {} + ax._stacker_pos_prior[stacking_id] = np.zeros(n) + ax._stacker_neg_prior[stacking_id] = np.zeros(n) + + @classmethod + def _get_stacked_values(cls, ax, stacking_id, values, label): + if stacking_id is None: + return values + if not hasattr(ax, '_stacker_pos_prior'): + # stacker may not be initialized for subplots + cls._initialize_stacker(ax, stacking_id, len(values)) + + if (values >= 0).all(): + return ax._stacker_pos_prior[stacking_id] + values + elif (values <= 0).all(): + return ax._stacker_neg_prior[stacking_id] + values + + raise ValueError('When stacked is True, each column must be either ' + 'all positive or negative.' + '{0} contains both positive and negative values' + .format(label)) + + @classmethod + def _update_stacker(cls, ax, stacking_id, values): + if stacking_id is None: + return + if (values >= 0).all(): + ax._stacker_pos_prior[stacking_id] += values + elif (values <= 0).all(): + ax._stacker_neg_prior[stacking_id] += values + + def _post_plot_logic(self, ax, data): + condition = (not self._use_dynamic_x() and + data.index.is_all_dates and + not self.subplots or + (self.subplots and self.sharex)) + + index_name = self._get_index_name() + + if condition: + # irregular TS rotated 30 deg. by default + # probably a better place to check / set this. + if not self._rot_set: + self.rot = 30 + format_date_labels(ax, rot=self.rot) + + if index_name is not None and self.use_index: + ax.set_xlabel(index_name) + + +class AreaPlot(LinePlot): + _kind = 'area' + + def __init__(self, data, **kwargs): + kwargs.setdefault('stacked', True) + data = data.fillna(value=0) + LinePlot.__init__(self, data, **kwargs) + + if not self.stacked: + # use smaller alpha to distinguish overlap + self.kwds.setdefault('alpha', 0.5) + + if self.logy or self.loglog: + raise ValueError("Log-y scales are not supported in area plot") + + @classmethod + def _plot(cls, ax, x, y, style=None, column_num=None, + stacking_id=None, is_errorbar=False, **kwds): + + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(y)) + y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) + + # need to remove label, because subplots uses mpl legend as it is + line_kwds = kwds.copy() + if cls.mpl_ge_1_5_0(): + line_kwds.pop('label') + lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) + + # get data from the line to get coordinates for fill_between + xdata, y_values = lines[0].get_data(orig=False) + + # unable to use ``_get_stacked_values`` here to get starting point + if stacking_id is None: + start = np.zeros(len(y)) + elif (y >= 0).all(): + start = ax._stacker_pos_prior[stacking_id] + elif (y <= 0).all(): + start = ax._stacker_neg_prior[stacking_id] + else: + start = np.zeros(len(y)) + + if 'color' not in kwds: + kwds['color'] = lines[0].get_color() + + rect = ax.fill_between(xdata, start, y_values, **kwds) + cls._update_stacker(ax, stacking_id, y) + + # LinePlot expects list of artists + res = [rect] if cls.mpl_ge_1_5_0() else lines + return res + + def _add_legend_handle(self, handle, label, index=None): + if not self.mpl_ge_1_5_0(): + from matplotlib.patches import Rectangle + # Because fill_between isn't supported in legend, + # specifically add Rectangle handle here + alpha = self.kwds.get('alpha', None) + handle = Rectangle((0, 0), 1, 1, fc=handle.get_color(), + alpha=alpha) + LinePlot._add_legend_handle(self, handle, label, index=index) + + def _post_plot_logic(self, ax, data): + LinePlot._post_plot_logic(self, ax, data) + + if self.ylim is None: + if (data >= 0).all().all(): + ax.set_ylim(0, None) + elif (data <= 0).all().all(): + ax.set_ylim(None, 0) + + +class BarPlot(MPLPlot): + _kind = 'bar' + _default_rot = 90 + orientation = 'vertical' + + def __init__(self, data, **kwargs): + self.bar_width = kwargs.pop('width', 0.5) + pos = kwargs.pop('position', 0.5) + kwargs.setdefault('align', 'center') + self.tick_pos = np.arange(len(data)) + + self.bottom = kwargs.pop('bottom', 0) + self.left = kwargs.pop('left', 0) + + self.log = kwargs.pop('log', False) + MPLPlot.__init__(self, data, **kwargs) + + if self.stacked or self.subplots: + self.tickoffset = self.bar_width * pos + if kwargs['align'] == 'edge': + self.lim_offset = self.bar_width / 2 + else: + self.lim_offset = 0 + else: + if kwargs['align'] == 'edge': + w = self.bar_width / self.nseries + self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 + self.lim_offset = w * 0.5 + else: + self.tickoffset = self.bar_width * pos + self.lim_offset = 0 + + self.ax_pos = self.tick_pos - self.tickoffset + + def _args_adjust(self): + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + if is_list_like(self.left): + self.left = np.array(self.left) + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.bar(x, y, w, bottom=start, log=log, **kwds) + + @property + def _start_base(self): + return self.bottom + + def _make_plot(self): + import matplotlib as mpl + + colors = self._get_colors() + ncolors = len(colors) + + pos_prior = neg_prior = np.zeros(len(self.data)) + K = self.nseries + + for i, (label, y) in enumerate(self._iter_data(fillna=0)): + ax = self._get_ax(i) + kwds = self.kwds.copy() + kwds['color'] = colors[i % ncolors] + + errors = self._get_errorbars(label=label, index=i) + kwds = dict(kwds, **errors) + + label = pprint_thing(label) + + if (('yerr' in kwds) or ('xerr' in kwds)) \ + and (kwds.get('ecolor') is None): + kwds['ecolor'] = mpl.rcParams['xtick.color'] + + start = 0 + if self.log and (y >= 1).all(): + start = 1 + start = start + self._start_base + + if self.subplots: + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + ax.set_title(label) + elif self.stacked: + mask = y > 0 + start = np.where(mask, pos_prior, neg_prior) + self._start_base + w = self.bar_width / 2 + rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, + start=start, label=label, + log=self.log, **kwds) + pos_prior = pos_prior + np.where(mask, y, 0) + neg_prior = neg_prior + np.where(mask, 0, y) + else: + w = self.bar_width / K + rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, + start=start, label=label, + log=self.log, **kwds) + self._add_legend_handle(rect, label, index=i) + + def _post_plot_logic(self, ax, data): + if self.use_index: + str_index = [pprint_thing(key) for key in data.index] + else: + str_index = [pprint_thing(key) for key in range(data.shape[0])] + name = self._get_index_name() + + s_edge = self.ax_pos[0] - 0.25 + self.lim_offset + e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset + + self._decorate_ticks(ax, name, str_index, s_edge, e_edge) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + ax.set_xlim((start_edge, end_edge)) + ax.set_xticks(self.tick_pos) + ax.set_xticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_xlabel(name) + + +class BarhPlot(BarPlot): + _kind = 'barh' + _default_rot = 0 + orientation = 'horizontal' + + @property + def _start_base(self): + return self.left + + @classmethod + def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): + return ax.barh(x, y, w, left=start, log=log, **kwds) + + def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): + # horizontal bars + ax.set_ylim((start_edge, end_edge)) + ax.set_yticks(self.tick_pos) + ax.set_yticklabels(ticklabels) + if name is not None and self.use_index: + ax.set_ylabel(name) + + +class HistPlot(LinePlot): + _kind = 'hist' + + def __init__(self, data, bins=10, bottom=0, **kwargs): + self.bins = bins # use mpl default + self.bottom = bottom + # Do not call LinePlot.__init__ which may fill nan + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if is_integer(self.bins): + # create common bin edge + values = (self.data._convert(datetime=True)._get_numeric_data()) + values = np.ravel(values) + values = values[~isnull(values)] + + hist, self.bins = np.histogram( + values, bins=self.bins, + range=self.kwds.get('range', None), + weights=self.kwds.get('weights', None)) + + if is_list_like(self.bottom): + self.bottom = np.array(self.bottom) + + @classmethod + def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, + stacking_id=None, **kwds): + if column_num == 0: + cls._initialize_stacker(ax, stacking_id, len(bins) - 1) + y = y[~isnull(y)] + + base = np.zeros(len(bins) - 1) + bottom = bottom + \ + cls._get_stacked_values(ax, stacking_id, base, kwds['label']) + # ignore style + n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) + cls._update_stacker(ax, stacking_id, n) + return patches + + def _make_plot(self): + colors = self._get_colors() + stacking_id = self._get_stacking_id() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + + kwds = self.kwds.copy() + + label = pprint_thing(label) + kwds['label'] = label + + style, kwds = self._apply_style_colors(colors, kwds, i, label) + if style is not None: + kwds['style'] = style + + kwds = self._make_plot_keywords(kwds, y) + artists = self._plot(ax, y, column_num=i, + stacking_id=stacking_id, **kwds) + self._add_legend_handle(artists[0], label, index=i) + + def _make_plot_keywords(self, kwds, y): + """merge BoxPlot/KdePlot properties to passed kwds""" + # y is required for KdePlot + kwds['bottom'] = self.bottom + kwds['bins'] = self.bins + return kwds + + def _post_plot_logic(self, ax, data): + if self.orientation == 'horizontal': + ax.set_xlabel('Frequency') + else: + ax.set_ylabel('Frequency') + + @property + def orientation(self): + if self.kwds.get('orientation', None) == 'horizontal': + return 'horizontal' + else: + return 'vertical' + + +class KdePlot(HistPlot): + _kind = 'kde' + orientation = 'vertical' + + def __init__(self, data, bw_method=None, ind=None, **kwargs): + MPLPlot.__init__(self, data, **kwargs) + self.bw_method = bw_method + self.ind = ind + + def _args_adjust(self): + pass + + def _get_ind(self, y): + if self.ind is None: + # np.nanmax() and np.nanmin() ignores the missing values + sample_range = np.nanmax(y) - np.nanmin(y) + ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, + np.nanmax(y) + 0.5 * sample_range, 1000) + else: + ind = self.ind + return ind + + @classmethod + def _plot(cls, ax, y, style=None, bw_method=None, ind=None, + column_num=None, stacking_id=None, **kwds): + from scipy.stats import gaussian_kde + from scipy import __version__ as spv + + y = remove_na(y) + + if LooseVersion(spv) >= '0.11.0': + gkde = gaussian_kde(y, bw_method=bw_method) + else: + gkde = gaussian_kde(y) + if bw_method is not None: + msg = ('bw_method was added in Scipy 0.11.0.' + + ' Scipy version in use is %s.' % spv) + warnings.warn(msg) + + y = gkde.evaluate(ind) + lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) + return lines + + def _make_plot_keywords(self, kwds, y): + kwds['bw_method'] = self.bw_method + kwds['ind'] = self._get_ind(y) + return kwds + + def _post_plot_logic(self, ax, data): + ax.set_ylabel('Density') + + +class PiePlot(MPLPlot): + _kind = 'pie' + _layout_type = 'horizontal' + + def __init__(self, data, kind=None, **kwargs): + data = data.fillna(value=0) + if (data < 0).any().any(): + raise ValueError("{0} doesn't allow negative values".format(kind)) + MPLPlot.__init__(self, data, kind=kind, **kwargs) + + def _args_adjust(self): + self.grid = False + self.logy = False + self.logx = False + self.loglog = False + + def _validate_color_args(self): + pass + + def _make_plot(self): + colors = self._get_colors( + num_colors=len(self.data), color_kwds='colors') + self.kwds.setdefault('colors', colors) + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + if label is not None: + label = pprint_thing(label) + ax.set_ylabel(label) + + kwds = self.kwds.copy() + + def blank_labeler(label, value): + if value == 0: + return '' + else: + return label + + idx = [pprint_thing(v) for v in self.data.index] + labels = kwds.pop('labels', idx) + # labels is used for each wedge's labels + # Blank out labels for values of 0 so they don't overlap + # with nonzero wedges + if labels is not None: + blabels = [blank_labeler(l, value) for + l, value in zip(labels, y)] + else: + blabels = None + results = ax.pie(y, labels=blabels, **kwds) + + if kwds.get('autopct', None) is not None: + patches, texts, autotexts = results + else: + patches, texts = results + autotexts = [] + + if self.fontsize is not None: + for t in texts + autotexts: + t.set_fontsize(self.fontsize) + + # leglabels is used for legend labels + leglabels = labels if labels is not None else idx + for p, l in zip(patches, leglabels): + self._add_legend_handle(p, l) + + +class BoxPlot(LinePlot): + _kind = 'box' + _layout_type = 'horizontal' + + _valid_return_types = (None, 'axes', 'dict', 'both') + # namedtuple to hold results + BP = namedtuple("Boxplot", ['ax', 'lines']) + + def __init__(self, data, return_type='axes', **kwargs): + # Do not call LinePlot.__init__ which may fill nan + if return_type not in self._valid_return_types: + raise ValueError( + "return_type must be {None, 'axes', 'dict', 'both'}") + + self.return_type = return_type + MPLPlot.__init__(self, data, **kwargs) + + def _args_adjust(self): + if self.subplots: + # Disable label ax sharing. Otherwise, all subplots shows last + # column label + if self.orientation == 'vertical': + self.sharex = False + else: + self.sharey = False + + @classmethod + def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): + if y.ndim == 2: + y = [remove_na(v) for v in y] + # Boxplot fails with empty arrays, so need to add a NaN + # if any cols are empty + # GH 8181 + y = [v if v.size > 0 else np.array([np.nan]) for v in y] + else: + y = remove_na(y) + bp = ax.boxplot(y, **kwds) + + if return_type == 'dict': + return bp, bp + elif return_type == 'both': + return cls.BP(ax=ax, lines=bp), bp + else: + return ax, bp + + def _validate_color_args(self): + if 'color' in self.kwds: + if self.colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + self.color = self.kwds.pop('color') + + if isinstance(self.color, dict): + valid_keys = ['boxes', 'whiskers', 'medians', 'caps'] + for key, values in compat.iteritems(self.color): + if key not in valid_keys: + raise ValueError("color dict contains invalid " + "key '{0}' " + "The key must be either {1}" + .format(key, valid_keys)) + else: + self.color = None + + # get standard colors for default + colors = _get_standard_colors(num_colors=3, + colormap=self.colormap, + color=None) + # use 2 colors by default, for box/whisker and median + # flier colors isn't needed here + # because it can be specified by ``sym`` kw + self._boxes_c = colors[0] + self._whiskers_c = colors[0] + self._medians_c = colors[2] + self._caps_c = 'k' # mpl default + + def _get_colors(self, num_colors=None, color_kwds='color'): + pass + + def maybe_color_bp(self, bp): + if isinstance(self.color, dict): + boxes = self.color.get('boxes', self._boxes_c) + whiskers = self.color.get('whiskers', self._whiskers_c) + medians = self.color.get('medians', self._medians_c) + caps = self.color.get('caps', self._caps_c) + else: + # Other types are forwarded to matplotlib + # If None, use default colors + boxes = self.color or self._boxes_c + whiskers = self.color or self._whiskers_c + medians = self.color or self._medians_c + caps = self.color or self._caps_c + + from matplotlib.artist import setp + setp(bp['boxes'], color=boxes, alpha=1) + setp(bp['whiskers'], color=whiskers, alpha=1) + setp(bp['medians'], color=medians, alpha=1) + setp(bp['caps'], color=caps, alpha=1) + + def _make_plot(self): + if self.subplots: + self._return_obj = Series() + + for i, (label, y) in enumerate(self._iter_data()): + ax = self._get_ax(i) + kwds = self.kwds.copy() + + ret, bp = self._plot(ax, y, column_num=i, + return_type=self.return_type, **kwds) + self.maybe_color_bp(bp) + self._return_obj[label] = ret + + label = [pprint_thing(label)] + self._set_ticklabels(ax, label) + else: + y = self.data.values.T + ax = self._get_ax(0) + kwds = self.kwds.copy() + + ret, bp = self._plot(ax, y, column_num=0, + return_type=self.return_type, **kwds) + self.maybe_color_bp(bp) + self._return_obj = ret + + labels = [l for l, _ in self._iter_data()] + labels = [pprint_thing(l) for l in labels] + if not self.use_index: + labels = [pprint_thing(key) for key in range(len(labels))] + self._set_ticklabels(ax, labels) + + def _set_ticklabels(self, ax, labels): + if self.orientation == 'vertical': + ax.set_xticklabels(labels) + else: + ax.set_yticklabels(labels) + + def _make_legend(self): + pass + + def _post_plot_logic(self, ax, data): + pass + + @property + def orientation(self): + if self.kwds.get('vert', True): + return 'vertical' + else: + return 'horizontal' + + @property + def result(self): + if self.return_type is None: + return super(BoxPlot, self).result + else: + return self._return_obj + + +# kinds supported by both dataframe and series +_common_kinds = ['line', 'bar', 'barh', + 'kde', 'density', 'area', 'hist', 'box'] +# kinds supported by dataframe +_dataframe_kinds = ['scatter', 'hexbin'] +# kinds supported only by series or dataframe single column +_series_kinds = ['pie'] +_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds + +_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, + ScatterPlot, HexBinPlot, AreaPlot, PiePlot] + +_plot_klass = {} +for klass in _klasses: + _plot_klass[klass._kind] = klass + + +def _plot(data, x=None, y=None, subplots=False, + ax=None, kind='line', **kwds): + kind = _get_standard_kind(kind.lower().strip()) + if kind in _all_kinds: + klass = _plot_klass[kind] + else: + raise ValueError("%r is not a valid plot kind" % kind) + + from pandas import DataFrame + if kind in _dataframe_kinds: + if isinstance(data, DataFrame): + plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, + kind=kind, **kwds) + else: + raise ValueError("plot kind %r can only be used for data frames" + % kind) + + elif kind in _series_kinds: + if isinstance(data, DataFrame): + if y is None and subplots is False: + msg = "{0} requires either y column or 'subplots=True'" + raise ValueError(msg.format(kind)) + elif y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + # converted to series actually. copy to not modify + data = data[y].copy() + data.index.name = y + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + else: + if isinstance(data, DataFrame): + if x is not None: + if is_integer(x) and not data.columns.holds_integer(): + x = data.columns[x] + data = data.set_index(x) + + if y is not None: + if is_integer(y) and not data.columns.holds_integer(): + y = data.columns[y] + label = kwds['label'] if 'label' in kwds else y + series = data[y].copy() # Don't modify + series.name = label + + for kw in ['xerr', 'yerr']: + if (kw in kwds) and \ + (isinstance(kwds[kw], string_types) or + is_integer(kwds[kw])): + try: + kwds[kw] = data[kwds[kw]] + except (IndexError, KeyError, TypeError): + pass + data = series + plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) + + plot_obj.generate() + plot_obj.draw() + return plot_obj.result + + +df_kind = """- 'scatter' : scatter plot + - 'hexbin' : hexbin plot""" +series_kind = "" + +df_coord = """x : label or position, default None + y : label or position, default None + Allows plotting of one column versus another""" +series_coord = "" + +df_unique = """stacked : boolean, default False in line and + bar plots, and True in area plot. If True, create stacked plot. + sort_columns : boolean, default False + Sort column names to determine plot ordering + secondary_y : boolean or sequence, default False + Whether to plot on the secondary y-axis + If a list/tuple, which columns to plot on secondary y-axis""" +series_unique = """label : label argument to provide to plot + secondary_y : boolean or sequence of ints, default False + If True then y-axis will be on the right""" + +df_ax = """ax : matplotlib axes object, default None + subplots : boolean, default False + Make separate subplots for each column + sharex : boolean, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in; Be aware, that passing in both an ax and sharex=True + will alter all x axis labels for all axis in a figure! + sharey : boolean, default False + In case subplots=True, share y axis and set some y axis labels to + invisible + layout : tuple (optional) + (rows, columns) for the layout of subplots""" +series_ax = """ax : matplotlib axes object + If not passed, uses gca()""" + +df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe + column, the values of that column are used to color each point. + - If `kind` = 'hexbin', you can control the size of the bins with the + `gridsize` argument. By default, a histogram of the counts around each + `(x, y)` point is computed. You can specify alternative aggregations + by passing values to the `C` and `reduce_C_function` arguments. + `C` specifies the value at each `(x, y)` point and `reduce_C_function` + is a function of one argument that reduces all the values in a bin to + a single number (e.g. `mean`, `max`, `sum`, `std`).""" +series_note = "" + +_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', + klass_kind=df_kind, klass_coord=df_coord, + klass_ax=df_ax, klass_unique=df_unique, + klass_note=df_note) +_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', + klass_kind=series_kind, + klass_coord=series_coord, klass_ax=series_ax, + klass_unique=series_unique, + klass_note=series_note) + +_shared_docs['plot'] = """ + Make plots of %(klass)s using matplotlib / pylab. + + *New in version 0.17.0:* Each plot kind has a corresponding method on the + ``%(klass)s.plot`` accessor: + ``%(klass_obj)s.plot(kind='line')`` is equivalent to + ``%(klass_obj)s.plot.line()``. + + Parameters + ---------- + data : %(klass)s + %(klass_coord)s + kind : str + - 'line' : line plot (default) + - 'bar' : vertical bar plot + - 'barh' : horizontal bar plot + - 'hist' : histogram + - 'box' : boxplot + - 'kde' : Kernel Density Estimation plot + - 'density' : same as 'kde' + - 'area' : area plot + - 'pie' : pie plot + %(klass_kind)s + %(klass_ax)s + figsize : a tuple (width, height) in inches + use_index : boolean, default True + Use index as ticks for x axis + title : string or list + Title to use for the plot. If a string is passed, print the string at + the top of the figure. If a list is passed and `subplots` is True, + print each item in the list above the corresponding subplot. + grid : boolean, default None (matlab style default) + Axis grid lines + legend : False/True/'reverse' + Place legend on axis subplots + style : list or dict + matplotlib line style per column + logx : boolean, default False + Use log scaling on x axis + logy : boolean, default False + Use log scaling on y axis + loglog : boolean, default False + Use log scaling on both x and y axes + xticks : sequence + Values to use for the xticks + yticks : sequence + Values to use for the yticks + xlim : 2-tuple/list + ylim : 2-tuple/list + rot : int, default None + Rotation for ticks (xticks for vertical, yticks for horizontal plots) + fontsize : int, default None + Font size for xticks and yticks + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + colorbar : boolean, optional + If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) + position : float + Specify relative alignments for bar plot layout. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) + layout : tuple (optional) + (rows, columns) for the layout of the plot + table : boolean, Series or DataFrame, default False + If True, draw a table using the data in the DataFrame and the data will + be transposed to meet matplotlib's default layout. + If a Series or DataFrame is passed, use passed data to draw a table. + yerr : DataFrame, Series, array-like, dict and str + See :ref:`Plotting with Error Bars ` for + detail. + xerr : same types as yerr. + %(klass_unique)s + mark_right : boolean, default True + When using a secondary_y axis, automatically mark the column + labels with "(right)" in the legend + kwds : keywords + Options to pass to matplotlib plotting method + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + + Notes + ----- + + - See matplotlib documentation online for more on this subject + - If `kind` = 'bar' or 'barh', you can specify relative alignments + for bar plot layout by `position` keyword. + From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) + %(klass_note)s + + """ + + +@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) +def plot_frame(data, x=None, y=None, kind='line', ax=None, + subplots=False, sharex=None, sharey=False, layout=None, + figsize=None, use_index=True, title=None, grid=None, + legend=True, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + secondary_y=False, sort_columns=False, + **kwds): + return _plot(data, kind=kind, x=x, y=y, ax=ax, + subplots=subplots, sharex=sharex, sharey=sharey, + layout=layout, figsize=figsize, use_index=use_index, + title=title, grid=grid, legend=legend, + style=style, logx=logx, logy=logy, loglog=loglog, + xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, + rot=rot, fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, + secondary_y=secondary_y, sort_columns=sort_columns, + **kwds) + + +@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) +def plot_series(data, kind='line', ax=None, # Series unique + figsize=None, use_index=True, title=None, grid=None, + legend=False, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + label=None, secondary_y=False, # Series unique + **kwds): + + import matplotlib.pyplot as plt + """ + If no axes is specified, check whether there are existing figures + If there is no existing figures, _gca() will + create a figure with the default figsize, causing the figsize=parameter to + be ignored. + """ + if ax is None and len(plt.get_fignums()) > 0: + ax = _gca() + ax = MPLPlot._get_ax_layer(ax) + return _plot(data, kind=kind, ax=ax, + figsize=figsize, use_index=use_index, title=title, + grid=grid, legend=legend, + style=style, logx=logx, logy=logy, loglog=loglog, + xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, + rot=rot, fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, + label=label, secondary_y=secondary_y, + **kwds) + + +_shared_docs['boxplot'] = """ + Make a box plot from DataFrame column optionally grouped by some columns or + other inputs + + Parameters + ---------- + data : the pandas object holding the data + column : column name or list of names, or vector + Can be any valid input to groupby + by : string or sequence + Column in the DataFrame to group by + ax : Matplotlib axes object, optional + fontsize : int or string + rot : label rotation angle + figsize : A tuple (width, height) in inches + grid : Setting this to True will show the grid + layout : tuple (optional) + (rows, columns) for the layout of the plot + return_type : {None, 'axes', 'dict', 'both'}, default None + The kind of object to return. The default is ``axes`` + 'axes' returns the matplotlib axes the boxplot is drawn on; + 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot; + 'both' returns a namedtuple with the axes and dict. + + When grouping with ``by``, a Series mapping columns to ``return_type`` + is returned, unless ``return_type`` is None, in which case a NumPy + array of axes is returned with the same shape as ``layout``. + See the prose documentation for more. + + kwds : other plotting keyword arguments to be passed to matplotlib boxplot + function + + Returns + ------- + lines : dict + ax : matplotlib Axes + (ax, lines): namedtuple + + Notes + ----- + Use ``return_type='dict'`` when you want to tweak the appearance + of the lines after plotting. In this case a dict containing the Lines + making up the boxes, caps, fliers, medians, and whiskers is returned. + """ + + +@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) +def boxplot(data, column=None, by=None, ax=None, fontsize=None, + rot=0, grid=True, figsize=None, layout=None, return_type=None, + **kwds): + + # validate return_type: + if return_type not in BoxPlot._valid_return_types: + raise ValueError("return_type must be {'axes', 'dict', 'both'}") + + from pandas import Series, DataFrame + if isinstance(data, Series): + data = DataFrame({'x': data}) + column = 'x' + + def _get_colors(): + return _get_standard_colors(color=kwds.get('color'), num_colors=1) + + def maybe_color_bp(bp): + if 'color' not in kwds: + from matplotlib.artist import setp + setp(bp['boxes'], color=colors[0], alpha=1) + setp(bp['whiskers'], color=colors[0], alpha=1) + setp(bp['medians'], color=colors[2], alpha=1) + + def plot_group(keys, values, ax): + keys = [pprint_thing(x) for x in keys] + values = [remove_na(v) for v in values] + bp = ax.boxplot(values, **kwds) + if fontsize is not None: + ax.tick_params(axis='both', labelsize=fontsize) + if kwds.get('vert', 1): + ax.set_xticklabels(keys, rotation=rot) + else: + ax.set_yticklabels(keys, rotation=rot) + maybe_color_bp(bp) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type == 'dict': + return bp + elif return_type == 'both': + return BoxPlot.BP(ax=ax, lines=bp) + else: + return ax + + colors = _get_colors() + if column is None: + columns = None + else: + if isinstance(column, (list, tuple)): + columns = column + else: + columns = [column] + + if by is not None: + # Prefer array return type for 2-D plots to match the subplot layout + # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 + result = _grouped_plot_by_column(plot_group, data, columns=columns, + by=by, grid=grid, figsize=figsize, + ax=ax, layout=layout, + return_type=return_type) + else: + if return_type is None: + return_type = 'axes' + if layout is not None: + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") + + if ax is None: + ax = _gca() + data = data._get_numeric_data() + if columns is None: + columns = data.columns + else: + data = data[columns] + + result = plot_group(columns, data.values.T, ax) + ax.grid(grid) + + return result + + +def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, + **kwargs): + """ + Make a scatter plot from two DataFrame columns + + Parameters + ---------- + data : DataFrame + x : Column name for the x-axis values + y : Column name for the y-axis values + ax : Matplotlib axis object + figsize : A tuple (width, height) in inches + grid : Setting this to True will show the grid + kwargs : other plotting keyword arguments + To be passed to scatter function + + Returns + ------- + fig : matplotlib.Figure + """ + import matplotlib.pyplot as plt + + kwargs.setdefault('edgecolors', 'none') + + def plot_group(group, ax): + xvals = group[x].values + yvals = group[y].values + ax.scatter(xvals, yvals, **kwargs) + ax.grid(grid) + + if by is not None: + fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax) + else: + if ax is None: + fig = plt.figure() + ax = fig.add_subplot(111) + else: + fig = ax.get_figure() + plot_group(data, ax) + ax.set_ylabel(pprint_thing(y)) + ax.set_xlabel(pprint_thing(x)) + + ax.grid(grid) + + return fig + + +def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, + sharey=False, figsize=None, layout=None, bins=10, **kwds): + """ + Draw histogram of the DataFrame's series using matplotlib / pylab. + + Parameters + ---------- + data : DataFrame + column : string or sequence + If passed, will be used to limit data to a subset of columns + by : object, optional + If passed, then used to form histograms for separate groups + grid : boolean, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + ax : matplotlib axes object, default None + sharex : boolean, default True if ax is None else False + In case subplots=True, share x axis and set some x axis labels to + invisible; defaults to True if ax is None otherwise False if an ax + is passed in; Be aware, that passing in both an ax and sharex=True + will alter all x axis labels for all subplots in a figure! + sharey : boolean, default False + In case subplots=True, share y axis and set some y axis labels to + invisible + figsize : tuple + The size of the figure to create in inches by default + layout : tuple, optional + Tuple of (rows, columns) for the layout of the histograms + bins : integer, default 10 + Number of histogram bins to be used + kwds : other plotting keyword arguments + To be passed to hist function + """ + + if by is not None: + axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, + figsize=figsize, sharex=sharex, sharey=sharey, + layout=layout, bins=bins, xlabelsize=xlabelsize, + xrot=xrot, ylabelsize=ylabelsize, + yrot=yrot, **kwds) + return axes + + if column is not None: + if not isinstance(column, (list, np.ndarray, Index)): + column = [column] + data = data[column] + data = data._get_numeric_data() + naxes = len(data.columns) + + fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, + sharex=sharex, sharey=sharey, figsize=figsize, + layout=layout) + _axes = _flatten(axes) + + for i, col in enumerate(_try_sort(data.columns)): + ax = _axes[i] + ax.hist(data[col].dropna().values, bins=bins, **kwds) + ax.set_title(col) + ax.grid(grid) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + fig.subplots_adjust(wspace=0.3, hspace=0.3) + + return axes + + +def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, + xrot=None, ylabelsize=None, yrot=None, figsize=None, + bins=10, **kwds): + """ + Draw histogram of the input series using matplotlib + + Parameters + ---------- + by : object, optional + If passed, then used to form histograms for separate groups + ax : matplotlib axis object + If not passed, uses gca() + grid : boolean, default True + Whether to show axis grid lines + xlabelsize : int, default None + If specified changes the x-axis label size + xrot : float, default None + rotation of x axis labels + ylabelsize : int, default None + If specified changes the y-axis label size + yrot : float, default None + rotation of y axis labels + figsize : tuple, default None + figure size in inches by default + bins: integer, default 10 + Number of histogram bins to be used + kwds : keywords + To be passed to the actual plotting function + + Notes + ----- + See matplotlib documentation online for more on this + + """ + import matplotlib.pyplot as plt + + if by is None: + if kwds.get('layout', None) is not None: + raise ValueError("The 'layout' keyword is not supported when " + "'by' is None") + # hack until the plotting interface is a bit more unified + fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else + plt.figure(figsize=figsize)) + if (figsize is not None and tuple(figsize) != + tuple(fig.get_size_inches())): + fig.set_size_inches(*figsize, forward=True) + if ax is None: + ax = fig.gca() + elif ax.get_figure() != fig: + raise AssertionError('passed axis not bound to passed figure') + values = self.dropna().values + + ax.hist(values, bins=bins, **kwds) + ax.grid(grid) + axes = np.array([ax]) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + else: + if 'figure' in kwds: + raise ValueError("Cannot pass 'figure' when using the " + "'by' argument, since a new 'Figure' instance " + "will be created") + axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, + bins=bins, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot, **kwds) + + if hasattr(axes, 'ndim'): + if axes.ndim == 1 and len(axes) == 1: + return axes[0] + return axes + + +def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, + layout=None, sharex=False, sharey=False, rot=90, grid=True, + xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, + **kwargs): + """ + Grouped histogram + + Parameters + ---------- + data: Series/DataFrame + column: object, optional + by: object, optional + ax: axes, optional + bins: int, default 50 + figsize: tuple, optional + layout: optional + sharex: boolean, default False + sharey: boolean, default False + rot: int, default 90 + grid: bool, default True + kwargs: dict, keyword arguments passed to matplotlib.Axes.hist + + Returns + ------- + axes: collection of Matplotlib Axes + """ + def plot_group(group, ax): + ax.hist(group.dropna().values, bins=bins, **kwargs) + + xrot = xrot or rot + + fig, axes = _grouped_plot(plot_group, data, column=column, + by=by, sharex=sharex, sharey=sharey, ax=ax, + figsize=figsize, layout=layout, rot=rot) + + _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, + ylabelsize=ylabelsize, yrot=yrot) + + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, + hspace=0.5, wspace=0.3) + return axes + + +def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, + rot=0, grid=True, ax=None, figsize=None, + layout=None, **kwds): + """ + Make box plots from DataFrameGroupBy data. + + Parameters + ---------- + grouped : Grouped DataFrame + subplots : + * ``False`` - no subplots will be used + * ``True`` - create a subplot for each group + column : column name or list of names, or vector + Can be any valid input to groupby + fontsize : int or string + rot : label rotation angle + grid : Setting this to True will show the grid + ax : Matplotlib axis object, default None + figsize : A tuple (width, height) in inches + layout : tuple (optional) + (rows, columns) for the layout of the plot + kwds : other plotting keyword arguments to be passed to matplotlib boxplot + function + + Returns + ------- + dict of key/value = group key/DataFrame.boxplot return value + or DataFrame.boxplot return value in case subplots=figures=False + + Examples + -------- + >>> import pandas + >>> import numpy as np + >>> import itertools + >>> + >>> tuples = [t for t in itertools.product(range(1000), range(4))] + >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) + >>> data = np.random.randn(len(index),4) + >>> df = pandas.DataFrame(data, columns=list('ABCD'), index=index) + >>> + >>> grouped = df.groupby(level='lvl1') + >>> boxplot_frame_groupby(grouped) + >>> + >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) + >>> boxplot_frame_groupby(grouped, subplots=False) + """ + if subplots is True: + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, squeeze=False, + ax=ax, sharex=False, sharey=True, + figsize=figsize, layout=layout) + axes = _flatten(axes) + + ret = Series() + for (key, group), ax in zip(grouped, axes): + d = group.boxplot(ax=ax, column=column, fontsize=fontsize, + rot=rot, grid=grid, **kwds) + ax.set_title(pprint_thing(key)) + ret.loc[key] = d + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, + right=0.9, wspace=0.2) + else: + from pandas.tools.concat import concat + keys, frames = zip(*grouped) + if grouped.axis == 0: + df = concat(frames, keys=keys, axis=1) + else: + if len(frames) > 1: + df = frames[0].join(frames[1::]) + else: + df = frames[0] + ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, + grid=grid, ax=ax, figsize=figsize, + layout=layout, **kwds) + return ret + + +def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, + figsize=None, sharex=True, sharey=True, layout=None, + rot=0, ax=None, **kwargs): + from pandas import DataFrame + + if figsize == 'default': + # allowed to specify mpl default with 'default' + warnings.warn("figsize='default' is deprecated. Specify figure" + "size by tuple instead", FutureWarning, stacklevel=4) + figsize = None + + grouped = data.groupby(by) + if column is not None: + grouped = grouped[column] + + naxes = len(grouped) + fig, axes = _subplots(naxes=naxes, figsize=figsize, + sharex=sharex, sharey=sharey, ax=ax, + layout=layout) + + _axes = _flatten(axes) + + for i, (key, group) in enumerate(grouped): + ax = _axes[i] + if numeric_only and isinstance(group, DataFrame): + group = group._get_numeric_data() + plotf(group, ax, **kwargs) + ax.set_title(pprint_thing(key)) + + return fig, axes + + +def _grouped_plot_by_column(plotf, data, columns=None, by=None, + numeric_only=True, grid=False, + figsize=None, ax=None, layout=None, + return_type=None, **kwargs): + grouped = data.groupby(by) + if columns is None: + if not isinstance(by, (list, tuple)): + by = [by] + columns = data._get_numeric_data().columns.difference(by) + naxes = len(columns) + fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, + figsize=figsize, ax=ax, layout=layout) + + _axes = _flatten(axes) + + result = Series() + ax_values = [] + + for i, col in enumerate(columns): + ax = _axes[i] + gp_col = grouped[col] + keys, values = zip(*gp_col) + re_plotf = plotf(keys, values, ax, **kwargs) + ax.set_title(col) + ax.set_xlabel(pprint_thing(by)) + ax_values.append(re_plotf) + ax.grid(grid) + + result = Series(ax_values, index=columns) + + # Return axes in multiplot case, maybe revisit later # 985 + if return_type is None: + result = axes + + byline = by[0] if len(by) == 1 else by + fig.suptitle('Boxplot grouped by %s' % byline) + fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) + + return result + + +class BasePlotMethods(PandasObject): + + def __init__(self, data): + self._data = data + + def __call__(self, *args, **kwargs): + raise NotImplementedError + + +class SeriesPlotMethods(BasePlotMethods): + """Series plotting accessor and method + + Examples + -------- + >>> s.plot.line() + >>> s.plot.bar() + >>> s.plot.hist() + + Plotting methods can also be accessed by calling the accessor as a method + with the ``kind`` argument: + ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` + """ + + def __call__(self, kind='line', ax=None, + figsize=None, use_index=True, title=None, grid=None, + legend=False, style=None, logx=False, logy=False, + loglog=False, xticks=None, yticks=None, + xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + label=None, secondary_y=False, **kwds): + return plot_series(self._data, kind=kind, ax=ax, figsize=figsize, + use_index=use_index, title=title, grid=grid, + legend=legend, style=style, logx=logx, logy=logy, + loglog=loglog, xticks=xticks, yticks=yticks, + xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, + colormap=colormap, table=table, yerr=yerr, + xerr=xerr, label=label, secondary_y=secondary_y, + **kwds) + __call__.__doc__ = plot_series.__doc__ + + def line(self, **kwds): + """ + Line plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='line', **kwds) + + def bar(self, **kwds): + """ + Vertical bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='bar', **kwds) + + def barh(self, **kwds): + """ + Horizontal bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='barh', **kwds) + + def box(self, **kwds): + """ + Boxplot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='box', **kwds) + + def hist(self, bins=10, **kwds): + """ + Histogram + + .. versionadded:: 0.17.0 + + Parameters + ---------- + bins: integer, default 10 + Number of histogram bins to be used + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='hist', bins=bins, **kwds) + + def kde(self, **kwds): + """ + Kernel Density Estimate plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='kde', **kwds) + + density = kde + + def area(self, **kwds): + """ + Area plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='area', **kwds) + + def pie(self, **kwds): + """ + Pie chart + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='pie', **kwds) + + +class FramePlotMethods(BasePlotMethods): + """DataFrame plotting accessor and method + + Examples + -------- + >>> df.plot.line() + >>> df.plot.scatter('x', 'y') + >>> df.plot.hexbin() + + These plotting methods can also be accessed by calling the accessor as a + method with the ``kind`` argument: + ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` + """ + + def __call__(self, x=None, y=None, kind='line', ax=None, + subplots=False, sharex=None, sharey=False, layout=None, + figsize=None, use_index=True, title=None, grid=None, + legend=True, style=None, logx=False, logy=False, loglog=False, + xticks=None, yticks=None, xlim=None, ylim=None, + rot=None, fontsize=None, colormap=None, table=False, + yerr=None, xerr=None, + secondary_y=False, sort_columns=False, **kwds): + return plot_frame(self._data, kind=kind, x=x, y=y, ax=ax, + subplots=subplots, sharex=sharex, sharey=sharey, + layout=layout, figsize=figsize, use_index=use_index, + title=title, grid=grid, legend=legend, style=style, + logx=logx, logy=logy, loglog=loglog, xticks=xticks, + yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, + fontsize=fontsize, colormap=colormap, table=table, + yerr=yerr, xerr=xerr, secondary_y=secondary_y, + sort_columns=sort_columns, **kwds) + __call__.__doc__ = plot_frame.__doc__ + + def line(self, x=None, y=None, **kwds): + """ + Line plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='line', x=x, y=y, **kwds) + + def bar(self, x=None, y=None, **kwds): + """ + Vertical bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='bar', x=x, y=y, **kwds) + + def barh(self, x=None, y=None, **kwds): + """ + Horizontal bar plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='barh', x=x, y=y, **kwds) + + def box(self, by=None, **kwds): + """ + Boxplot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + by : string or sequence + Column in the DataFrame to group by. + \*\*kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='box', by=by, **kwds) + + def hist(self, by=None, bins=10, **kwds): + """ + Histogram + + .. versionadded:: 0.17.0 + + Parameters + ---------- + by : string or sequence + Column in the DataFrame to group by. + bins: integer, default 10 + Number of histogram bins to be used + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='hist', by=by, bins=bins, **kwds) + + def kde(self, **kwds): + """ + Kernel Density Estimate plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='kde', **kwds) + + density = kde + + def area(self, x=None, y=None, **kwds): + """ + Area plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='area', x=x, y=y, **kwds) + + def pie(self, y=None, **kwds): + """ + Pie chart + + .. versionadded:: 0.17.0 + + Parameters + ---------- + y : label or position, optional + Column to plot. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='pie', y=y, **kwds) + + def scatter(self, x, y, s=None, c=None, **kwds): + """ + Scatter plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + s : scalar or array_like, optional + Size of each point. + c : label or position, optional + Color of each point. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) + + def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, + **kwds): + """ + Hexbin plot + + .. versionadded:: 0.17.0 + + Parameters + ---------- + x, y : label or position, optional + Coordinates for each point. + C : label or position, optional + The value at each `(x, y)` point. + reduce_C_function : callable, optional + Function of one argument that reduces all the values in a bin to + a single number (e.g. `mean`, `max`, `sum`, `std`). + gridsize : int, optional + Number of bins. + **kwds : optional + Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + + Returns + ------- + axes : matplotlib.AxesSubplot or np.array of them + """ + if reduce_C_function is not None: + kwds['reduce_C_function'] = reduce_C_function + if gridsize is not None: + kwds['gridsize'] = gridsize + return self(kind='hexbin', x=x, y=y, C=C, **kwds) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py new file mode 100644 index 0000000000000..2c32a532dd2e2 --- /dev/null +++ b/pandas/plotting/_misc.py @@ -0,0 +1,573 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import numpy as np + +from pandas.util.decorators import deprecate_kwarg +from pandas.types.missing import notnull +from pandas.compat import range, lrange, lmap, zip +from pandas.formats.printing import pprint_thing + + +from pandas.plotting._style import _get_standard_colors +from pandas.plotting._tools import _subplots, _set_ticks_props + + +def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, + diagonal='hist', marker='.', density_kwds=None, + hist_kwds=None, range_padding=0.05, **kwds): + """ + Draw a matrix of scatter plots. + + Parameters + ---------- + frame : DataFrame + alpha : float, optional + amount of transparency applied + figsize : (float,float), optional + a tuple (width, height) in inches + ax : Matplotlib axis object, optional + grid : bool, optional + setting this to True will show the grid + diagonal : {'hist', 'kde'} + pick between 'kde' and 'hist' for + either Kernel Density Estimation or Histogram + plot in the diagonal + marker : str, optional + Matplotlib marker type, default '.' + hist_kwds : other plotting keyword arguments + To be passed to hist function + density_kwds : other plotting keyword arguments + To be passed to kernel density estimate plot + range_padding : float, optional + relative extension of axis range in x and y + with respect to (x_max - x_min) or (y_max - y_min), + default 0.05 + kwds : other plotting keyword arguments + To be passed to scatter function + + Examples + -------- + >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) + >>> scatter_matrix(df, alpha=0.2) + """ + + df = frame._get_numeric_data() + n = df.columns.size + naxes = n * n + fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, + squeeze=False) + + # no gaps between subplots + fig.subplots_adjust(wspace=0, hspace=0) + + mask = notnull(df) + + marker = _get_marker_compat(marker) + + hist_kwds = hist_kwds or {} + density_kwds = density_kwds or {} + + # GH 14855 + kwds.setdefault('edgecolors', 'none') + + boundaries_list = [] + for a in df.columns: + values = df[a].values[mask[a].values] + rmin_, rmax_ = np.min(values), np.max(values) + rdelta_ext = (rmax_ - rmin_) * range_padding / 2. + boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) + + for i, a in zip(lrange(n), df.columns): + for j, b in zip(lrange(n), df.columns): + ax = axes[i, j] + + if i == j: + values = df[a].values[mask[a].values] + + # Deal with the diagonal by drawing a histogram there. + if diagonal == 'hist': + ax.hist(values, **hist_kwds) + + elif diagonal in ('kde', 'density'): + from scipy.stats import gaussian_kde + y = values + gkde = gaussian_kde(y) + ind = np.linspace(y.min(), y.max(), 1000) + ax.plot(ind, gkde.evaluate(ind), **density_kwds) + + ax.set_xlim(boundaries_list[i]) + + else: + common = (mask[a] & mask[b]).values + + ax.scatter(df[b][common], df[a][common], + marker=marker, alpha=alpha, **kwds) + + ax.set_xlim(boundaries_list[j]) + ax.set_ylim(boundaries_list[i]) + + ax.set_xlabel(b) + ax.set_ylabel(a) + + if j != 0: + ax.yaxis.set_visible(False) + if i != n - 1: + ax.xaxis.set_visible(False) + + if len(df.columns) > 1: + lim1 = boundaries_list[0] + locs = axes[0][1].yaxis.get_majorticklocs() + locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] + adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) + + lim0 = axes[0][0].get_ylim() + adj = adj * (lim0[1] - lim0[0]) + lim0[0] + axes[0][0].yaxis.set_ticks(adj) + + if np.all(locs == locs.astype(int)): + # if all ticks are int + locs = locs.astype(int) + axes[0][0].yaxis.set_ticklabels(locs) + + _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) + + return axes + + +def _get_marker_compat(marker): + import matplotlib.lines as mlines + import matplotlib as mpl + if mpl.__version__ < '1.1.0' and marker == '.': + return 'o' + if marker not in mlines.lineMarkers: + return 'o' + return marker + + +def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): + """RadViz - a multivariate data visualization algorithm + + Parameters: + ----------- + frame: DataFrame + class_column: str + Column name containing class names + ax: Matplotlib axis object, optional + color: list or tuple, optional + Colors to use for the different classes + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + kwds: keywords + Options to pass to matplotlib scatter plotting method + + Returns: + -------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + import matplotlib.patches as patches + + def normalize(series): + a = min(series) + b = max(series) + return (series - a) / (b - a) + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + df = frame.drop(class_column, axis=1).apply(normalize) + + if ax is None: + ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) + + to_plot = {} + colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, + color_type='random', color=color) + + for kls in classes: + to_plot[kls] = [[], []] + + m = len(frame.columns) - 1 + s = np.array([(np.cos(t), np.sin(t)) + for t in [2.0 * np.pi * (i / float(m)) + for i in range(m)]]) + + for i in range(n): + row = df.iloc[i].values + row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) + y = (s * row_).sum(axis=0) / row.sum() + kls = class_col.iat[i] + to_plot[kls][0].append(y[0]) + to_plot[kls][1].append(y[1]) + + for i, kls in enumerate(classes): + ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], + label=pprint_thing(kls), **kwds) + ax.legend() + + ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) + + for xy, name in zip(s, df.columns): + + ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) + + if xy[0] < 0.0 and xy[1] < 0.0: + ax.text(xy[0] - 0.025, xy[1] - 0.025, name, + ha='right', va='top', size='small') + elif xy[0] < 0.0 and xy[1] >= 0.0: + ax.text(xy[0] - 0.025, xy[1] + 0.025, name, + ha='right', va='bottom', size='small') + elif xy[0] >= 0.0 and xy[1] < 0.0: + ax.text(xy[0] + 0.025, xy[1] - 0.025, name, + ha='left', va='top', size='small') + elif xy[0] >= 0.0 and xy[1] >= 0.0: + ax.text(xy[0] + 0.025, xy[1] + 0.025, name, + ha='left', va='bottom', size='small') + + ax.axis('equal') + return ax + + +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') +def andrews_curves(frame, class_column, ax=None, samples=200, color=None, + colormap=None, **kwds): + """ + Generates a matplotlib plot of Andrews curves, for visualising clusters of + multivariate data. + + Andrews curves have the functional form: + + f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + + x_4 sin(2t) + x_5 cos(2t) + ... + + Where x coefficients correspond to the values of each dimension and t is + linearly spaced between -pi and +pi. Each row of frame then corresponds to + a single curve. + + Parameters: + ----------- + frame : DataFrame + Data to be plotted, preferably normalized to (0.0, 1.0) + class_column : Name of the column containing class names + ax : matplotlib axes object, default None + samples : Number of points to plot in each curve + color: list or tuple, optional + Colors to use for the different classes + colormap : str or matplotlib colormap object, default None + Colormap to select colors from. If string, load colormap with that name + from matplotlib. + kwds: keywords + Options to pass to matplotlib plotting method + + Returns: + -------- + ax: Matplotlib axis object + + """ + from math import sqrt, pi + import matplotlib.pyplot as plt + + def function(amplitudes): + def f(t): + x1 = amplitudes[0] + result = x1 / sqrt(2.0) + + # Take the rest of the coefficients and resize them + # appropriately. Take a copy of amplitudes as otherwise numpy + # deletes the element from amplitudes itself. + coeffs = np.delete(np.copy(amplitudes), 0) + coeffs.resize(int((coeffs.size + 1) / 2), 2) + + # Generate the harmonics and arguments for the sin and cos + # functions. + harmonics = np.arange(0, coeffs.shape[0]) + 1 + trig_args = np.outer(harmonics, t) + + result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + + coeffs[:, 1, np.newaxis] * np.cos(trig_args), + axis=0) + return result + return f + + n = len(frame) + class_col = frame[class_column] + classes = frame[class_column].drop_duplicates() + df = frame.drop(class_column, axis=1) + t = np.linspace(-pi, pi, samples) + used_legends = set([]) + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + colors = dict(zip(classes, color_values)) + if ax is None: + ax = plt.gca(xlim=(-pi, pi)) + for i in range(n): + row = df.iloc[i].values + f = function(row) + y = f(t) + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(t, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(t, y, color=colors[kls], **kwds) + + ax.legend(loc='upper right') + ax.grid() + return ax + + +def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): + """Bootstrap plot. + + Parameters: + ----------- + series: Time series + fig: matplotlib figure object, optional + size: number of data points to consider during each sampling + samples: number of times the bootstrap procedure is performed + kwds: optional keyword arguments for plotting commands, must be accepted + by both hist and plot + + Returns: + -------- + fig: matplotlib figure + """ + import random + import matplotlib.pyplot as plt + + # random.sample(ndarray, int) fails on python 3.3, sigh + data = list(series.values) + samplings = [random.sample(data, size) for _ in range(samples)] + + means = np.array([np.mean(sampling) for sampling in samplings]) + medians = np.array([np.median(sampling) for sampling in samplings]) + midranges = np.array([(min(sampling) + max(sampling)) * 0.5 + for sampling in samplings]) + if fig is None: + fig = plt.figure() + x = lrange(samples) + axes = [] + ax1 = fig.add_subplot(2, 3, 1) + ax1.set_xlabel("Sample") + axes.append(ax1) + ax1.plot(x, means, **kwds) + ax2 = fig.add_subplot(2, 3, 2) + ax2.set_xlabel("Sample") + axes.append(ax2) + ax2.plot(x, medians, **kwds) + ax3 = fig.add_subplot(2, 3, 3) + ax3.set_xlabel("Sample") + axes.append(ax3) + ax3.plot(x, midranges, **kwds) + ax4 = fig.add_subplot(2, 3, 4) + ax4.set_xlabel("Mean") + axes.append(ax4) + ax4.hist(means, **kwds) + ax5 = fig.add_subplot(2, 3, 5) + ax5.set_xlabel("Median") + axes.append(ax5) + ax5.hist(medians, **kwds) + ax6 = fig.add_subplot(2, 3, 6) + ax6.set_xlabel("Midrange") + axes.append(ax6) + ax6.hist(midranges, **kwds) + for axis in axes: + plt.setp(axis.get_xticklabels(), fontsize=8) + plt.setp(axis.get_yticklabels(), fontsize=8) + return fig + + +@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') +@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) +def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, + use_columns=False, xticks=None, colormap=None, + axvlines=True, axvlines_kwds=None, sort_labels=False, + **kwds): + """Parallel coordinates plotting. + + Parameters + ---------- + frame: DataFrame + class_column: str + Column name containing class names + cols: list, optional + A list of column names to use + ax: matplotlib.axis, optional + matplotlib axis object + color: list or tuple, optional + Colors to use for the different classes + use_columns: bool, optional + If true, columns will be used as xticks + xticks: list or tuple, optional + A list of values to use for xticks + colormap: str or matplotlib colormap, default None + Colormap to use for line colors. + axvlines: bool, optional + If true, vertical lines will be added at each xtick + axvlines_kwds: keywords, optional + Options to be passed to axvline method for vertical lines + sort_labels: bool, False + Sort class_column labels, useful when assigning colours + + .. versionadded:: 0.20.0 + + kwds: keywords + Options to pass to matplotlib plotting method + + Returns + ------- + ax: matplotlib axis object + + Examples + -------- + >>> from pandas import read_csv + >>> from pandas.tools.plotting import parallel_coordinates + >>> from matplotlib import pyplot as plt + >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' + '/pandas/tests/data/iris.csv') + >>> parallel_coordinates(df, 'Name', color=('#556270', + '#4ECDC4', '#C7F464')) + >>> plt.show() + """ + if axvlines_kwds is None: + axvlines_kwds = {'linewidth': 1, 'color': 'black'} + import matplotlib.pyplot as plt + + n = len(frame) + classes = frame[class_column].drop_duplicates() + class_col = frame[class_column] + + if cols is None: + df = frame.drop(class_column, axis=1) + else: + df = frame[cols] + + used_legends = set([]) + + ncols = len(df.columns) + + # determine values to use for xticks + if use_columns is True: + if not np.all(np.isreal(list(df.columns))): + raise ValueError('Columns must be numeric to be used as xticks') + x = df.columns + elif xticks is not None: + if not np.all(np.isreal(xticks)): + raise ValueError('xticks specified must be numeric') + elif len(xticks) != ncols: + raise ValueError('Length of xticks must match number of columns') + x = xticks + else: + x = lrange(ncols) + + if ax is None: + ax = plt.gca() + + color_values = _get_standard_colors(num_colors=len(classes), + colormap=colormap, color_type='random', + color=color) + + if sort_labels: + classes = sorted(classes) + color_values = sorted(color_values) + colors = dict(zip(classes, color_values)) + + for i in range(n): + y = df.iloc[i].values + kls = class_col.iat[i] + label = pprint_thing(kls) + if label not in used_legends: + used_legends.add(label) + ax.plot(x, y, color=colors[kls], label=label, **kwds) + else: + ax.plot(x, y, color=colors[kls], **kwds) + + if axvlines: + for i in x: + ax.axvline(i, **axvlines_kwds) + + ax.set_xticks(x) + ax.set_xticklabels(df.columns) + ax.set_xlim(x[0], x[-1]) + ax.legend(loc='upper right') + ax.grid() + return ax + + +def lag_plot(series, lag=1, ax=None, **kwds): + """Lag plot for time series. + + Parameters: + ----------- + series: Time series + lag: lag of the scatter plot, default 1 + ax: Matplotlib axis object, optional + kwds: Matplotlib scatter method keyword arguments, optional + + Returns: + -------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + + # workaround because `c='b'` is hardcoded in matplotlibs scatter method + kwds.setdefault('c', plt.rcParams['patch.facecolor']) + + data = series.values + y1 = data[:-lag] + y2 = data[lag:] + if ax is None: + ax = plt.gca() + ax.set_xlabel("y(t)") + ax.set_ylabel("y(t + %s)" % lag) + ax.scatter(y1, y2, **kwds) + return ax + + +def autocorrelation_plot(series, ax=None, **kwds): + """Autocorrelation plot for time series. + + Parameters: + ----------- + series: Time series + ax: Matplotlib axis object, optional + kwds : keywords + Options to pass to matplotlib plotting method + + Returns: + ----------- + ax: Matplotlib axis object + """ + import matplotlib.pyplot as plt + n = len(series) + data = np.asarray(series) + if ax is None: + ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) + mean = np.mean(data) + c0 = np.sum((data - mean) ** 2) / float(n) + + def r(h): + return ((data[:n - h] - mean) * + (data[h:] - mean)).sum() / float(n) / c0 + x = np.arange(n) + 1 + y = lmap(r, x) + z95 = 1.959963984540054 + z99 = 2.5758293035489004 + ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') + ax.axhline(y=z95 / np.sqrt(n), color='grey') + ax.axhline(y=0.0, color='black') + ax.axhline(y=-z95 / np.sqrt(n), color='grey') + ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') + ax.set_xlabel("Lag") + ax.set_ylabel("Autocorrelation") + ax.plot(x, y, **kwds) + if 'label' in kwds: + ax.legend() + ax.grid() + return ax diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py new file mode 100644 index 0000000000000..5d6dc7cbcdfc6 --- /dev/null +++ b/pandas/plotting/_style.py @@ -0,0 +1,246 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from contextlib import contextmanager +import re + +import numpy as np + +from pandas.types.common import is_list_like +from pandas.compat import range, lrange, lmap +import pandas.compat as compat +from pandas.plotting._compat import _mpl_ge_2_0_0 + + +# Extracted from https://gist.github.com/huyng/816622 +# this is the rcParams set when setting display.with_mpl_style +# to True. +mpl_stylesheet = { + 'axes.axisbelow': True, + 'axes.color_cycle': ['#348ABD', + '#7A68A6', + '#A60628', + '#467821', + '#CF4457', + '#188487', + '#E24A33'], + 'axes.edgecolor': '#bcbcbc', + 'axes.facecolor': '#eeeeee', + 'axes.grid': True, + 'axes.labelcolor': '#555555', + 'axes.labelsize': 'large', + 'axes.linewidth': 1.0, + 'axes.titlesize': 'x-large', + 'figure.edgecolor': 'white', + 'figure.facecolor': 'white', + 'figure.figsize': (6.0, 4.0), + 'figure.subplot.hspace': 0.5, + 'font.family': 'monospace', + 'font.monospace': ['Andale Mono', + 'Nimbus Mono L', + 'Courier New', + 'Courier', + 'Fixed', + 'Terminal', + 'monospace'], + 'font.size': 10, + 'interactive': True, + 'keymap.all_axes': ['a'], + 'keymap.back': ['left', 'c', 'backspace'], + 'keymap.forward': ['right', 'v'], + 'keymap.fullscreen': ['f'], + 'keymap.grid': ['g'], + 'keymap.home': ['h', 'r', 'home'], + 'keymap.pan': ['p'], + 'keymap.save': ['s'], + 'keymap.xscale': ['L', 'k'], + 'keymap.yscale': ['l'], + 'keymap.zoom': ['o'], + 'legend.fancybox': True, + 'lines.antialiased': True, + 'lines.linewidth': 1.0, + 'patch.antialiased': True, + 'patch.edgecolor': '#EEEEEE', + 'patch.facecolor': '#348ABD', + 'patch.linewidth': 0.5, + 'toolbar': 'toolbar2', + 'xtick.color': '#555555', + 'xtick.direction': 'in', + 'xtick.major.pad': 6.0, + 'xtick.major.size': 0.0, + 'xtick.minor.pad': 6.0, + 'xtick.minor.size': 0.0, + 'ytick.color': '#555555', + 'ytick.direction': 'in', + 'ytick.major.pad': 6.0, + 'ytick.major.size': 0.0, + 'ytick.minor.pad': 6.0, + 'ytick.minor.size': 0.0 +} + + +def _get_standard_colors(num_colors=None, colormap=None, color_type='default', + color=None): + import matplotlib.pyplot as plt + + if color is None and colormap is not None: + if isinstance(colormap, compat.string_types): + import matplotlib.cm as cm + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError("Colormap {0} is not recognized".format(cmap)) + colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) + elif color is not None: + if colormap is not None: + warnings.warn("'color' and 'colormap' cannot be used " + "simultaneously. Using 'color'") + colors = list(color) if is_list_like(color) else color + else: + if color_type == 'default': + # need to call list() on the result to copy so we don't + # modify the global rcParams below + try: + colors = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + except KeyError: + colors = list(plt.rcParams.get('axes.color_cycle', + list('bgrcmyk'))) + if isinstance(colors, compat.string_types): + colors = list(colors) + elif color_type == 'random': + import random + + def random_color(column): + random.seed(column) + return [random.random() for _ in range(3)] + + colors = lmap(random_color, lrange(num_colors)) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + if isinstance(colors, compat.string_types): + import matplotlib.colors + conv = matplotlib.colors.ColorConverter() + + def _maybe_valid_colors(colors): + try: + [conv.to_rgba(c) for c in colors] + return True + except ValueError: + return False + + # check whether the string can be convertable to single color + maybe_single_color = _maybe_valid_colors([colors]) + # check whether each character can be convertable to colors + maybe_color_cycle = _maybe_valid_colors(list(colors)) + if maybe_single_color and maybe_color_cycle and len(colors) > 1: + # Special case for single str 'CN' match and convert to hex + # for supporting matplotlib < 2.0.0 + if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): + hex_color = [c['color'] + for c in list(plt.rcParams['axes.prop_cycle'])] + colors = [hex_color[int(colors[1])]] + else: + # this may no longer be required + msg = ("'{0}' can be parsed as both single color and " + "color cycle. Specify each color using a list " + "like ['{0}'] or {1}") + raise ValueError(msg.format(colors, list(colors))) + elif maybe_single_color: + colors = [colors] + else: + # ``colors`` is regarded as color cycle. + # mpl will raise error any of them is invalid + pass + + if len(colors) != num_colors: + try: + multiple = num_colors // len(colors) - 1 + except ZeroDivisionError: + raise ValueError("Invalid color argument: ''") + mod = num_colors % len(colors) + + colors += multiple * colors + colors += colors[:mod] + + return colors + + +class _Options(dict): + """ + Stores pandas plotting options. + Allows for parameter aliasing so you can just use parameter names that are + the same as the plot function parameters, but is stored in a canonical + format that makes it easy to breakdown into groups later + """ + + # alias so the names are same as plotting method parameter names + _ALIASES = {'x_compat': 'xaxis.compat'} + _DEFAULT_KEYS = ['xaxis.compat'] + + def __init__(self, deprecated=False): + self._deprecated = deprecated + # self['xaxis.compat'] = False + super(_Options, self).__setitem__('xaxis.compat', False) + + def _warn_if_deprecated(self): + if self._deprecated: + warnings.warn("'pandas.plot_params' is deprecated. Use " + "'pandas.plotting.plot_params' instead", + FutureWarning, stacklevel=3) + + def __getitem__(self, key): + self._warn_if_deprecated() + key = self._get_canonical_key(key) + if key not in self: + raise ValueError('%s is not a valid pandas plotting option' % key) + return super(_Options, self).__getitem__(key) + + def __setitem__(self, key, value): + self._warn_if_deprecated() + key = self._get_canonical_key(key) + return super(_Options, self).__setitem__(key, value) + + def __delitem__(self, key): + key = self._get_canonical_key(key) + if key in self._DEFAULT_KEYS: + raise ValueError('Cannot remove default parameter %s' % key) + return super(_Options, self).__delitem__(key) + + def __contains__(self, key): + key = self._get_canonical_key(key) + return super(_Options, self).__contains__(key) + + def reset(self): + """ + Reset the option store to its initial state + + Returns + ------- + None + """ + self._warn_if_deprecated() + self.__init__() + + def _get_canonical_key(self, key): + return self._ALIASES.get(key, key) + + @contextmanager + def use(self, key, value): + """ + Temporarily set a parameter value using the with statement. + Aliasing allowed. + """ + self._warn_if_deprecated() + old_value = self[key] + try: + self[key] = value + yield self + finally: + self[key] = old_value + + +plot_params = _Options() diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_timeseries.py new file mode 100644 index 0000000000000..7533e417b0026 --- /dev/null +++ b/pandas/plotting/_timeseries.py @@ -0,0 +1,339 @@ +# TODO: Use the fact that axis can have units to simplify the process + +import numpy as np + +from matplotlib import pylab +from pandas.tseries.period import Period +from pandas.tseries.offsets import DateOffset +import pandas.tseries.frequencies as frequencies +from pandas.tseries.index import DatetimeIndex +from pandas.tseries.period import PeriodIndex +from pandas.tseries.tdi import TimedeltaIndex +from pandas.formats.printing import pprint_thing +import pandas.compat as compat + +from pandas.plotting._converter import (TimeSeries_DateLocator, + TimeSeries_DateFormatter, + TimeSeries_TimedeltaFormatter) + +# --------------------------------------------------------------------- +# Plotting functions and monkey patches + + +def tsplot(series, plotf, ax=None, **kwargs): + """ + Plots a Series on the given Matplotlib axes or the current axes + + Parameters + ---------- + axes : Axes + series : Series + + Notes + _____ + Supports same kwargs as Axes.plot + + """ + # Used inferred freq is possible, need a test case for inferred + if ax is None: + import matplotlib.pyplot as plt + ax = plt.gca() + + freq, series = _maybe_resample(series, ax, kwargs) + + # Set ax with freq info + _decorate_axes(ax, freq, kwargs) + ax._plot_data.append((series, plotf, kwargs)) + lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) + + # set date formatter, locators and rescale limits + format_dateaxis(ax, ax.freq, series.index) + return lines + + +def _maybe_resample(series, ax, kwargs): + # resample against axes freq if necessary + freq, ax_freq = _get_freq(ax, series) + + if freq is None: # pragma: no cover + raise ValueError('Cannot use dynamic axis without frequency info') + + # Convert DatetimeIndex to PeriodIndex + if isinstance(series.index, DatetimeIndex): + series = series.to_period(freq=freq) + + if ax_freq is not None and freq != ax_freq: + if frequencies.is_superperiod(freq, ax_freq): # upsample input + series = series.copy() + series.index = series.index.asfreq(ax_freq, how='s') + freq = ax_freq + elif _is_sup(freq, ax_freq): # one is weekly + how = kwargs.pop('how', 'last') + series = getattr(series.resample('D'), how)().dropna() + series = getattr(series.resample(ax_freq), how)().dropna() + freq = ax_freq + elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): + _upsample_others(ax, freq, kwargs) + ax_freq = freq + else: # pragma: no cover + raise ValueError('Incompatible frequency conversion') + return freq, series + + +def _is_sub(f1, f2): + return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_subperiod(f1, 'D'))) + + +def _is_sup(f1, f2): + return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or + (f2.startswith('W') and frequencies.is_superperiod(f1, 'D'))) + + +def _upsample_others(ax, freq, kwargs): + legend = ax.get_legend() + lines, labels = _replot_ax(ax, freq, kwargs) + _replot_ax(ax, freq, kwargs) + + other_ax = None + if hasattr(ax, 'left_ax'): + other_ax = ax.left_ax + if hasattr(ax, 'right_ax'): + other_ax = ax.right_ax + + if other_ax is not None: + rlines, rlabels = _replot_ax(other_ax, freq, kwargs) + lines.extend(rlines) + labels.extend(rlabels) + + if (legend is not None and kwargs.get('legend', True) and + len(lines) > 0): + title = legend.get_title().get_text() + if title == 'None': + title = None + ax.legend(lines, labels, loc='best', title=title) + + +def _replot_ax(ax, freq, kwargs): + data = getattr(ax, '_plot_data', None) + + # clear current axes and data + ax._plot_data = [] + ax.clear() + + _decorate_axes(ax, freq, kwargs) + + lines = [] + labels = [] + if data is not None: + for series, plotf, kwds in data: + series = series.copy() + idx = series.index.asfreq(freq, how='S') + series.index = idx + ax._plot_data.append((series, plotf, kwds)) + + # for tsplot + if isinstance(plotf, compat.string_types): + from pandas.plotting._core import _plot_klass + plotf = _plot_klass[plotf]._plot + + lines.append(plotf(ax, series.index._mpl_repr(), + series.values, **kwds)[0]) + labels.append(pprint_thing(series.name)) + + return lines, labels + + +def _decorate_axes(ax, freq, kwargs): + """Initialize axes for time-series plotting""" + if not hasattr(ax, '_plot_data'): + ax._plot_data = [] + + ax.freq = freq + xaxis = ax.get_xaxis() + xaxis.freq = freq + if not hasattr(ax, 'legendlabels'): + ax.legendlabels = [kwargs.get('label', None)] + else: + ax.legendlabels.append(kwargs.get('label', None)) + ax.view_interval = None + ax.date_axis_info = None + + +def _get_ax_freq(ax): + """ + Get the freq attribute of the ax object if set. + Also checks shared axes (eg when using secondary yaxis, sharex=True + or twinx) + """ + ax_freq = getattr(ax, 'freq', None) + if ax_freq is None: + # check for left/right ax in case of secondary yaxis + if hasattr(ax, 'left_ax'): + ax_freq = getattr(ax.left_ax, 'freq', None) + elif hasattr(ax, 'right_ax'): + ax_freq = getattr(ax.right_ax, 'freq', None) + if ax_freq is None: + # check if a shared ax (sharex/twinx) has already freq set + shared_axes = ax.get_shared_x_axes().get_siblings(ax) + if len(shared_axes) > 1: + for shared_ax in shared_axes: + ax_freq = getattr(shared_ax, 'freq', None) + if ax_freq is not None: + break + return ax_freq + + +def _get_freq(ax, series): + # get frequency from data + freq = getattr(series.index, 'freq', None) + if freq is None: + freq = getattr(series.index, 'inferred_freq', None) + + ax_freq = _get_ax_freq(ax) + + # use axes freq if no data freq + if freq is None: + freq = ax_freq + + # get the period frequency + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + + freq = frequencies.get_period_alias(freq) + return freq, ax_freq + + +def _use_dynamic_x(ax, data): + freq = _get_index_freq(data) + ax_freq = _get_ax_freq(ax) + + if freq is None: # convert irregular if axes has freq info + freq = ax_freq + else: # do not use tsplot if irregular was plotted first + if (ax_freq is None) and (len(ax.get_lines()) > 0): + return False + + if freq is None: + return False + + if isinstance(freq, DateOffset): + freq = freq.rule_code + else: + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + if freq is None: + return False + + # hack this for 0.10.1, creating more technical debt...sigh + if isinstance(data.index, DatetimeIndex): + base = frequencies.get_freq(freq) + x = data.index + if (base <= frequencies.FreqGroup.FR_DAY): + return x[:1].is_normalized + return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] + return True + + +def _get_index_freq(data): + freq = getattr(data.index, 'freq', None) + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if freq == 'B': + weekdays = np.unique(data.index.dayofweek) + if (5 in weekdays) or (6 in weekdays): + freq = None + return freq + + +def _maybe_convert_index(ax, data): + # tsplot converts automatically, but don't want to convert index + # over and over for DataFrames + if isinstance(data.index, DatetimeIndex): + freq = getattr(data.index, 'freq', None) + + if freq is None: + freq = getattr(data.index, 'inferred_freq', None) + if isinstance(freq, DateOffset): + freq = freq.rule_code + + if freq is None: + freq = _get_ax_freq(ax) + + if freq is None: + raise ValueError('Could not get frequency alias for plotting') + + freq = frequencies.get_base_alias(freq) + freq = frequencies.get_period_alias(freq) + + data = data.to_period(freq=freq) + return data + + +# Patch methods for subplot. Only format_dateaxis is currently used. +# Do we need the rest for convenience? + +def format_timedelta_ticks(x, pos, n_decimals): + """ + Convert seconds to 'D days HH:MM:SS.F' + """ + s, ns = divmod(x, 1e9) + m, s = divmod(s, 60) + h, m = divmod(m, 60) + d, h = divmod(h, 24) + decimals = int(ns * 10**(n_decimals - 9)) + s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) + if n_decimals > 0: + s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) + if d != 0: + s = '{:d} days '.format(int(d)) + s + return s + + +def format_dateaxis(subplot, freq, index): + """ + Pretty-formats the date axis (x-axis). + + Major and minor ticks are automatically set for the frequency of the + current underlying series. As the dynamic mode is activated by + default, changing the limits of the x axis will intelligently change + the positions of the ticks. + """ + + # handle index specific formatting + # Note: DatetimeIndex does not use this + # interface. DatetimeIndex uses matplotlib.date directly + if isinstance(index, PeriodIndex): + + majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_locator(majlocator) + subplot.xaxis.set_minor_locator(minlocator) + + majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=False, + plot_obj=subplot) + minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, + minor_locator=True, + plot_obj=subplot) + subplot.xaxis.set_major_formatter(majformatter) + subplot.xaxis.set_minor_formatter(minformatter) + + # x and y coord info + subplot.format_coord = lambda t, y: ( + "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) + + elif isinstance(index, TimedeltaIndex): + subplot.xaxis.set_major_formatter( + TimeSeries_TimedeltaFormatter()) + else: + raise TypeError('index type not supported') + + pylab.draw_if_interactive() diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py new file mode 100644 index 0000000000000..720f776279869 --- /dev/null +++ b/pandas/plotting/_tools.py @@ -0,0 +1,383 @@ +# being a bit too dynamic +# pylint: disable=E1101 +from __future__ import division + +import warnings +from math import ceil + +import numpy as np + +from pandas.types.common import is_list_like +from pandas.core.index import Index +from pandas.core.series import Series +from pandas.compat import range + + +def format_date_labels(ax, rot): + # mini version of autofmt_xdate + try: + for label in ax.get_xticklabels(): + label.set_ha('right') + label.set_rotation(rot) + fig = ax.get_figure() + fig.subplots_adjust(bottom=0.2) + except Exception: # pragma: no cover + pass + + +def table(ax, data, rowLabels=None, colLabels=None, + **kwargs): + """ + Helper function to convert DataFrame and Series to matplotlib.table + + Parameters + ---------- + `ax`: Matplotlib axes object + `data`: DataFrame or Series + data for table contents + `kwargs`: keywords, optional + keyword arguments which passed to matplotlib.table.table. + If `rowLabels` or `colLabels` is not specified, data index or column + name will be used. + + Returns + ------- + matplotlib table object + """ + from pandas import DataFrame + if isinstance(data, Series): + data = DataFrame(data, columns=[data.name]) + elif isinstance(data, DataFrame): + pass + else: + raise ValueError('Input data must be DataFrame or Series') + + if rowLabels is None: + rowLabels = data.index + + if colLabels is None: + colLabels = data.columns + + cellText = data.values + + import matplotlib.table + table = matplotlib.table.table(ax, cellText=cellText, + rowLabels=rowLabels, + colLabels=colLabels, **kwargs) + return table + + +def _get_layout(nplots, layout=None, layout_type='box'): + if layout is not None: + if not isinstance(layout, (tuple, list)) or len(layout) != 2: + raise ValueError('Layout must be a tuple of (rows, columns)') + + nrows, ncols = layout + + # Python 2 compat + ceil_ = lambda x: int(ceil(x)) + if nrows == -1 and ncols > 0: + layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols) + elif ncols == -1 and nrows > 0: + layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows)) + elif ncols <= 0 and nrows <= 0: + msg = "At least one dimension of layout must be positive" + raise ValueError(msg) + + if nrows * ncols < nplots: + raise ValueError('Layout of %sx%s must be larger than ' + 'required size %s' % (nrows, ncols, nplots)) + + return layout + + if layout_type == 'single': + return (1, 1) + elif layout_type == 'horizontal': + return (1, nplots) + elif layout_type == 'vertical': + return (nplots, 1) + + layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} + try: + return layouts[nplots] + except KeyError: + k = 1 + while k ** 2 < nplots: + k += 1 + + if (k - 1) * k >= nplots: + return k, (k - 1) + else: + return k, k + +# copied from matplotlib/pyplot.py and modified for pandas.plotting + + +def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, + subplot_kw=None, ax=None, layout=None, layout_type='box', + **fig_kw): + """Create a figure with a set of subplots already made. + + This utility wrapper makes it convenient to create common layouts of + subplots, including the enclosing figure object, in a single call. + + Keyword arguments: + + naxes : int + Number of required axes. Exceeded axes are set invisible. Default is + nrows * ncols. + + sharex : bool + If True, the X axis will be shared amongst all subplots. + + sharey : bool + If True, the Y axis will be shared amongst all subplots. + + squeeze : bool + + If True, extra dimensions are squeezed out from the returned axis object: + - if only one subplot is constructed (nrows=ncols=1), the resulting + single Axis object is returned as a scalar. + - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object + array of Axis objects are returned as numpy 1-d arrays. + - for NxM subplots with N>1 and M>1 are returned as a 2d array. + + If False, no squeezing at all is done: the returned axis object is always + a 2-d array containing Axis instances, even if it ends up being 1x1. + + subplot_kw : dict + Dict with keywords passed to the add_subplot() call used to create each + subplots. + + ax : Matplotlib axis object, optional + + layout : tuple + Number of rows and columns of the subplot grid. + If not specified, calculated from naxes and layout_type + + layout_type : {'box', 'horziontal', 'vertical'}, default 'box' + Specify how to layout the subplot grid. + + fig_kw : Other keyword arguments to be passed to the figure() call. + Note that all keywords not recognized above will be + automatically included here. + + Returns: + + fig, ax : tuple + - fig is the Matplotlib Figure object + - ax can be either a single axis object or an array of axis objects if + more than one subplot was created. The dimensions of the resulting array + can be controlled with the squeeze keyword, see above. + + **Examples:** + + x = np.linspace(0, 2*np.pi, 400) + y = np.sin(x**2) + + # Just a figure and one subplot + f, ax = plt.subplots() + ax.plot(x, y) + ax.set_title('Simple plot') + + # Two subplots, unpack the output array immediately + f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) + ax1.plot(x, y) + ax1.set_title('Sharing Y axis') + ax2.scatter(x, y) + + # Four polar axes + plt.subplots(2, 2, subplot_kw=dict(polar=True)) + """ + import matplotlib.pyplot as plt + + if subplot_kw is None: + subplot_kw = {} + + if ax is None: + fig = plt.figure(**fig_kw) + else: + if is_list_like(ax): + ax = _flatten(ax) + if layout is not None: + warnings.warn("When passing multiple axes, layout keyword is " + "ignored", UserWarning) + if sharex or sharey: + warnings.warn("When passing multiple axes, sharex and sharey " + "are ignored. These settings must be specified " + "when creating axes", UserWarning, + stacklevel=4) + if len(ax) == naxes: + fig = ax[0].get_figure() + return fig, ax + else: + raise ValueError("The number of passed axes must be {0}, the " + "same as the output plot".format(naxes)) + + fig = ax.get_figure() + # if ax is passed and a number of subplots is 1, return ax as it is + if naxes == 1: + if squeeze: + return fig, ax + else: + return fig, _flatten(ax) + else: + warnings.warn("To output multiple subplots, the figure containing " + "the passed axes is being cleared", UserWarning, + stacklevel=4) + fig.clear() + + nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) + nplots = nrows * ncols + + # Create empty object array to hold all axes. It's easiest to make it 1-d + # so we can just append subplots upon creation, and then + axarr = np.empty(nplots, dtype=object) + + # Create first subplot separately, so we can share it if requested + ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) + + if sharex: + subplot_kw['sharex'] = ax0 + if sharey: + subplot_kw['sharey'] = ax0 + axarr[0] = ax0 + + # Note off-by-one counting because add_subplot uses the MATLAB 1-based + # convention. + for i in range(1, nplots): + kwds = subplot_kw.copy() + # Set sharex and sharey to None for blank/dummy axes, these can + # interfere with proper axis limits on the visible axes if + # they share axes e.g. issue #7528 + if i >= naxes: + kwds['sharex'] = None + kwds['sharey'] = None + ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) + axarr[i] = ax + + if naxes != nplots: + for ax in axarr[naxes:]: + ax.set_visible(False) + + _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) + + if squeeze: + # Reshape the array to have the final desired dimension (nrow,ncol), + # though discarding unneeded dimensions that equal 1. If we only have + # one subplot, just return it instead of a 1-element array. + if nplots == 1: + axes = axarr[0] + else: + axes = axarr.reshape(nrows, ncols).squeeze() + else: + # returned axis array will be always 2-d, even if nrows=ncols=1 + axes = axarr.reshape(nrows, ncols) + + return fig, axes + + +def _remove_labels_from_axis(axis): + for t in axis.get_majorticklabels(): + t.set_visible(False) + + try: + # set_visible will not be effective if + # minor axis has NullLocator and NullFormattor (default) + import matplotlib.ticker as ticker + if isinstance(axis.get_minor_locator(), ticker.NullLocator): + axis.set_minor_locator(ticker.AutoLocator()) + if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): + axis.set_minor_formatter(ticker.FormatStrFormatter('')) + for t in axis.get_minorticklabels(): + t.set_visible(False) + except Exception: # pragma no cover + raise + axis.get_label().set_visible(False) + + +def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): + if nplots > 1: + + if nrows > 1: + try: + # first find out the ax layout, + # so that we can correctly handle 'gaps" + layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) + for ax in axarr: + layout[ax.rowNum, ax.colNum] = ax.get_visible() + + for ax in axarr: + # only the last row of subplots should get x labels -> all + # other off layout handles the case that the subplot is + # the last in the column, because below is no subplot/gap. + if not layout[ax.rowNum + 1, ax.colNum]: + continue + if sharex or len(ax.get_shared_x_axes() + .get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + except IndexError: + # if gridspec is used, ax.rowNum and ax.colNum may different + # from layout shape. in this case, use last_row logic + for ax in axarr: + if ax.is_last_row(): + continue + if sharex or len(ax.get_shared_x_axes() + .get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.xaxis) + + if ncols > 1: + for ax in axarr: + # only the first column should get y labels -> set all other to + # off as we only have labels in teh first column and we always + # have a subplot there, we can skip the layout test + if ax.is_first_col(): + continue + if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1: + _remove_labels_from_axis(ax.yaxis) + + +def _flatten(axes): + if not is_list_like(axes): + return np.array([axes]) + elif isinstance(axes, (np.ndarray, Index)): + return axes.ravel() + return np.array(axes) + + +def _get_all_lines(ax): + lines = ax.get_lines() + + if hasattr(ax, 'right_ax'): + lines += ax.right_ax.get_lines() + + if hasattr(ax, 'left_ax'): + lines += ax.left_ax.get_lines() + + return lines + + +def _get_xlim(lines): + left, right = np.inf, -np.inf + for l in lines: + x = l.get_xdata(orig=False) + left = min(x[0], left) + right = max(x[-1], right) + return left, right + + +def _set_ticks_props(axes, xlabelsize=None, xrot=None, + ylabelsize=None, yrot=None): + import matplotlib.pyplot as plt + + for ax in _flatten(axes): + if xlabelsize is not None: + plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) + if xrot is not None: + plt.setp(ax.get_xticklabels(), rotation=xrot) + if ylabelsize is not None: + plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) + if yrot is not None: + plt.setp(ax.get_yticklabels(), rotation=yrot) + return axes diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index a15d7cf26cbea..638c9b05839e1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -31,7 +31,7 @@ class TestPDApi(Base, tm.TestCase): # top-level sub-packages lib = ['api', 'compat', 'computation', 'core', - 'indexes', 'formats', 'errors', 'pandas', + 'indexes', 'formats', 'errors', 'pandas', 'plotting', 'test', 'tools', 'tseries', 'sparse', 'types', 'util', 'options', 'io'] @@ -69,8 +69,7 @@ class TestPDApi(Base, tm.TestCase): 'melt', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', 'period_range', - 'pivot', 'pivot_table', 'plot_params', 'qcut', - 'scatter_matrix', + 'pivot', 'pivot_table', 'qcut', 'show_versions', 'timedelta_range', 'unique', 'value_counts', 'wide_to_long'] @@ -103,7 +102,8 @@ class TestPDApi(Base, tm.TestCase): 'rolling_median', 'rolling_min', 'rolling_quantile', 'rolling_skew', 'rolling_std', 'rolling_sum', 'rolling_var', 'rolling_window', 'ordered_merge', - 'pnow', 'match', 'groupby', 'get_store'] + 'pnow', 'match', 'groupby', 'get_store', + 'plot_params', 'scatter_matrix'] def test_api(self): diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index c31d8b539ae6f..d81f73e73ae69 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -16,7 +16,8 @@ import numpy as np from numpy import random -import pandas.tools.plotting as plotting +import pandas.plotting as plotting +from pandas.plotting._tools import _flatten """ @@ -48,12 +49,12 @@ def setUp(self): import matplotlib as mpl mpl.rcdefaults() - self.mpl_le_1_2_1 = plotting._mpl_le_1_2_1() - self.mpl_ge_1_3_1 = plotting._mpl_ge_1_3_1() - self.mpl_ge_1_4_0 = plotting._mpl_ge_1_4_0() - self.mpl_ge_1_5_0 = plotting._mpl_ge_1_5_0() - self.mpl_ge_2_0_0 = plotting._mpl_ge_2_0_0() - self.mpl_ge_2_0_1 = plotting._mpl_ge_2_0_1() + self.mpl_le_1_2_1 = plotting._compat._mpl_le_1_2_1() + self.mpl_ge_1_3_1 = plotting._compat._mpl_ge_1_3_1() + self.mpl_ge_1_4_0 = plotting._compat._mpl_ge_1_4_0() + self.mpl_ge_1_5_0 = plotting._compat._mpl_ge_1_5_0() + self.mpl_ge_2_0_0 = plotting._compat._mpl_ge_2_0_0() + self.mpl_ge_2_0_1 = plotting._compat._mpl_ge_2_0_1() if self.mpl_ge_1_4_0: self.bp_n_objects = 7 @@ -73,7 +74,8 @@ def setUp(self): self.default_tick_position = 'left' if self.mpl_ge_2_0_0 else 'default' # common test data from pandas import read_csv - path = os.path.join(os.path.dirname(curpath()), 'data', 'iris.csv') + base = os.path.join(os.path.dirname(curpath()), os.pardir) + path = os.path.join(base, 'tests', 'data', 'iris.csv') self.iris = read_csv(path) n = 100 @@ -353,7 +355,7 @@ def _check_axes_shape(self, axes, axes_num=None, layout=None, self.assertTrue(len(ax.get_children()) > 0) if layout is not None: - result = self._get_axes_layout(plotting._flatten(axes)) + result = self._get_axes_layout(_flatten(axes)) self.assertEqual(result, layout) self.assert_numpy_array_equal( @@ -379,7 +381,7 @@ def _flatten_visible(self, axes): axes : matplotlib Axes object, or its list-like """ - axes = plotting._flatten(axes) + axes = _flatten(axes) axes = [ax for ax in axes if ax.get_visible()] return axes diff --git a/pandas/tests/plotting/test_boxplot_method.py b/pandas/tests/plotting/test_boxplot_method.py index 31c150bc1e64f..5b9c13bd26708 100644 --- a/pandas/tests/plotting/test_boxplot_method.py +++ b/pandas/tests/plotting/test_boxplot_method.py @@ -14,7 +14,7 @@ from numpy import random from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @@ -54,7 +54,8 @@ def test_boxplot_legacy(self): _check_plot_works(df.boxplot, by='indic') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by=['indic', 'indic2']) - _check_plot_works(plotting.boxplot, data=df['one'], return_type='dict') + _check_plot_works(plotting._core.boxplot, data=df['one'], + return_type='dict') _check_plot_works(df.boxplot, notch=1, return_type='dict') with tm.assert_produces_warning(UserWarning): _check_plot_works(df.boxplot, by='indic', notch=1) diff --git a/pandas/tests/tseries/test_converter.py b/pandas/tests/plotting/test_converter.py similarity index 99% rename from pandas/tests/tseries/test_converter.py rename to pandas/tests/plotting/test_converter.py index 5351e26f0e62b..4629103d033f5 100644 --- a/pandas/tests/tseries/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -8,7 +8,7 @@ from pandas.tseries.offsets import Second, Milli, Micro, Day from pandas.compat.numpy import np_datetime64_compat -converter = pytest.importorskip('pandas.tseries.converter') +converter = pytest.importorskip('pandas.plotting._converter') def test_timtetonum_accepts_unicode(): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 673c34903b259..b3692c5a8d2d2 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -144,7 +144,7 @@ def test_high_freq(self): _check_plot_works(ser.plot) def test_get_datevalue(self): - from pandas.tseries.converter import get_datevalue + from pandas.plotting._converter import get_datevalue self.assertIsNone(get_datevalue(None, 'D')) self.assertEqual(get_datevalue(1987, 'A'), 1987) self.assertEqual(get_datevalue(Period(1987, 'A'), 'M'), @@ -243,7 +243,7 @@ def test_plot_multiple_inferred_freq(self): @slow def test_uhf(self): - import pandas.tseries.converter as conv + import pandas.plotting._converter as conv import matplotlib.pyplot as plt fig = plt.gcf() plt.clf() @@ -387,7 +387,7 @@ def _test(ax): _test(ax) def test_get_finder(self): - import pandas.tseries.converter as conv + import pandas.plotting._converter as conv self.assertEqual(conv.get_finder('B'), conv._daily_finder) self.assertEqual(conv.get_finder('D'), conv._daily_finder) diff --git a/pandas/tests/plotting/test_deprecated.py b/pandas/tests/plotting/test_deprecated.py new file mode 100644 index 0000000000000..d7eaa69460a3a --- /dev/null +++ b/pandas/tests/plotting/test_deprecated.py @@ -0,0 +1,58 @@ +# coding: utf-8 + +import string + +import pandas as pd +import pandas.util.testing as tm +from pandas.util.testing import slow + +from numpy.random import randn + +import pandas.tools.plotting as plotting + +from pandas.tests.plotting.common import TestPlotBase + + +""" +Test cases for plot functions imported from deprecated +pandas.tools.plotting +""" + + +@tm.mplskip +class TestDeprecatedNameSpace(TestPlotBase): + + @slow + def test_scatter_plot_legacy(self): + tm._skip_if_no_scipy() + + df = pd.DataFrame(randn(100, 2)) + + with tm.assert_produces_warning(FutureWarning): + plotting.scatter_matrix(df) + + with tm.assert_produces_warning(FutureWarning): + pd.scatter_matrix(df) + + @slow + def test_boxplot_deprecated(self): + df = pd.DataFrame(randn(6, 4), + index=list(string.ascii_letters[:6]), + columns=['one', 'two', 'three', 'four']) + df['indic'] = ['foo', 'bar'] * 3 + + with tm.assert_produces_warning(FutureWarning): + plotting.boxplot(df, column=['one', 'two'], + by='indic') + + @slow + def test_radviz_deprecated(self): + df = self.iris + with tm.assert_produces_warning(FutureWarning): + plotting.radviz(frame=df, class_column='Name') + + @slow + def test_plot_params(self): + + with tm.assert_produces_warning(FutureWarning): + pd.plot_params['xaxis.compat'] = True diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 8090b9cc44ca3..404752b567f63 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -22,7 +22,7 @@ import numpy as np from numpy.random import rand, randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -240,13 +240,13 @@ def test_xcompat(self): self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['xaxis.compat'] = True + pd.plotting.plot_params['xaxis.compat'] = True ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) tm.close() - pd.plot_params['x_compat'] = False + pd.plotting.plot_params['x_compat'] = False ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) @@ -254,7 +254,7 @@ def test_xcompat(self): tm.close() # useful if you're plotting a bunch together - with pd.plot_params.use('x_compat', True): + with pd.plotting.plot_params.use('x_compat', True): ax = df.plot() lines = ax.get_lines() self.assertNotIsInstance(lines[0].get_xdata(), PeriodIndex) @@ -1979,7 +1979,7 @@ def test_unordered_ts(self): def test_kind_both_ways(self): df = DataFrame({'x': [1, 2, 3]}) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue df.plot(kind=kind) @@ -1990,7 +1990,7 @@ def test_kind_both_ways(self): def test_all_invalid_plot_data(self): df = DataFrame(list('abcd')) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2001,7 +2001,7 @@ def test_partially_invalid_plot_data(self): with tm.RNGContext(42): df = DataFrame(randn(10, 2), dtype=object) df[np.random.rand(df.shape[0]) > 0.5] = 'a' - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -2454,7 +2454,7 @@ def test_memory_leak(self): import gc results = {} - for kind in plotting._plot_klass.keys(): + for kind in plotting._core._plot_klass.keys(): if not _ok_for_gaussian_kde(kind): continue args = {} @@ -2653,7 +2653,7 @@ def test_df_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings( DataFrame({'a': [1, 2, 3], 'b': [2, 3, 4]}), - plotting._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) + plotting._core._dataframe_kinds, kws={'x': 'a', 'y': 'b'}) def test_option_mpl_style(self): with tm.assert_produces_warning(FutureWarning, diff --git a/pandas/tests/plotting/test_hist_method.py b/pandas/tests/plotting/test_hist_method.py index 380bdc12abce4..0a13a6e9893a8 100644 --- a/pandas/tests/plotting/test_hist_method.py +++ b/pandas/tests/plotting/test_hist_method.py @@ -9,7 +9,7 @@ import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting +from pandas.plotting._core import grouped_hist from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works) @@ -260,7 +260,7 @@ def test_grouped_hist_legacy(self): df['C'] = np.random.randint(0, 4, 500) df['D'] = ['X'] * 500 - axes = plotting.grouped_hist(df.A, by=df.C) + axes = grouped_hist(df.A, by=df.C) self._check_axes_shape(axes, axes_num=4, layout=(2, 2)) tm.close() @@ -277,10 +277,9 @@ def test_grouped_hist_legacy(self): # make sure kwargs to hist are handled xf, yf = 20, 18 xrot, yrot = 30, 40 - axes = plotting.grouped_hist(df.A, by=df.C, normed=True, - cumulative=True, bins=4, - xlabelsize=xf, xrot=xrot, - ylabelsize=yf, yrot=yrot) + axes = grouped_hist(df.A, by=df.C, normed=True, cumulative=True, + bins=4, xlabelsize=xf, xrot=xrot, + ylabelsize=yf, yrot=yrot) # height of last bin (index 5) must be 1.0 for ax in axes.ravel(): rects = [x for x in ax.get_children() if isinstance(x, Rectangle)] @@ -290,14 +289,14 @@ def test_grouped_hist_legacy(self): ylabelsize=yf, yrot=yrot) tm.close() - axes = plotting.grouped_hist(df.A, by=df.C, log=True) + axes = grouped_hist(df.A, by=df.C, log=True) # scale of y must be 'log' self._check_ax_scales(axes, yaxis='log') tm.close() # propagate attr exception from matplotlib.Axes.hist with tm.assertRaises(AttributeError): - plotting.grouped_hist(df.A, by=df.C, foo='bar') + grouped_hist(df.A, by=df.C, foo='bar') with tm.assert_produces_warning(FutureWarning): df.hist(by='C', figsize='default') diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 504c55bcfcfd0..fe0b6c103a0e1 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -11,7 +11,7 @@ from numpy import random from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _ok_for_gaussian_kde) @@ -29,7 +29,7 @@ def setUp(self): @slow def test_autocorrelation_plot(self): - from pandas.tools.plotting import autocorrelation_plot + from pandas.plotting import autocorrelation_plot _check_plot_works(autocorrelation_plot, series=self.ts) _check_plot_works(autocorrelation_plot, series=self.ts.values) @@ -38,13 +38,13 @@ def test_autocorrelation_plot(self): @slow def test_lag_plot(self): - from pandas.tools.plotting import lag_plot + from pandas.plotting import lag_plot _check_plot_works(lag_plot, series=self.ts) _check_plot_works(lag_plot, series=self.ts, lag=5) @slow def test_bootstrap_plot(self): - from pandas.tools.plotting import bootstrap_plot + from pandas.plotting import bootstrap_plot _check_plot_works(bootstrap_plot, series=self.ts, size=10) @@ -84,7 +84,7 @@ def scat(**kwds): _check_plot_works(scat, facecolor='rgb') def scat2(x, y, by=None, ax=None, figsize=None): - return plotting.scatter_plot(df, x, y, by, ax, figsize=None) + return plotting._core.scatter_plot(df, x, y, by, ax, figsize=None) _check_plot_works(scat2, x=0, y=1) grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) @@ -130,7 +130,7 @@ def test_scatter_matrix_axis(self): @slow def test_andrews_curves(self): - from pandas.tools.plotting import andrews_curves + from pandas.plotting import andrews_curves from matplotlib import cm df = self.iris @@ -195,7 +195,7 @@ def test_andrews_curves(self): @slow def test_parallel_coordinates(self): - from pandas.tools.plotting import parallel_coordinates + from pandas.plotting import parallel_coordinates from matplotlib import cm df = self.iris @@ -263,7 +263,7 @@ def test_parallel_coordinates_with_sorted_labels(self): @slow def test_radviz(self): - from pandas.tools.plotting import radviz + from pandas.plotting import radviz from matplotlib import cm df = self.iris diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 8c00d606059a4..c3bc3ca6bf414 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -16,7 +16,7 @@ import numpy as np from numpy.random import randn -import pandas.tools.plotting as plotting +import pandas.plotting as plotting from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, _skip_if_no_scipy_gaussian_kde, _ok_for_gaussian_kde) @@ -622,7 +622,9 @@ def test_boxplot_series(self): @slow def test_kind_both_ways(self): s = Series(range(3)) - for kind in plotting._common_kinds + plotting._series_kinds: + kinds = (plotting._core._common_kinds + + plotting._core._series_kinds) + for kind in kinds: if not _ok_for_gaussian_kde(kind): continue s.plot(kind=kind) @@ -631,7 +633,7 @@ def test_kind_both_ways(self): @slow def test_invalid_plot_data(self): s = Series(list('abcd')) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -640,14 +642,14 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): s = Series(lrange(10), dtype=object) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue _check_plot_works(s.plot, kind=kind) def test_partially_invalid_plot_data(self): s = Series(['a', 'b', 1.0, 2]) - for kind in plotting._common_kinds: + for kind in plotting._core._common_kinds: if not _ok_for_gaussian_kde(kind): continue with tm.assertRaises(TypeError): @@ -718,54 +720,57 @@ def test_table(self): def test_series_grid_settings(self): # Make sure plot defaults to rcParams['axes.grid'] setting, GH 9792 self._check_grid_settings(Series([1, 2, 3]), - plotting._series_kinds + - plotting._common_kinds) + plotting._core._series_kinds + + plotting._core._common_kinds) @slow def test_standard_colors(self): + from pandas.plotting._style import _get_standard_colors + for c in ['r', 'red', 'green', '#FF0000']: - result = plotting._get_standard_colors(1, color=c) + result = _get_standard_colors(1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(1, color=[c]) + result = _get_standard_colors(1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(3, color=c) + result = _get_standard_colors(3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(3, color=[c]) + result = _get_standard_colors(3, color=[c]) self.assertEqual(result, [c] * 3) @slow def test_standard_colors_all(self): import matplotlib.colors as colors + from pandas.plotting._style import _get_standard_colors # multiple colors like mediumaquamarine for c in colors.cnames: - result = plotting._get_standard_colors(num_colors=1, color=c) + result = _get_standard_colors(num_colors=1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=1, color=[c]) + result = _get_standard_colors(num_colors=1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=3, color=c) + result = _get_standard_colors(num_colors=3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(num_colors=3, color=[c]) + result = _get_standard_colors(num_colors=3, color=[c]) self.assertEqual(result, [c] * 3) # single letter colors like k for c in colors.ColorConverter.colors: - result = plotting._get_standard_colors(num_colors=1, color=c) + result = _get_standard_colors(num_colors=1, color=c) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=1, color=[c]) + result = _get_standard_colors(num_colors=1, color=[c]) self.assertEqual(result, [c]) - result = plotting._get_standard_colors(num_colors=3, color=c) + result = _get_standard_colors(num_colors=3, color=c) self.assertEqual(result, [c] * 3) - result = plotting._get_standard_colors(num_colors=3, color=[c]) + result = _get_standard_colors(num_colors=3, color=[c]) self.assertEqual(result, [c] * 3) def test_series_plot_color_kwargs(self): diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 141e3c74b91c4..a68da67a219e2 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1,4032 +1,20 @@ -# being a bit too dynamic -# pylint: disable=E1101 -from __future__ import division - +import sys import warnings -import re -from math import ceil -from collections import namedtuple -from contextlib import contextmanager -from distutils.version import LooseVersion - -import numpy as np - -from pandas.types.common import (is_list_like, - is_integer, - is_number, - is_hashable, - is_iterator) -from pandas.types.missing import isnull, notnull - -from pandas.util.decorators import cache_readonly, deprecate_kwarg -from pandas.core.base import PandasObject - -from pandas.core.common import AbstractMethodError, _try_sort -from pandas.core.generic import _shared_docs, _shared_doc_kwargs -from pandas.core.index import Index, MultiIndex -from pandas.core.series import Series, remove_na -from pandas.tseries.period import PeriodIndex -from pandas.compat import range, lrange, lmap, map, zip, string_types -import pandas.compat as compat -from pandas.formats.printing import pprint_thing -from pandas.util.decorators import Appender -try: # mpl optional - import pandas.tseries.converter as conv - conv.register() # needs to override so set_xlim works with str/number -except ImportError: - pass - - -# Extracted from https://gist.github.com/huyng/816622 -# this is the rcParams set when setting display.with_mpl_style -# to True. -mpl_stylesheet = { - 'axes.axisbelow': True, - 'axes.color_cycle': ['#348ABD', - '#7A68A6', - '#A60628', - '#467821', - '#CF4457', - '#188487', - '#E24A33'], - 'axes.edgecolor': '#bcbcbc', - 'axes.facecolor': '#eeeeee', - 'axes.grid': True, - 'axes.labelcolor': '#555555', - 'axes.labelsize': 'large', - 'axes.linewidth': 1.0, - 'axes.titlesize': 'x-large', - 'figure.edgecolor': 'white', - 'figure.facecolor': 'white', - 'figure.figsize': (6.0, 4.0), - 'figure.subplot.hspace': 0.5, - 'font.family': 'monospace', - 'font.monospace': ['Andale Mono', - 'Nimbus Mono L', - 'Courier New', - 'Courier', - 'Fixed', - 'Terminal', - 'monospace'], - 'font.size': 10, - 'interactive': True, - 'keymap.all_axes': ['a'], - 'keymap.back': ['left', 'c', 'backspace'], - 'keymap.forward': ['right', 'v'], - 'keymap.fullscreen': ['f'], - 'keymap.grid': ['g'], - 'keymap.home': ['h', 'r', 'home'], - 'keymap.pan': ['p'], - 'keymap.save': ['s'], - 'keymap.xscale': ['L', 'k'], - 'keymap.yscale': ['l'], - 'keymap.zoom': ['o'], - 'legend.fancybox': True, - 'lines.antialiased': True, - 'lines.linewidth': 1.0, - 'patch.antialiased': True, - 'patch.edgecolor': '#EEEEEE', - 'patch.facecolor': '#348ABD', - 'patch.linewidth': 0.5, - 'toolbar': 'toolbar2', - 'xtick.color': '#555555', - 'xtick.direction': 'in', - 'xtick.major.pad': 6.0, - 'xtick.major.size': 0.0, - 'xtick.minor.pad': 6.0, - 'xtick.minor.size': 0.0, - 'ytick.color': '#555555', - 'ytick.direction': 'in', - 'ytick.major.pad': 6.0, - 'ytick.major.size': 0.0, - 'ytick.minor.pad': 6.0, - 'ytick.minor.size': 0.0 -} - - -def _mpl_le_1_2_1(): - try: - import matplotlib as mpl - return (str(mpl.__version__) <= LooseVersion('1.2.1') and - str(mpl.__version__)[0] != '0') - except ImportError: - return False - - -def _mpl_ge_1_3_1(): - try: - import matplotlib - # The or v[0] == '0' is because their versioneer is - # messed up on dev - return (matplotlib.__version__ >= LooseVersion('1.3.1') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_1_4_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.4') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_1_5_0(): - try: - import matplotlib - return (matplotlib.__version__ >= LooseVersion('1.5') or - matplotlib.__version__[0] == '0') - except ImportError: - return False - - -def _mpl_ge_2_0_0(): - try: - import matplotlib - return matplotlib.__version__ >= LooseVersion('2.0') - except ImportError: - return False - - -def _mpl_ge_2_0_1(): - try: - import matplotlib - return matplotlib.__version__ >= LooseVersion('2.0.1') - except ImportError: - return False - - -if _mpl_ge_1_5_0(): - # Compat with mp 1.5, which uses cycler. - import cycler - colors = mpl_stylesheet.pop('axes.color_cycle') - mpl_stylesheet['axes.prop_cycle'] = cycler.cycler('color', colors) - - -def _get_standard_kind(kind): - return {'density': 'kde'}.get(kind, kind) - - -def _get_standard_colors(num_colors=None, colormap=None, color_type='default', - color=None): - import matplotlib.pyplot as plt - - if color is None and colormap is not None: - if isinstance(colormap, compat.string_types): - import matplotlib.cm as cm - cmap = colormap - colormap = cm.get_cmap(colormap) - if colormap is None: - raise ValueError("Colormap {0} is not recognized".format(cmap)) - colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) - elif color is not None: - if colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - colors = list(color) if is_list_like(color) else color - else: - if color_type == 'default': - # need to call list() on the result to copy so we don't - # modify the global rcParams below - try: - colors = [c['color'] - for c in list(plt.rcParams['axes.prop_cycle'])] - except KeyError: - colors = list(plt.rcParams.get('axes.color_cycle', - list('bgrcmyk'))) - if isinstance(colors, compat.string_types): - colors = list(colors) - elif color_type == 'random': - import random - - def random_color(column): - random.seed(column) - return [random.random() for _ in range(3)] - - colors = lmap(random_color, lrange(num_colors)) - else: - raise ValueError("color_type must be either 'default' or 'random'") - - if isinstance(colors, compat.string_types): - import matplotlib.colors - conv = matplotlib.colors.ColorConverter() - - def _maybe_valid_colors(colors): - try: - [conv.to_rgba(c) for c in colors] - return True - except ValueError: - return False - - # check whether the string can be convertable to single color - maybe_single_color = _maybe_valid_colors([colors]) - # check whether each character can be convertable to colors - maybe_color_cycle = _maybe_valid_colors(list(colors)) - if maybe_single_color and maybe_color_cycle and len(colors) > 1: - # Special case for single str 'CN' match and convert to hex - # for supporting matplotlib < 2.0.0 - if re.match(r'\AC[0-9]\Z', colors) and _mpl_ge_2_0_0(): - hex_color = [c['color'] - for c in list(plt.rcParams['axes.prop_cycle'])] - colors = [hex_color[int(colors[1])]] - else: - # this may no longer be required - msg = ("'{0}' can be parsed as both single color and " - "color cycle. Specify each color using a list " - "like ['{0}'] or {1}") - raise ValueError(msg.format(colors, list(colors))) - elif maybe_single_color: - colors = [colors] - else: - # ``colors`` is regarded as color cycle. - # mpl will raise error any of them is invalid - pass - - if len(colors) != num_colors: - try: - multiple = num_colors // len(colors) - 1 - except ZeroDivisionError: - raise ValueError("Invalid color argument: ''") - mod = num_colors % len(colors) - - colors += multiple * colors - colors += colors[:mod] - - return colors - - -class _Options(dict): - """ - Stores pandas plotting options. - Allows for parameter aliasing so you can just use parameter names that are - the same as the plot function parameters, but is stored in a canonical - format that makes it easy to breakdown into groups later - """ - - # alias so the names are same as plotting method parameter names - _ALIASES = {'x_compat': 'xaxis.compat'} - _DEFAULT_KEYS = ['xaxis.compat'] - - def __init__(self): - self['xaxis.compat'] = False - - def __getitem__(self, key): - key = self._get_canonical_key(key) - if key not in self: - raise ValueError('%s is not a valid pandas plotting option' % key) - return super(_Options, self).__getitem__(key) - - def __setitem__(self, key, value): - key = self._get_canonical_key(key) - return super(_Options, self).__setitem__(key, value) - - def __delitem__(self, key): - key = self._get_canonical_key(key) - if key in self._DEFAULT_KEYS: - raise ValueError('Cannot remove default parameter %s' % key) - return super(_Options, self).__delitem__(key) - - def __contains__(self, key): - key = self._get_canonical_key(key) - return super(_Options, self).__contains__(key) - - def reset(self): - """ - Reset the option store to its initial state - - Returns - ------- - None - """ - self.__init__() - - def _get_canonical_key(self, key): - return self._ALIASES.get(key, key) - - @contextmanager - def use(self, key, value): - """ - Temporarily set a parameter value using the with statement. - Aliasing allowed. - """ - old_value = self[key] - try: - self[key] = value - yield self - finally: - self[key] = old_value - - -plot_params = _Options() - - -def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, - diagonal='hist', marker='.', density_kwds=None, - hist_kwds=None, range_padding=0.05, **kwds): - """ - Draw a matrix of scatter plots. - - Parameters - ---------- - frame : DataFrame - alpha : float, optional - amount of transparency applied - figsize : (float,float), optional - a tuple (width, height) in inches - ax : Matplotlib axis object, optional - grid : bool, optional - setting this to True will show the grid - diagonal : {'hist', 'kde'} - pick between 'kde' and 'hist' for - either Kernel Density Estimation or Histogram - plot in the diagonal - marker : str, optional - Matplotlib marker type, default '.' - hist_kwds : other plotting keyword arguments - To be passed to hist function - density_kwds : other plotting keyword arguments - To be passed to kernel density estimate plot - range_padding : float, optional - relative extension of axis range in x and y - with respect to (x_max - x_min) or (y_max - y_min), - default 0.05 - kwds : other plotting keyword arguments - To be passed to scatter function - - Examples - -------- - >>> df = DataFrame(np.random.randn(1000, 4), columns=['A','B','C','D']) - >>> scatter_matrix(df, alpha=0.2) - """ - - df = frame._get_numeric_data() - n = df.columns.size - naxes = n * n - fig, axes = _subplots(naxes=naxes, figsize=figsize, ax=ax, - squeeze=False) - - # no gaps between subplots - fig.subplots_adjust(wspace=0, hspace=0) - - mask = notnull(df) - - marker = _get_marker_compat(marker) - - hist_kwds = hist_kwds or {} - density_kwds = density_kwds or {} - - # GH 14855 - kwds.setdefault('edgecolors', 'none') - - boundaries_list = [] - for a in df.columns: - values = df[a].values[mask[a].values] - rmin_, rmax_ = np.min(values), np.max(values) - rdelta_ext = (rmax_ - rmin_) * range_padding / 2. - boundaries_list.append((rmin_ - rdelta_ext, rmax_ + rdelta_ext)) - - for i, a in zip(lrange(n), df.columns): - for j, b in zip(lrange(n), df.columns): - ax = axes[i, j] - - if i == j: - values = df[a].values[mask[a].values] - - # Deal with the diagonal by drawing a histogram there. - if diagonal == 'hist': - ax.hist(values, **hist_kwds) - - elif diagonal in ('kde', 'density'): - from scipy.stats import gaussian_kde - y = values - gkde = gaussian_kde(y) - ind = np.linspace(y.min(), y.max(), 1000) - ax.plot(ind, gkde.evaluate(ind), **density_kwds) - - ax.set_xlim(boundaries_list[i]) - - else: - common = (mask[a] & mask[b]).values - - ax.scatter(df[b][common], df[a][common], - marker=marker, alpha=alpha, **kwds) - - ax.set_xlim(boundaries_list[j]) - ax.set_ylim(boundaries_list[i]) - - ax.set_xlabel(b) - ax.set_ylabel(a) - - if j != 0: - ax.yaxis.set_visible(False) - if i != n - 1: - ax.xaxis.set_visible(False) - - if len(df.columns) > 1: - lim1 = boundaries_list[0] - locs = axes[0][1].yaxis.get_majorticklocs() - locs = locs[(lim1[0] <= locs) & (locs <= lim1[1])] - adj = (locs - lim1[0]) / (lim1[1] - lim1[0]) - - lim0 = axes[0][0].get_ylim() - adj = adj * (lim0[1] - lim0[0]) + lim0[0] - axes[0][0].yaxis.set_ticks(adj) - - if np.all(locs == locs.astype(int)): - # if all ticks are int - locs = locs.astype(int) - axes[0][0].yaxis.set_ticklabels(locs) - - _set_ticks_props(axes, xlabelsize=8, xrot=90, ylabelsize=8, yrot=0) - - return axes - - -def _gca(): - import matplotlib.pyplot as plt - return plt.gca() - - -def _gcf(): - import matplotlib.pyplot as plt - return plt.gcf() - - -def _get_marker_compat(marker): - import matplotlib.lines as mlines - import matplotlib as mpl - if mpl.__version__ < '1.1.0' and marker == '.': - return 'o' - if marker not in mlines.lineMarkers: - return 'o' - return marker - - -def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): - """RadViz - a multivariate data visualization algorithm - - Parameters: - ----------- - frame: DataFrame - class_column: str - Column name containing class names - ax: Matplotlib axis object, optional - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - kwds: keywords - Options to pass to matplotlib scatter plotting method - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - import matplotlib.patches as patches - - def normalize(series): - a = min(series) - b = max(series) - return (series - a) / (b - a) - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - df = frame.drop(class_column, axis=1).apply(normalize) - - if ax is None: - ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1]) - - to_plot = {} - colors = _get_standard_colors(num_colors=len(classes), colormap=colormap, - color_type='random', color=color) - - for kls in classes: - to_plot[kls] = [[], []] - - m = len(frame.columns) - 1 - s = np.array([(np.cos(t), np.sin(t)) - for t in [2.0 * np.pi * (i / float(m)) - for i in range(m)]]) - - for i in range(n): - row = df.iloc[i].values - row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1) - y = (s * row_).sum(axis=0) / row.sum() - kls = class_col.iat[i] - to_plot[kls][0].append(y[0]) - to_plot[kls][1].append(y[1]) - - for i, kls in enumerate(classes): - ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i], - label=pprint_thing(kls), **kwds) - ax.legend() - - ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none')) - - for xy, name in zip(s, df.columns): - - ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray')) - - if xy[0] < 0.0 and xy[1] < 0.0: - ax.text(xy[0] - 0.025, xy[1] - 0.025, name, - ha='right', va='top', size='small') - elif xy[0] < 0.0 and xy[1] >= 0.0: - ax.text(xy[0] - 0.025, xy[1] + 0.025, name, - ha='right', va='bottom', size='small') - elif xy[0] >= 0.0 and xy[1] < 0.0: - ax.text(xy[0] + 0.025, xy[1] - 0.025, name, - ha='left', va='top', size='small') - elif xy[0] >= 0.0 and xy[1] >= 0.0: - ax.text(xy[0] + 0.025, xy[1] + 0.025, name, - ha='left', va='bottom', size='small') - - ax.axis('equal') - return ax - - -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame') -def andrews_curves(frame, class_column, ax=None, samples=200, color=None, - colormap=None, **kwds): - """ - Generates a matplotlib plot of Andrews curves, for visualising clusters of - multivariate data. - - Andrews curves have the functional form: - - f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) + - x_4 sin(2t) + x_5 cos(2t) + ... - - Where x coefficients correspond to the values of each dimension and t is - linearly spaced between -pi and +pi. Each row of frame then corresponds to - a single curve. - - Parameters: - ----------- - frame : DataFrame - Data to be plotted, preferably normalized to (0.0, 1.0) - class_column : Name of the column containing class names - ax : matplotlib axes object, default None - samples : Number of points to plot in each curve - color: list or tuple, optional - Colors to use for the different classes - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - kwds: keywords - Options to pass to matplotlib plotting method - - Returns: - -------- - ax: Matplotlib axis object - - """ - from math import sqrt, pi - import matplotlib.pyplot as plt - - def function(amplitudes): - def f(t): - x1 = amplitudes[0] - result = x1 / sqrt(2.0) - - # Take the rest of the coefficients and resize them - # appropriately. Take a copy of amplitudes as otherwise numpy - # deletes the element from amplitudes itself. - coeffs = np.delete(np.copy(amplitudes), 0) - coeffs.resize(int((coeffs.size + 1) / 2), 2) - - # Generate the harmonics and arguments for the sin and cos - # functions. - harmonics = np.arange(0, coeffs.shape[0]) + 1 - trig_args = np.outer(harmonics, t) - - result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) + - coeffs[:, 1, np.newaxis] * np.cos(trig_args), - axis=0) - return result - return f - - n = len(frame) - class_col = frame[class_column] - classes = frame[class_column].drop_duplicates() - df = frame.drop(class_column, axis=1) - t = np.linspace(-pi, pi, samples) - used_legends = set([]) - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - colors = dict(zip(classes, color_values)) - if ax is None: - ax = plt.gca(xlim=(-pi, pi)) - for i in range(n): - row = df.iloc[i].values - f = function(row) - y = f(t) - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(t, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(t, y, color=colors[kls], **kwds) - - ax.legend(loc='upper right') - ax.grid() - return ax - - -def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): - """Bootstrap plot. - - Parameters: - ----------- - series: Time series - fig: matplotlib figure object, optional - size: number of data points to consider during each sampling - samples: number of times the bootstrap procedure is performed - kwds: optional keyword arguments for plotting commands, must be accepted - by both hist and plot - - Returns: - -------- - fig: matplotlib figure - """ - import random - import matplotlib.pyplot as plt - - # random.sample(ndarray, int) fails on python 3.3, sigh - data = list(series.values) - samplings = [random.sample(data, size) for _ in range(samples)] - - means = np.array([np.mean(sampling) for sampling in samplings]) - medians = np.array([np.median(sampling) for sampling in samplings]) - midranges = np.array([(min(sampling) + max(sampling)) * 0.5 - for sampling in samplings]) - if fig is None: - fig = plt.figure() - x = lrange(samples) - axes = [] - ax1 = fig.add_subplot(2, 3, 1) - ax1.set_xlabel("Sample") - axes.append(ax1) - ax1.plot(x, means, **kwds) - ax2 = fig.add_subplot(2, 3, 2) - ax2.set_xlabel("Sample") - axes.append(ax2) - ax2.plot(x, medians, **kwds) - ax3 = fig.add_subplot(2, 3, 3) - ax3.set_xlabel("Sample") - axes.append(ax3) - ax3.plot(x, midranges, **kwds) - ax4 = fig.add_subplot(2, 3, 4) - ax4.set_xlabel("Mean") - axes.append(ax4) - ax4.hist(means, **kwds) - ax5 = fig.add_subplot(2, 3, 5) - ax5.set_xlabel("Median") - axes.append(ax5) - ax5.hist(medians, **kwds) - ax6 = fig.add_subplot(2, 3, 6) - ax6.set_xlabel("Midrange") - axes.append(ax6) - ax6.hist(midranges, **kwds) - for axis in axes: - plt.setp(axis.get_xticklabels(), fontsize=8) - plt.setp(axis.get_yticklabels(), fontsize=8) - return fig - - -@deprecate_kwarg(old_arg_name='colors', new_arg_name='color') -@deprecate_kwarg(old_arg_name='data', new_arg_name='frame', stacklevel=3) -def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None, - use_columns=False, xticks=None, colormap=None, - axvlines=True, axvlines_kwds=None, sort_labels=False, - **kwds): - """Parallel coordinates plotting. - - Parameters - ---------- - frame: DataFrame - class_column: str - Column name containing class names - cols: list, optional - A list of column names to use - ax: matplotlib.axis, optional - matplotlib axis object - color: list or tuple, optional - Colors to use for the different classes - use_columns: bool, optional - If true, columns will be used as xticks - xticks: list or tuple, optional - A list of values to use for xticks - colormap: str or matplotlib colormap, default None - Colormap to use for line colors. - axvlines: bool, optional - If true, vertical lines will be added at each xtick - axvlines_kwds: keywords, optional - Options to be passed to axvline method for vertical lines - sort_labels: bool, False - Sort class_column labels, useful when assigning colours - - .. versionadded:: 0.20.0 - - kwds: keywords - Options to pass to matplotlib plotting method - - Returns - ------- - ax: matplotlib axis object - - Examples - -------- - >>> from pandas import read_csv - >>> from pandas.tools.plotting import parallel_coordinates - >>> from matplotlib import pyplot as plt - >>> df = read_csv('https://raw.github.com/pandas-dev/pandas/master' - '/pandas/tests/data/iris.csv') - >>> parallel_coordinates(df, 'Name', color=('#556270', - '#4ECDC4', '#C7F464')) - >>> plt.show() - """ - if axvlines_kwds is None: - axvlines_kwds = {'linewidth': 1, 'color': 'black'} - import matplotlib.pyplot as plt - - n = len(frame) - classes = frame[class_column].drop_duplicates() - class_col = frame[class_column] - - if cols is None: - df = frame.drop(class_column, axis=1) - else: - df = frame[cols] - - used_legends = set([]) - - ncols = len(df.columns) - - # determine values to use for xticks - if use_columns is True: - if not np.all(np.isreal(list(df.columns))): - raise ValueError('Columns must be numeric to be used as xticks') - x = df.columns - elif xticks is not None: - if not np.all(np.isreal(xticks)): - raise ValueError('xticks specified must be numeric') - elif len(xticks) != ncols: - raise ValueError('Length of xticks must match number of columns') - x = xticks - else: - x = lrange(ncols) - - if ax is None: - ax = plt.gca() - - color_values = _get_standard_colors(num_colors=len(classes), - colormap=colormap, color_type='random', - color=color) - - if sort_labels: - classes = sorted(classes) - color_values = sorted(color_values) - colors = dict(zip(classes, color_values)) - - for i in range(n): - y = df.iloc[i].values - kls = class_col.iat[i] - label = pprint_thing(kls) - if label not in used_legends: - used_legends.add(label) - ax.plot(x, y, color=colors[kls], label=label, **kwds) - else: - ax.plot(x, y, color=colors[kls], **kwds) - - if axvlines: - for i in x: - ax.axvline(i, **axvlines_kwds) - - ax.set_xticks(x) - ax.set_xticklabels(df.columns) - ax.set_xlim(x[0], x[-1]) - ax.legend(loc='upper right') - ax.grid() - return ax - - -def lag_plot(series, lag=1, ax=None, **kwds): - """Lag plot for time series. - - Parameters: - ----------- - series: Time series - lag: lag of the scatter plot, default 1 - ax: Matplotlib axis object, optional - kwds: Matplotlib scatter method keyword arguments, optional - - Returns: - -------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - - # workaround because `c='b'` is hardcoded in matplotlibs scatter method - kwds.setdefault('c', plt.rcParams['patch.facecolor']) - - data = series.values - y1 = data[:-lag] - y2 = data[lag:] - if ax is None: - ax = plt.gca() - ax.set_xlabel("y(t)") - ax.set_ylabel("y(t + %s)" % lag) - ax.scatter(y1, y2, **kwds) - return ax - - -def autocorrelation_plot(series, ax=None, **kwds): - """Autocorrelation plot for time series. - - Parameters: - ----------- - series: Time series - ax: Matplotlib axis object, optional - kwds : keywords - Options to pass to matplotlib plotting method - - Returns: - ----------- - ax: Matplotlib axis object - """ - import matplotlib.pyplot as plt - n = len(series) - data = np.asarray(series) - if ax is None: - ax = plt.gca(xlim=(1, n), ylim=(-1.0, 1.0)) - mean = np.mean(data) - c0 = np.sum((data - mean) ** 2) / float(n) - - def r(h): - return ((data[:n - h] - mean) * - (data[h:] - mean)).sum() / float(n) / c0 - x = np.arange(n) + 1 - y = lmap(r, x) - z95 = 1.959963984540054 - z99 = 2.5758293035489004 - ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') - ax.axhline(y=z95 / np.sqrt(n), color='grey') - ax.axhline(y=0.0, color='black') - ax.axhline(y=-z95 / np.sqrt(n), color='grey') - ax.axhline(y=-z99 / np.sqrt(n), linestyle='--', color='grey') - ax.set_xlabel("Lag") - ax.set_ylabel("Autocorrelation") - ax.plot(x, y, **kwds) - if 'label' in kwds: - ax.legend() - ax.grid() - return ax - - -class MPLPlot(object): - """ - Base class for assembling a pandas plot using matplotlib - - Parameters - ---------- - data : - - """ - - @property - def _kind(self): - """Specify kind str. Must be overridden in child class""" - raise NotImplementedError - - _layout_type = 'vertical' - _default_rot = 0 - orientation = None - _pop_attributes = ['label', 'style', 'logy', 'logx', 'loglog', - 'mark_right', 'stacked'] - _attr_defaults = {'logy': False, 'logx': False, 'loglog': False, - 'mark_right': True, 'stacked': False} - - def __init__(self, data, kind=None, by=None, subplots=False, sharex=None, - sharey=False, use_index=True, - figsize=None, grid=None, legend=True, rot=None, - ax=None, fig=None, title=None, xlim=None, ylim=None, - xticks=None, yticks=None, - sort_columns=False, fontsize=None, - secondary_y=False, colormap=None, - table=False, layout=None, **kwds): - - self.data = data - self.by = by - - self.kind = kind - - self.sort_columns = sort_columns - - self.subplots = subplots - - if sharex is None: - if ax is None: - self.sharex = True - else: - # if we get an axis, the users should do the visibility - # setting... - self.sharex = False - else: - self.sharex = sharex - - self.sharey = sharey - self.figsize = figsize - self.layout = layout - - self.xticks = xticks - self.yticks = yticks - self.xlim = xlim - self.ylim = ylim - self.title = title - self.use_index = use_index - - self.fontsize = fontsize - - if rot is not None: - self.rot = rot - # need to know for format_date_labels since it's rotated to 30 by - # default - self._rot_set = True - else: - self._rot_set = False - self.rot = self._default_rot - - if grid is None: - grid = False if secondary_y else self.plt.rcParams['axes.grid'] - - self.grid = grid - self.legend = legend - self.legend_handles = [] - self.legend_labels = [] - - for attr in self._pop_attributes: - value = kwds.pop(attr, self._attr_defaults.get(attr, None)) - setattr(self, attr, value) - - self.ax = ax - self.fig = fig - self.axes = None - - # parse errorbar input if given - xerr = kwds.pop('xerr', None) - yerr = kwds.pop('yerr', None) - self.errors = {} - for kw, err in zip(['xerr', 'yerr'], [xerr, yerr]): - self.errors[kw] = self._parse_errorbars(kw, err) - - if not isinstance(secondary_y, (bool, tuple, list, np.ndarray, Index)): - secondary_y = [secondary_y] - self.secondary_y = secondary_y - - # ugly TypeError if user passes matplotlib's `cmap` name. - # Probably better to accept either. - if 'cmap' in kwds and colormap: - raise TypeError("Only specify one of `cmap` and `colormap`.") - elif 'cmap' in kwds: - self.colormap = kwds.pop('cmap') - else: - self.colormap = colormap - - self.table = table - - self.kwds = kwds - - self._validate_color_args() - - def _validate_color_args(self): - if 'color' not in self.kwds and 'colors' in self.kwds: - warnings.warn(("'colors' is being deprecated. Please use 'color'" - "instead of 'colors'")) - colors = self.kwds.pop('colors') - self.kwds['color'] = colors - - if ('color' in self.kwds and self.nseries == 1): - # support series.plot(color='green') - self.kwds['color'] = [self.kwds['color']] - - if ('color' in self.kwds or 'colors' in self.kwds) and \ - self.colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - - if 'color' in self.kwds and self.style is not None: - if is_list_like(self.style): - styles = self.style - else: - styles = [self.style] - # need only a single match - for s in styles: - if re.match('^[a-z]+?', s) is not None: - raise ValueError( - "Cannot pass 'style' string with a color " - "symbol and 'color' keyword argument. Please" - " use one or the other or pass 'style' " - "without a color symbol") - - def _iter_data(self, data=None, keep_index=False, fillna=None): - if data is None: - data = self.data - if fillna is not None: - data = data.fillna(fillna) - - # TODO: unused? - # if self.sort_columns: - # columns = _try_sort(data.columns) - # else: - # columns = data.columns - - for col, values in data.iteritems(): - if keep_index is True: - yield col, values - else: - yield col, values.values - - @property - def nseries(self): - if self.data.ndim == 1: - return 1 - else: - return self.data.shape[1] - - def draw(self): - self.plt.draw_if_interactive() - - def generate(self): - self._args_adjust() - self._compute_plot_data() - self._setup_subplots() - self._make_plot() - self._add_table() - self._make_legend() - self._adorn_subplots() - - for ax in self.axes: - self._post_plot_logic_common(ax, self.data) - self._post_plot_logic(ax, self.data) - - def _args_adjust(self): - pass - - def _has_plotted_object(self, ax): - """check whether ax has data""" - return (len(ax.lines) != 0 or - len(ax.artists) != 0 or - len(ax.containers) != 0) - - def _maybe_right_yaxis(self, ax, axes_num): - if not self.on_right(axes_num): - # secondary axes may be passed via ax kw - return self._get_ax_layer(ax) - - if hasattr(ax, 'right_ax'): - # if it has right_ax proparty, ``ax`` must be left axes - return ax.right_ax - elif hasattr(ax, 'left_ax'): - # if it has left_ax proparty, ``ax`` must be right axes - return ax - else: - # otherwise, create twin axes - orig_ax, new_ax = ax, ax.twinx() - # TODO: use Matplotlib public API when available - new_ax._get_lines = orig_ax._get_lines - new_ax._get_patches_for_fill = orig_ax._get_patches_for_fill - orig_ax.right_ax, new_ax.left_ax = new_ax, orig_ax - - if not self._has_plotted_object(orig_ax): # no data on left y - orig_ax.get_yaxis().set_visible(False) - return new_ax - - def _setup_subplots(self): - if self.subplots: - fig, axes = _subplots(naxes=self.nseries, - sharex=self.sharex, sharey=self.sharey, - figsize=self.figsize, ax=self.ax, - layout=self.layout, - layout_type=self._layout_type) - else: - if self.ax is None: - fig = self.plt.figure(figsize=self.figsize) - axes = fig.add_subplot(111) - else: - fig = self.ax.get_figure() - if self.figsize is not None: - fig.set_size_inches(self.figsize) - axes = self.ax - - axes = _flatten(axes) - - if self.logx or self.loglog: - [a.set_xscale('log') for a in axes] - if self.logy or self.loglog: - [a.set_yscale('log') for a in axes] - - self.fig = fig - self.axes = axes - - @property - def result(self): - """ - Return result axes - """ - if self.subplots: - if self.layout is not None and not is_list_like(self.ax): - return self.axes.reshape(*self.layout) - else: - return self.axes - else: - sec_true = isinstance(self.secondary_y, bool) and self.secondary_y - all_sec = (is_list_like(self.secondary_y) and - len(self.secondary_y) == self.nseries) - if (sec_true or all_sec): - # if all data is plotted on secondary, return right axes - return self._get_ax_layer(self.axes[0], primary=False) - else: - return self.axes[0] - - def _compute_plot_data(self): - data = self.data - - if isinstance(data, Series): - label = self.label - if label is None and data.name is None: - label = 'None' - data = data.to_frame(name=label) - - numeric_data = data._convert(datetime=True)._get_numeric_data() - - try: - is_empty = numeric_data.empty - except AttributeError: - is_empty = not len(numeric_data) - - # no empty frames or series allowed - if is_empty: - raise TypeError('Empty {0!r}: no numeric data to ' - 'plot'.format(numeric_data.__class__.__name__)) - - self.data = numeric_data - - def _make_plot(self): - raise AbstractMethodError(self) - - def _add_table(self): - if self.table is False: - return - elif self.table is True: - data = self.data.transpose() - else: - data = self.table - ax = self._get_ax(0) - table(ax, data) - - def _post_plot_logic_common(self, ax, data): - """Common post process for each axes""" - labels = [pprint_thing(key) for key in data.index] - labels = dict(zip(range(len(data.index)), labels)) - - if self.orientation == 'vertical' or self.orientation is None: - if self._need_to_set_index: - xticklabels = [labels.get(x, '') for x in ax.get_xticks()] - ax.set_xticklabels(xticklabels) - self._apply_axis_properties(ax.xaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.yaxis, fontsize=self.fontsize) - elif self.orientation == 'horizontal': - if self._need_to_set_index: - yticklabels = [labels.get(y, '') for y in ax.get_yticks()] - ax.set_yticklabels(yticklabels) - self._apply_axis_properties(ax.yaxis, rot=self.rot, - fontsize=self.fontsize) - self._apply_axis_properties(ax.xaxis, fontsize=self.fontsize) - else: # pragma no cover - raise ValueError - - def _post_plot_logic(self, ax, data): - """Post process for each axes. Overridden in child classes""" - pass - - def _adorn_subplots(self): - """Common post process unrelated to data""" - if len(self.axes) > 0: - all_axes = self._get_subplots() - nrows, ncols = self._get_axes_layout() - _handle_shared_axes(axarr=all_axes, nplots=len(all_axes), - naxes=nrows * ncols, nrows=nrows, - ncols=ncols, sharex=self.sharex, - sharey=self.sharey) - - for ax in self.axes: - if self.yticks is not None: - ax.set_yticks(self.yticks) - - if self.xticks is not None: - ax.set_xticks(self.xticks) - - if self.ylim is not None: - ax.set_ylim(self.ylim) - - if self.xlim is not None: - ax.set_xlim(self.xlim) - - ax.grid(self.grid) - - if self.title: - if self.subplots: - if is_list_like(self.title): - if len(self.title) != self.nseries: - msg = ('The length of `title` must equal the number ' - 'of columns if using `title` of type `list` ' - 'and `subplots=True`.\n' - 'length of title = {}\n' - 'number of columns = {}').format( - len(self.title), self.nseries) - raise ValueError(msg) - - for (ax, title) in zip(self.axes, self.title): - ax.set_title(title) - else: - self.fig.suptitle(self.title) - else: - if is_list_like(self.title): - msg = ('Using `title` of type `list` is not supported ' - 'unless `subplots=True` is passed') - raise ValueError(msg) - self.axes[0].set_title(self.title) - - def _apply_axis_properties(self, axis, rot=None, fontsize=None): - labels = axis.get_majorticklabels() + axis.get_minorticklabels() - for label in labels: - if rot is not None: - label.set_rotation(rot) - if fontsize is not None: - label.set_fontsize(fontsize) - - @property - def legend_title(self): - if not isinstance(self.data.columns, MultiIndex): - name = self.data.columns.name - if name is not None: - name = pprint_thing(name) - return name - else: - stringified = map(pprint_thing, - self.data.columns.names) - return ','.join(stringified) - - def _add_legend_handle(self, handle, label, index=None): - if label is not None: - if self.mark_right and index is not None: - if self.on_right(index): - label = label + ' (right)' - self.legend_handles.append(handle) - self.legend_labels.append(label) - - def _make_legend(self): - ax, leg = self._get_ax_legend(self.axes[0]) - - handles = [] - labels = [] - title = '' - - if not self.subplots: - if leg is not None: - title = leg.get_title().get_text() - handles = leg.legendHandles - labels = [x.get_text() for x in leg.get_texts()] - - if self.legend: - if self.legend == 'reverse': - self.legend_handles = reversed(self.legend_handles) - self.legend_labels = reversed(self.legend_labels) - - handles += self.legend_handles - labels += self.legend_labels - if self.legend_title is not None: - title = self.legend_title - - if len(handles) > 0: - ax.legend(handles, labels, loc='best', title=title) - - elif self.subplots and self.legend: - for ax in self.axes: - if ax.get_visible(): - ax.legend(loc='best') - - def _get_ax_legend(self, ax): - leg = ax.get_legend() - other_ax = (getattr(ax, 'left_ax', None) or - getattr(ax, 'right_ax', None)) - other_leg = None - if other_ax is not None: - other_leg = other_ax.get_legend() - if leg is None and other_leg is not None: - leg = other_leg - ax = other_ax - return ax, leg - - @cache_readonly - def plt(self): - import matplotlib.pyplot as plt - return plt - - @staticmethod - def mpl_ge_1_3_1(): - return _mpl_ge_1_3_1() - - @staticmethod - def mpl_ge_1_5_0(): - return _mpl_ge_1_5_0() - - _need_to_set_index = False - - def _get_xticks(self, convert_period=False): - index = self.data.index - is_datetype = index.inferred_type in ('datetime', 'date', - 'datetime64', 'time') - - if self.use_index: - if convert_period and isinstance(index, PeriodIndex): - self.data = self.data.reindex(index=index.sort_values()) - x = self.data.index.to_timestamp()._mpl_repr() - elif index.is_numeric(): - """ - Matplotlib supports numeric values or datetime objects as - xaxis values. Taking LBYL approach here, by the time - matplotlib raises exception when using non numeric/datetime - values for xaxis, several actions are already taken by plt. - """ - x = index._mpl_repr() - elif is_datetype: - self.data = self.data.sort_index() - x = self.data.index._mpl_repr() - else: - self._need_to_set_index = True - x = lrange(len(index)) - else: - x = lrange(len(index)) - - return x - - @classmethod - def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = isnull(y) - if mask.any(): - y = np.ma.array(y) - y = np.ma.masked_where(mask, y) - - if isinstance(x, Index): - x = x._mpl_repr() - - if is_errorbar: - if 'xerr' in kwds: - kwds['xerr'] = np.array(kwds.get('xerr')) - if 'yerr' in kwds: - kwds['yerr'] = np.array(kwds.get('yerr')) - return ax.errorbar(x, y, **kwds) - else: - # prevent style kwarg from going to errorbar, where it is - # unsupported - if style is not None: - args = (x, y, style) - else: - args = (x, y) - return ax.plot(*args, **kwds) - - def _get_index_name(self): - if isinstance(self.data.index, MultiIndex): - name = self.data.index.names - if any(x is not None for x in name): - name = ','.join([pprint_thing(x) for x in name]) - else: - name = None - else: - name = self.data.index.name - if name is not None: - name = pprint_thing(name) - - return name - - @classmethod - def _get_ax_layer(cls, ax, primary=True): - """get left (primary) or right (secondary) axes""" - if primary: - return getattr(ax, 'left_ax', ax) - else: - return getattr(ax, 'right_ax', ax) - - def _get_ax(self, i): - # get the twinx ax if appropriate - if self.subplots: - ax = self.axes[i] - ax = self._maybe_right_yaxis(ax, i) - self.axes[i] = ax - else: - ax = self.axes[0] - ax = self._maybe_right_yaxis(ax, i) - - ax.get_yaxis().set_visible(True) - return ax - - def on_right(self, i): - if isinstance(self.secondary_y, bool): - return self.secondary_y - - if isinstance(self.secondary_y, (tuple, list, np.ndarray, Index)): - return self.data.columns[i] in self.secondary_y - - def _apply_style_colors(self, colors, kwds, col_num, label): - """ - Manage style and color based on column number and its label. - Returns tuple of appropriate style and kwds which "color" may be added. - """ - style = None - if self.style is not None: - if isinstance(self.style, list): - try: - style = self.style[col_num] - except IndexError: - pass - elif isinstance(self.style, dict): - style = self.style.get(label, style) - else: - style = self.style - - has_color = 'color' in kwds or self.colormap is not None - nocolor_style = style is None or re.match('[a-z]+', style) is None - if (has_color or self.subplots) and nocolor_style: - kwds['color'] = colors[col_num % len(colors)] - return style, kwds - - def _get_colors(self, num_colors=None, color_kwds='color'): - if num_colors is None: - num_colors = self.nseries - - return _get_standard_colors(num_colors=num_colors, - colormap=self.colormap, - color=self.kwds.get(color_kwds)) - - def _parse_errorbars(self, label, err): - """ - Look for error keyword arguments and return the actual errorbar data - or return the error DataFrame/dict - - Error bars can be specified in several ways: - Series: the user provides a pandas.Series object of the same - length as the data - ndarray: provides a np.ndarray of the same length as the data - DataFrame/dict: error values are paired with keys matching the - key in the plotted DataFrame - str: the name of the column within the plotted DataFrame - """ - - if err is None: - return None - - from pandas import DataFrame, Series - - def match_labels(data, e): - e = e.reindex_axis(data.index) - return e - - # key-matched DataFrame - if isinstance(err, DataFrame): - - err = match_labels(self.data, err) - # key-matched dict - elif isinstance(err, dict): - pass - - # Series of error values - elif isinstance(err, Series): - # broadcast error series across data - err = match_labels(self.data, err) - err = np.atleast_2d(err) - err = np.tile(err, (self.nseries, 1)) - - # errors are a column in the dataframe - elif isinstance(err, string_types): - evalues = self.data[err].values - self.data = self.data[self.data.columns.drop(err)] - err = np.atleast_2d(evalues) - err = np.tile(err, (self.nseries, 1)) - - elif is_list_like(err): - if is_iterator(err): - err = np.atleast_2d(list(err)) - else: - # raw error values - err = np.atleast_2d(err) - - err_shape = err.shape - - # asymmetrical error bars - if err.ndim == 3: - if (err_shape[0] != self.nseries) or \ - (err_shape[1] != 2) or \ - (err_shape[2] != len(self.data)): - msg = "Asymmetrical error bars should be provided " + \ - "with the shape (%u, 2, %u)" % \ - (self.nseries, len(self.data)) - raise ValueError(msg) - - # broadcast errors to each data series - if len(err) == 1: - err = np.tile(err, (self.nseries, 1)) - - elif is_number(err): - err = np.tile([err], (self.nseries, len(self.data))) - - else: - msg = "No valid %s detected" % label - raise ValueError(msg) - - return err - - def _get_errorbars(self, label=None, index=None, xerr=True, yerr=True): - from pandas import DataFrame - errors = {} - - for kw, flag in zip(['xerr', 'yerr'], [xerr, yerr]): - if flag: - err = self.errors[kw] - # user provided label-matched dataframe of errors - if isinstance(err, (DataFrame, dict)): - if label is not None and label in err.keys(): - err = err[label] - else: - err = None - elif index is not None and err is not None: - err = err[index] - - if err is not None: - errors[kw] = err - return errors - - def _get_subplots(self): - from matplotlib.axes import Subplot - return [ax for ax in self.axes[0].get_figure().get_axes() - if isinstance(ax, Subplot)] - - def _get_axes_layout(self): - axes = self._get_subplots() - x_set = set() - y_set = set() - for ax in axes: - # check axes coordinates to estimate layout - points = ax.get_position().get_points() - x_set.add(points[0][0]) - y_set.add(points[0][1]) - return (len(y_set), len(x_set)) - - -class PlanePlot(MPLPlot): - """ - Abstract class for plotting on plane, currently scatter and hexbin. - """ - - _layout_type = 'single' - - def __init__(self, data, x, y, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - if x is None or y is None: - raise ValueError(self._kind + ' requires and x and y column') - if is_integer(x) and not self.data.columns.holds_integer(): - x = self.data.columns[x] - if is_integer(y) and not self.data.columns.holds_integer(): - y = self.data.columns[y] - self.x = x - self.y = y - - @property - def nseries(self): - return 1 - - def _post_plot_logic(self, ax, data): - x, y = self.x, self.y - ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - -class ScatterPlot(PlanePlot): - _kind = 'scatter' - - def __init__(self, data, x, y, s=None, c=None, **kwargs): - if s is None: - # hide the matplotlib default for size, in case we want to change - # the handling of this argument later - s = 20 - super(ScatterPlot, self).__init__(data, x, y, s=s, **kwargs) - if is_integer(c) and not self.data.columns.holds_integer(): - c = self.data.columns[c] - self.c = c - - def _make_plot(self): - x, y, c, data = self.x, self.y, self.c, self.data - ax = self.axes[0] - - c_is_column = is_hashable(c) and c in self.data.columns - - # plot a colorbar only if a colormap is provided or necessary - cb = self.kwds.pop('colorbar', self.colormap or c_is_column) - - # pandas uses colormap, matplotlib uses cmap. - cmap = self.colormap or 'Greys' - cmap = self.plt.cm.get_cmap(cmap) - color = self.kwds.pop("color", None) - if c is not None and color is not None: - raise TypeError('Specify exactly one of `c` and `color`') - elif c is None and color is None: - c_values = self.plt.rcParams['patch.facecolor'] - elif color is not None: - c_values = color - elif c_is_column: - c_values = self.data[c].values - else: - c_values = c - - if self.legend and hasattr(self, 'label'): - label = self.label - else: - label = None - scatter = ax.scatter(data[x].values, data[y].values, c=c_values, - label=label, cmap=cmap, **self.kwds) - if cb: - img = ax.collections[0] - kws = dict(ax=ax) - if self.mpl_ge_1_3_1(): - kws['label'] = c if c_is_column else '' - self.fig.colorbar(img, **kws) - - if label is not None: - self._add_legend_handle(scatter, label) - else: - self.legend = False - - errors_x = self._get_errorbars(label=x, index=0, yerr=False) - errors_y = self._get_errorbars(label=y, index=0, xerr=False) - if len(errors_x) > 0 or len(errors_y) > 0: - err_kwds = dict(errors_x, **errors_y) - err_kwds['ecolor'] = scatter.get_facecolor()[0] - ax.errorbar(data[x].values, data[y].values, - linestyle='none', **err_kwds) - - -class HexBinPlot(PlanePlot): - _kind = 'hexbin' - - def __init__(self, data, x, y, C=None, **kwargs): - super(HexBinPlot, self).__init__(data, x, y, **kwargs) - if is_integer(C) and not self.data.columns.holds_integer(): - C = self.data.columns[C] - self.C = C - - def _make_plot(self): - x, y, data, C = self.x, self.y, self.data, self.C - ax = self.axes[0] - # pandas uses colormap, matplotlib uses cmap. - cmap = self.colormap or 'BuGn' - cmap = self.plt.cm.get_cmap(cmap) - cb = self.kwds.pop('colorbar', True) - - if C is None: - c_values = None - else: - c_values = data[C].values - - ax.hexbin(data[x].values, data[y].values, C=c_values, cmap=cmap, - **self.kwds) - if cb: - img = ax.collections[0] - self.fig.colorbar(img, ax=ax) - - def _make_legend(self): - pass - - -class LinePlot(MPLPlot): - _kind = 'line' - _default_rot = 0 - orientation = 'vertical' - - def __init__(self, data, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - if self.stacked: - self.data = self.data.fillna(value=0) - self.x_compat = plot_params['x_compat'] - if 'x_compat' in self.kwds: - self.x_compat = bool(self.kwds.pop('x_compat')) - - def _is_ts_plot(self): - # this is slightly deceptive - return not self.x_compat and self.use_index and self._use_dynamic_x() - - def _use_dynamic_x(self): - from pandas.tseries.plotting import _use_dynamic_x - return _use_dynamic_x(self._get_ax(0), self.data) - - def _make_plot(self): - if self._is_ts_plot(): - from pandas.tseries.plotting import _maybe_convert_index - data = _maybe_convert_index(self._get_ax(0), self.data) - - x = data.index # dummy, not used - plotf = self._ts_plot - it = self._iter_data(data=data, keep_index=True) - else: - x = self._get_xticks(convert_period=True) - plotf = self._plot - it = self._iter_data() - - stacking_id = self._get_stacking_id() - is_errorbar = any(e is not None for e in self.errors.values()) - - colors = self._get_colors() - for i, (label, y) in enumerate(it): - ax = self._get_ax(i) - kwds = self.kwds.copy() - style, kwds = self._apply_style_colors(colors, kwds, i, label) - - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) - - label = pprint_thing(label) # .encode('utf-8') - kwds['label'] = label - - newlines = plotf(ax, x, y, style=style, column_num=i, - stacking_id=stacking_id, - is_errorbar=is_errorbar, - **kwds) - self._add_legend_handle(newlines[0], label, index=i) - - lines = _get_all_lines(ax) - left, right = _get_xlim(lines) - ax.set_xlim(left, right) - - @classmethod - def _plot(cls, ax, x, y, style=None, column_num=None, - stacking_id=None, **kwds): - # column_num is used to get the target column from protf in line and - # area plots - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(y)) - y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) - lines = MPLPlot._plot(ax, x, y_values, style=style, **kwds) - cls._update_stacker(ax, stacking_id, y) - return lines - - @classmethod - def _ts_plot(cls, ax, x, data, style=None, **kwds): - from pandas.tseries.plotting import (_maybe_resample, - _decorate_axes, - format_dateaxis) - # accept x to be consistent with normal plot func, - # x is not passed to tsplot as it uses data.index as x coordinate - # column_num must be in kwds for stacking purpose - freq, data = _maybe_resample(data, ax, kwds) - - # Set ax with freq info - _decorate_axes(ax, freq, kwds) - # digging deeper - if hasattr(ax, 'left_ax'): - _decorate_axes(ax.left_ax, freq, kwds) - if hasattr(ax, 'right_ax'): - _decorate_axes(ax.right_ax, freq, kwds) - ax._plot_data.append((data, cls._kind, kwds)) - - lines = cls._plot(ax, data.index, data.values, style=style, **kwds) - # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq, data.index) - return lines - - def _get_stacking_id(self): - if self.stacked: - return id(self.data) - else: - return None - - @classmethod - def _initialize_stacker(cls, ax, stacking_id, n): - if stacking_id is None: - return - if not hasattr(ax, '_stacker_pos_prior'): - ax._stacker_pos_prior = {} - if not hasattr(ax, '_stacker_neg_prior'): - ax._stacker_neg_prior = {} - ax._stacker_pos_prior[stacking_id] = np.zeros(n) - ax._stacker_neg_prior[stacking_id] = np.zeros(n) - - @classmethod - def _get_stacked_values(cls, ax, stacking_id, values, label): - if stacking_id is None: - return values - if not hasattr(ax, '_stacker_pos_prior'): - # stacker may not be initialized for subplots - cls._initialize_stacker(ax, stacking_id, len(values)) - - if (values >= 0).all(): - return ax._stacker_pos_prior[stacking_id] + values - elif (values <= 0).all(): - return ax._stacker_neg_prior[stacking_id] + values - - raise ValueError('When stacked is True, each column must be either ' - 'all positive or negative.' - '{0} contains both positive and negative values' - .format(label)) - - @classmethod - def _update_stacker(cls, ax, stacking_id, values): - if stacking_id is None: - return - if (values >= 0).all(): - ax._stacker_pos_prior[stacking_id] += values - elif (values <= 0).all(): - ax._stacker_neg_prior[stacking_id] += values - - def _post_plot_logic(self, ax, data): - condition = (not self._use_dynamic_x() and - data.index.is_all_dates and - not self.subplots or - (self.subplots and self.sharex)) - - index_name = self._get_index_name() - - if condition: - # irregular TS rotated 30 deg. by default - # probably a better place to check / set this. - if not self._rot_set: - self.rot = 30 - format_date_labels(ax, rot=self.rot) - - if index_name is not None and self.use_index: - ax.set_xlabel(index_name) - - -class AreaPlot(LinePlot): - _kind = 'area' - - def __init__(self, data, **kwargs): - kwargs.setdefault('stacked', True) - data = data.fillna(value=0) - LinePlot.__init__(self, data, **kwargs) - - if not self.stacked: - # use smaller alpha to distinguish overlap - self.kwds.setdefault('alpha', 0.5) - - if self.logy or self.loglog: - raise ValueError("Log-y scales are not supported in area plot") - - @classmethod - def _plot(cls, ax, x, y, style=None, column_num=None, - stacking_id=None, is_errorbar=False, **kwds): - - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(y)) - y_values = cls._get_stacked_values(ax, stacking_id, y, kwds['label']) - - # need to remove label, because subplots uses mpl legend as it is - line_kwds = kwds.copy() - if cls.mpl_ge_1_5_0(): - line_kwds.pop('label') - lines = MPLPlot._plot(ax, x, y_values, style=style, **line_kwds) - - # get data from the line to get coordinates for fill_between - xdata, y_values = lines[0].get_data(orig=False) - - # unable to use ``_get_stacked_values`` here to get starting point - if stacking_id is None: - start = np.zeros(len(y)) - elif (y >= 0).all(): - start = ax._stacker_pos_prior[stacking_id] - elif (y <= 0).all(): - start = ax._stacker_neg_prior[stacking_id] - else: - start = np.zeros(len(y)) - - if 'color' not in kwds: - kwds['color'] = lines[0].get_color() - - rect = ax.fill_between(xdata, start, y_values, **kwds) - cls._update_stacker(ax, stacking_id, y) - - # LinePlot expects list of artists - res = [rect] if cls.mpl_ge_1_5_0() else lines - return res - - def _add_legend_handle(self, handle, label, index=None): - if not self.mpl_ge_1_5_0(): - from matplotlib.patches import Rectangle - # Because fill_between isn't supported in legend, - # specifically add Rectangle handle here - alpha = self.kwds.get('alpha', None) - handle = Rectangle((0, 0), 1, 1, fc=handle.get_color(), - alpha=alpha) - LinePlot._add_legend_handle(self, handle, label, index=index) - - def _post_plot_logic(self, ax, data): - LinePlot._post_plot_logic(self, ax, data) - - if self.ylim is None: - if (data >= 0).all().all(): - ax.set_ylim(0, None) - elif (data <= 0).all().all(): - ax.set_ylim(None, 0) - - -class BarPlot(MPLPlot): - _kind = 'bar' - _default_rot = 90 - orientation = 'vertical' - - def __init__(self, data, **kwargs): - self.bar_width = kwargs.pop('width', 0.5) - pos = kwargs.pop('position', 0.5) - kwargs.setdefault('align', 'center') - self.tick_pos = np.arange(len(data)) - - self.bottom = kwargs.pop('bottom', 0) - self.left = kwargs.pop('left', 0) - - self.log = kwargs.pop('log', False) - MPLPlot.__init__(self, data, **kwargs) - - if self.stacked or self.subplots: - self.tickoffset = self.bar_width * pos - if kwargs['align'] == 'edge': - self.lim_offset = self.bar_width / 2 - else: - self.lim_offset = 0 - else: - if kwargs['align'] == 'edge': - w = self.bar_width / self.nseries - self.tickoffset = self.bar_width * (pos - 0.5) + w * 0.5 - self.lim_offset = w * 0.5 - else: - self.tickoffset = self.bar_width * pos - self.lim_offset = 0 - - self.ax_pos = self.tick_pos - self.tickoffset - - def _args_adjust(self): - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - if is_list_like(self.left): - self.left = np.array(self.left) - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.bar(x, y, w, bottom=start, log=log, **kwds) - - @property - def _start_base(self): - return self.bottom - - def _make_plot(self): - import matplotlib as mpl - - colors = self._get_colors() - ncolors = len(colors) - - pos_prior = neg_prior = np.zeros(len(self.data)) - K = self.nseries - - for i, (label, y) in enumerate(self._iter_data(fillna=0)): - ax = self._get_ax(i) - kwds = self.kwds.copy() - kwds['color'] = colors[i % ncolors] - - errors = self._get_errorbars(label=label, index=i) - kwds = dict(kwds, **errors) - - label = pprint_thing(label) - - if (('yerr' in kwds) or ('xerr' in kwds)) \ - and (kwds.get('ecolor') is None): - kwds['ecolor'] = mpl.rcParams['xtick.color'] - - start = 0 - if self.log and (y >= 1).all(): - start = 1 - start = start + self._start_base - - if self.subplots: - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - ax.set_title(label) - elif self.stacked: - mask = y > 0 - start = np.where(mask, pos_prior, neg_prior) + self._start_base - w = self.bar_width / 2 - rect = self._plot(ax, self.ax_pos + w, y, self.bar_width, - start=start, label=label, - log=self.log, **kwds) - pos_prior = pos_prior + np.where(mask, y, 0) - neg_prior = neg_prior + np.where(mask, 0, y) - else: - w = self.bar_width / K - rect = self._plot(ax, self.ax_pos + (i + 0.5) * w, y, w, - start=start, label=label, - log=self.log, **kwds) - self._add_legend_handle(rect, label, index=i) - - def _post_plot_logic(self, ax, data): - if self.use_index: - str_index = [pprint_thing(key) for key in data.index] - else: - str_index = [pprint_thing(key) for key in range(data.shape[0])] - name = self._get_index_name() - - s_edge = self.ax_pos[0] - 0.25 + self.lim_offset - e_edge = self.ax_pos[-1] + 0.25 + self.bar_width + self.lim_offset - - self._decorate_ticks(ax, name, str_index, s_edge, e_edge) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - ax.set_xlim((start_edge, end_edge)) - ax.set_xticks(self.tick_pos) - ax.set_xticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_xlabel(name) - - -class BarhPlot(BarPlot): - _kind = 'barh' - _default_rot = 0 - orientation = 'horizontal' - - @property - def _start_base(self): - return self.left - - @classmethod - def _plot(cls, ax, x, y, w, start=0, log=False, **kwds): - return ax.barh(x, y, w, left=start, log=log, **kwds) - - def _decorate_ticks(self, ax, name, ticklabels, start_edge, end_edge): - # horizontal bars - ax.set_ylim((start_edge, end_edge)) - ax.set_yticks(self.tick_pos) - ax.set_yticklabels(ticklabels) - if name is not None and self.use_index: - ax.set_ylabel(name) - - -class HistPlot(LinePlot): - _kind = 'hist' - - def __init__(self, data, bins=10, bottom=0, **kwargs): - self.bins = bins # use mpl default - self.bottom = bottom - # Do not call LinePlot.__init__ which may fill nan - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if is_integer(self.bins): - # create common bin edge - values = (self.data._convert(datetime=True)._get_numeric_data()) - values = np.ravel(values) - values = values[~isnull(values)] - - hist, self.bins = np.histogram( - values, bins=self.bins, - range=self.kwds.get('range', None), - weights=self.kwds.get('weights', None)) - - if is_list_like(self.bottom): - self.bottom = np.array(self.bottom) - - @classmethod - def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, - stacking_id=None, **kwds): - if column_num == 0: - cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~isnull(y)] - - base = np.zeros(len(bins) - 1) - bottom = bottom + \ - cls._get_stacked_values(ax, stacking_id, base, kwds['label']) - # ignore style - n, bins, patches = ax.hist(y, bins=bins, bottom=bottom, **kwds) - cls._update_stacker(ax, stacking_id, n) - return patches - - def _make_plot(self): - colors = self._get_colors() - stacking_id = self._get_stacking_id() - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - - kwds = self.kwds.copy() - - label = pprint_thing(label) - kwds['label'] = label - - style, kwds = self._apply_style_colors(colors, kwds, i, label) - if style is not None: - kwds['style'] = style - - kwds = self._make_plot_keywords(kwds, y) - artists = self._plot(ax, y, column_num=i, - stacking_id=stacking_id, **kwds) - self._add_legend_handle(artists[0], label, index=i) - - def _make_plot_keywords(self, kwds, y): - """merge BoxPlot/KdePlot properties to passed kwds""" - # y is required for KdePlot - kwds['bottom'] = self.bottom - kwds['bins'] = self.bins - return kwds - - def _post_plot_logic(self, ax, data): - if self.orientation == 'horizontal': - ax.set_xlabel('Frequency') - else: - ax.set_ylabel('Frequency') - - @property - def orientation(self): - if self.kwds.get('orientation', None) == 'horizontal': - return 'horizontal' - else: - return 'vertical' - - -class KdePlot(HistPlot): - _kind = 'kde' - orientation = 'vertical' - - def __init__(self, data, bw_method=None, ind=None, **kwargs): - MPLPlot.__init__(self, data, **kwargs) - self.bw_method = bw_method - self.ind = ind - - def _args_adjust(self): - pass - - def _get_ind(self, y): - if self.ind is None: - # np.nanmax() and np.nanmin() ignores the missing values - sample_range = np.nanmax(y) - np.nanmin(y) - ind = np.linspace(np.nanmin(y) - 0.5 * sample_range, - np.nanmax(y) + 0.5 * sample_range, 1000) - else: - ind = self.ind - return ind - - @classmethod - def _plot(cls, ax, y, style=None, bw_method=None, ind=None, - column_num=None, stacking_id=None, **kwds): - from scipy.stats import gaussian_kde - from scipy import __version__ as spv - - y = remove_na(y) - - if LooseVersion(spv) >= '0.11.0': - gkde = gaussian_kde(y, bw_method=bw_method) - else: - gkde = gaussian_kde(y) - if bw_method is not None: - msg = ('bw_method was added in Scipy 0.11.0.' + - ' Scipy version in use is %s.' % spv) - warnings.warn(msg) - - y = gkde.evaluate(ind) - lines = MPLPlot._plot(ax, ind, y, style=style, **kwds) - return lines - - def _make_plot_keywords(self, kwds, y): - kwds['bw_method'] = self.bw_method - kwds['ind'] = self._get_ind(y) - return kwds - - def _post_plot_logic(self, ax, data): - ax.set_ylabel('Density') - - -class PiePlot(MPLPlot): - _kind = 'pie' - _layout_type = 'horizontal' - - def __init__(self, data, kind=None, **kwargs): - data = data.fillna(value=0) - if (data < 0).any().any(): - raise ValueError("{0} doesn't allow negative values".format(kind)) - MPLPlot.__init__(self, data, kind=kind, **kwargs) - - def _args_adjust(self): - self.grid = False - self.logy = False - self.logx = False - self.loglog = False - - def _validate_color_args(self): - pass - - def _make_plot(self): - colors = self._get_colors( - num_colors=len(self.data), color_kwds='colors') - self.kwds.setdefault('colors', colors) - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - if label is not None: - label = pprint_thing(label) - ax.set_ylabel(label) - - kwds = self.kwds.copy() - - def blank_labeler(label, value): - if value == 0: - return '' - else: - return label - - idx = [pprint_thing(v) for v in self.data.index] - labels = kwds.pop('labels', idx) - # labels is used for each wedge's labels - # Blank out labels for values of 0 so they don't overlap - # with nonzero wedges - if labels is not None: - blabels = [blank_labeler(l, value) for - l, value in zip(labels, y)] - else: - blabels = None - results = ax.pie(y, labels=blabels, **kwds) - - if kwds.get('autopct', None) is not None: - patches, texts, autotexts = results - else: - patches, texts = results - autotexts = [] - - if self.fontsize is not None: - for t in texts + autotexts: - t.set_fontsize(self.fontsize) - - # leglabels is used for legend labels - leglabels = labels if labels is not None else idx - for p, l in zip(patches, leglabels): - self._add_legend_handle(p, l) - - -class BoxPlot(LinePlot): - _kind = 'box' - _layout_type = 'horizontal' - - _valid_return_types = (None, 'axes', 'dict', 'both') - # namedtuple to hold results - BP = namedtuple("Boxplot", ['ax', 'lines']) - - def __init__(self, data, return_type='axes', **kwargs): - # Do not call LinePlot.__init__ which may fill nan - if return_type not in self._valid_return_types: - raise ValueError( - "return_type must be {None, 'axes', 'dict', 'both'}") - - self.return_type = return_type - MPLPlot.__init__(self, data, **kwargs) - - def _args_adjust(self): - if self.subplots: - # Disable label ax sharing. Otherwise, all subplots shows last - # column label - if self.orientation == 'vertical': - self.sharex = False - else: - self.sharey = False - - @classmethod - def _plot(cls, ax, y, column_num=None, return_type='axes', **kwds): - if y.ndim == 2: - y = [remove_na(v) for v in y] - # Boxplot fails with empty arrays, so need to add a NaN - # if any cols are empty - # GH 8181 - y = [v if v.size > 0 else np.array([np.nan]) for v in y] - else: - y = remove_na(y) - bp = ax.boxplot(y, **kwds) - - if return_type == 'dict': - return bp, bp - elif return_type == 'both': - return cls.BP(ax=ax, lines=bp), bp - else: - return ax, bp - - def _validate_color_args(self): - if 'color' in self.kwds: - if self.colormap is not None: - warnings.warn("'color' and 'colormap' cannot be used " - "simultaneously. Using 'color'") - self.color = self.kwds.pop('color') - - if isinstance(self.color, dict): - valid_keys = ['boxes', 'whiskers', 'medians', 'caps'] - for key, values in compat.iteritems(self.color): - if key not in valid_keys: - raise ValueError("color dict contains invalid " - "key '{0}' " - "The key must be either {1}" - .format(key, valid_keys)) - else: - self.color = None - - # get standard colors for default - colors = _get_standard_colors(num_colors=3, - colormap=self.colormap, - color=None) - # use 2 colors by default, for box/whisker and median - # flier colors isn't needed here - # because it can be specified by ``sym`` kw - self._boxes_c = colors[0] - self._whiskers_c = colors[0] - self._medians_c = colors[2] - self._caps_c = 'k' # mpl default - - def _get_colors(self, num_colors=None, color_kwds='color'): - pass - - def maybe_color_bp(self, bp): - if isinstance(self.color, dict): - boxes = self.color.get('boxes', self._boxes_c) - whiskers = self.color.get('whiskers', self._whiskers_c) - medians = self.color.get('medians', self._medians_c) - caps = self.color.get('caps', self._caps_c) - else: - # Other types are forwarded to matplotlib - # If None, use default colors - boxes = self.color or self._boxes_c - whiskers = self.color or self._whiskers_c - medians = self.color or self._medians_c - caps = self.color or self._caps_c - - from matplotlib.artist import setp - setp(bp['boxes'], color=boxes, alpha=1) - setp(bp['whiskers'], color=whiskers, alpha=1) - setp(bp['medians'], color=medians, alpha=1) - setp(bp['caps'], color=caps, alpha=1) - - def _make_plot(self): - if self.subplots: - self._return_obj = Series() - - for i, (label, y) in enumerate(self._iter_data()): - ax = self._get_ax(i) - kwds = self.kwds.copy() - - ret, bp = self._plot(ax, y, column_num=i, - return_type=self.return_type, **kwds) - self.maybe_color_bp(bp) - self._return_obj[label] = ret - - label = [pprint_thing(label)] - self._set_ticklabels(ax, label) - else: - y = self.data.values.T - ax = self._get_ax(0) - kwds = self.kwds.copy() - - ret, bp = self._plot(ax, y, column_num=0, - return_type=self.return_type, **kwds) - self.maybe_color_bp(bp) - self._return_obj = ret - - labels = [l for l, _ in self._iter_data()] - labels = [pprint_thing(l) for l in labels] - if not self.use_index: - labels = [pprint_thing(key) for key in range(len(labels))] - self._set_ticklabels(ax, labels) - - def _set_ticklabels(self, ax, labels): - if self.orientation == 'vertical': - ax.set_xticklabels(labels) - else: - ax.set_yticklabels(labels) - - def _make_legend(self): - pass - - def _post_plot_logic(self, ax, data): - pass - - @property - def orientation(self): - if self.kwds.get('vert', True): - return 'vertical' - else: - return 'horizontal' - - @property - def result(self): - if self.return_type is None: - return super(BoxPlot, self).result - else: - return self._return_obj - - -# kinds supported by both dataframe and series -_common_kinds = ['line', 'bar', 'barh', - 'kde', 'density', 'area', 'hist', 'box'] -# kinds supported by dataframe -_dataframe_kinds = ['scatter', 'hexbin'] -# kinds supported only by series or dataframe single column -_series_kinds = ['pie'] -_all_kinds = _common_kinds + _dataframe_kinds + _series_kinds - -_klasses = [LinePlot, BarPlot, BarhPlot, KdePlot, HistPlot, BoxPlot, - ScatterPlot, HexBinPlot, AreaPlot, PiePlot] - -_plot_klass = {} -for klass in _klasses: - _plot_klass[klass._kind] = klass - - -def _plot(data, x=None, y=None, subplots=False, - ax=None, kind='line', **kwds): - kind = _get_standard_kind(kind.lower().strip()) - if kind in _all_kinds: - klass = _plot_klass[kind] - else: - raise ValueError("%r is not a valid plot kind" % kind) - - from pandas import DataFrame - if kind in _dataframe_kinds: - if isinstance(data, DataFrame): - plot_obj = klass(data, x=x, y=y, subplots=subplots, ax=ax, - kind=kind, **kwds) - else: - raise ValueError("plot kind %r can only be used for data frames" - % kind) - - elif kind in _series_kinds: - if isinstance(data, DataFrame): - if y is None and subplots is False: - msg = "{0} requires either y column or 'subplots=True'" - raise ValueError(msg.format(kind)) - elif y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - # converted to series actually. copy to not modify - data = data[y].copy() - data.index.name = y - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - else: - if isinstance(data, DataFrame): - if x is not None: - if is_integer(x) and not data.columns.holds_integer(): - x = data.columns[x] - data = data.set_index(x) - - if y is not None: - if is_integer(y) and not data.columns.holds_integer(): - y = data.columns[y] - label = kwds['label'] if 'label' in kwds else y - series = data[y].copy() # Don't modify - series.name = label - - for kw in ['xerr', 'yerr']: - if (kw in kwds) and \ - (isinstance(kwds[kw], string_types) or - is_integer(kwds[kw])): - try: - kwds[kw] = data[kwds[kw]] - except (IndexError, KeyError, TypeError): - pass - data = series - plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) - - plot_obj.generate() - plot_obj.draw() - return plot_obj.result - - -df_kind = """- 'scatter' : scatter plot - - 'hexbin' : hexbin plot""" -series_kind = "" - -df_coord = """x : label or position, default None - y : label or position, default None - Allows plotting of one column versus another""" -series_coord = "" - -df_unique = """stacked : boolean, default False in line and - bar plots, and True in area plot. If True, create stacked plot. - sort_columns : boolean, default False - Sort column names to determine plot ordering - secondary_y : boolean or sequence, default False - Whether to plot on the secondary y-axis - If a list/tuple, which columns to plot on secondary y-axis""" -series_unique = """label : label argument to provide to plot - secondary_y : boolean or sequence of ints, default False - If True then y-axis will be on the right""" - -df_ax = """ax : matplotlib axes object, default None - subplots : boolean, default False - Make separate subplots for each column - sharex : boolean, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all axis in a figure! - sharey : boolean, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - layout : tuple (optional) - (rows, columns) for the layout of subplots""" -series_ax = """ax : matplotlib axes object - If not passed, uses gca()""" - -df_note = """- If `kind` = 'scatter' and the argument `c` is the name of a dataframe - column, the values of that column are used to color each point. - - If `kind` = 'hexbin', you can control the size of the bins with the - `gridsize` argument. By default, a histogram of the counts around each - `(x, y)` point is computed. You can specify alternative aggregations - by passing values to the `C` and `reduce_C_function` arguments. - `C` specifies the value at each `(x, y)` point and `reduce_C_function` - is a function of one argument that reduces all the values in a bin to - a single number (e.g. `mean`, `max`, `sum`, `std`).""" -series_note = "" - -_shared_doc_df_kwargs = dict(klass='DataFrame', klass_obj='df', - klass_kind=df_kind, klass_coord=df_coord, - klass_ax=df_ax, klass_unique=df_unique, - klass_note=df_note) -_shared_doc_series_kwargs = dict(klass='Series', klass_obj='s', - klass_kind=series_kind, - klass_coord=series_coord, klass_ax=series_ax, - klass_unique=series_unique, - klass_note=series_note) - -_shared_docs['plot'] = """ - Make plots of %(klass)s using matplotlib / pylab. - - *New in version 0.17.0:* Each plot kind has a corresponding method on the - ``%(klass)s.plot`` accessor: - ``%(klass_obj)s.plot(kind='line')`` is equivalent to - ``%(klass_obj)s.plot.line()``. - - Parameters - ---------- - data : %(klass)s - %(klass_coord)s - kind : str - - 'line' : line plot (default) - - 'bar' : vertical bar plot - - 'barh' : horizontal bar plot - - 'hist' : histogram - - 'box' : boxplot - - 'kde' : Kernel Density Estimation plot - - 'density' : same as 'kde' - - 'area' : area plot - - 'pie' : pie plot - %(klass_kind)s - %(klass_ax)s - figsize : a tuple (width, height) in inches - use_index : boolean, default True - Use index as ticks for x axis - title : string or list - Title to use for the plot. If a string is passed, print the string at - the top of the figure. If a list is passed and `subplots` is True, - print each item in the list above the corresponding subplot. - grid : boolean, default None (matlab style default) - Axis grid lines - legend : False/True/'reverse' - Place legend on axis subplots - style : list or dict - matplotlib line style per column - logx : boolean, default False - Use log scaling on x axis - logy : boolean, default False - Use log scaling on y axis - loglog : boolean, default False - Use log scaling on both x and y axes - xticks : sequence - Values to use for the xticks - yticks : sequence - Values to use for the yticks - xlim : 2-tuple/list - ylim : 2-tuple/list - rot : int, default None - Rotation for ticks (xticks for vertical, yticks for horizontal plots) - fontsize : int, default None - Font size for xticks and yticks - colormap : str or matplotlib colormap object, default None - Colormap to select colors from. If string, load colormap with that name - from matplotlib. - colorbar : boolean, optional - If True, plot colorbar (only relevant for 'scatter' and 'hexbin' plots) - position : float - Specify relative alignments for bar plot layout. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - layout : tuple (optional) - (rows, columns) for the layout of the plot - table : boolean, Series or DataFrame, default False - If True, draw a table using the data in the DataFrame and the data will - be transposed to meet matplotlib's default layout. - If a Series or DataFrame is passed, use passed data to draw a table. - yerr : DataFrame, Series, array-like, dict and str - See :ref:`Plotting with Error Bars ` for - detail. - xerr : same types as yerr. - %(klass_unique)s - mark_right : boolean, default True - When using a secondary_y axis, automatically mark the column - labels with "(right)" in the legend - kwds : keywords - Options to pass to matplotlib plotting method - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - - Notes - ----- - - - See matplotlib documentation online for more on this subject - - If `kind` = 'bar' or 'barh', you can specify relative alignments - for bar plot layout by `position` keyword. - From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 (center) - %(klass_note)s - - """ - - -@Appender(_shared_docs['plot'] % _shared_doc_df_kwargs) -def plot_frame(data, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, - **kwds): - return _plot(data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - secondary_y=secondary_y, sort_columns=sort_columns, - **kwds) - - -@Appender(_shared_docs['plot'] % _shared_doc_series_kwargs) -def plot_series(data, kind='line', ax=None, # Series unique - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, # Series unique - **kwds): - - import matplotlib.pyplot as plt - """ - If no axes is specified, check whether there are existing figures - If there is no existing figures, _gca() will - create a figure with the default figsize, causing the figsize=parameter to - be ignored. - """ - if ax is None and len(plt.get_fignums()) > 0: - ax = _gca() - ax = MPLPlot._get_ax_layer(ax) - return _plot(data, kind=kind, ax=ax, - figsize=figsize, use_index=use_index, title=title, - grid=grid, legend=legend, - style=style, logx=logx, logy=logy, loglog=loglog, - xticks=xticks, yticks=yticks, xlim=xlim, ylim=ylim, - rot=rot, fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, - label=label, secondary_y=secondary_y, - **kwds) - - -_shared_docs['boxplot'] = """ - Make a box plot from DataFrame column optionally grouped by some columns or - other inputs - - Parameters - ---------- - data : the pandas object holding the data - column : column name or list of names, or vector - Can be any valid input to groupby - by : string or sequence - Column in the DataFrame to group by - ax : Matplotlib axes object, optional - fontsize : int or string - rot : label rotation angle - figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid - layout : tuple (optional) - (rows, columns) for the layout of the plot - return_type : {None, 'axes', 'dict', 'both'}, default None - The kind of object to return. The default is ``axes`` - 'axes' returns the matplotlib axes the boxplot is drawn on; - 'dict' returns a dictionary whose values are the matplotlib - Lines of the boxplot; - 'both' returns a namedtuple with the axes and dict. - - When grouping with ``by``, a Series mapping columns to ``return_type`` - is returned, unless ``return_type`` is None, in which case a NumPy - array of axes is returned with the same shape as ``layout``. - See the prose documentation for more. - - kwds : other plotting keyword arguments to be passed to matplotlib boxplot - function - - Returns - ------- - lines : dict - ax : matplotlib Axes - (ax, lines): namedtuple - - Notes - ----- - Use ``return_type='dict'`` when you want to tweak the appearance - of the lines after plotting. In this case a dict containing the Lines - making up the boxes, caps, fliers, medians, and whiskers is returned. - """ - - -@Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) -def boxplot(data, column=None, by=None, ax=None, fontsize=None, - rot=0, grid=True, figsize=None, layout=None, return_type=None, - **kwds): - - # validate return_type: - if return_type not in BoxPlot._valid_return_types: - raise ValueError("return_type must be {'axes', 'dict', 'both'}") - - from pandas import Series, DataFrame - if isinstance(data, Series): - data = DataFrame({'x': data}) - column = 'x' - - def _get_colors(): - return _get_standard_colors(color=kwds.get('color'), num_colors=1) - - def maybe_color_bp(bp): - if 'color' not in kwds: - from matplotlib.artist import setp - setp(bp['boxes'], color=colors[0], alpha=1) - setp(bp['whiskers'], color=colors[0], alpha=1) - setp(bp['medians'], color=colors[2], alpha=1) - - def plot_group(keys, values, ax): - keys = [pprint_thing(x) for x in keys] - values = [remove_na(v) for v in values] - bp = ax.boxplot(values, **kwds) - if fontsize is not None: - ax.tick_params(axis='both', labelsize=fontsize) - if kwds.get('vert', 1): - ax.set_xticklabels(keys, rotation=rot) - else: - ax.set_yticklabels(keys, rotation=rot) - maybe_color_bp(bp) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type == 'dict': - return bp - elif return_type == 'both': - return BoxPlot.BP(ax=ax, lines=bp) - else: - return ax - - colors = _get_colors() - if column is None: - columns = None - else: - if isinstance(column, (list, tuple)): - columns = column - else: - columns = [column] - - if by is not None: - # Prefer array return type for 2-D plots to match the subplot layout - # https://github.com/pandas-dev/pandas/pull/12216#issuecomment-241175580 - result = _grouped_plot_by_column(plot_group, data, columns=columns, - by=by, grid=grid, figsize=figsize, - ax=ax, layout=layout, - return_type=return_type) - else: - if return_type is None: - return_type = 'axes' - if layout is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - - if ax is None: - ax = _gca() - data = data._get_numeric_data() - if columns is None: - columns = data.columns - else: - data = data[columns] - - result = plot_group(columns, data.values.T, ax) - ax.grid(grid) - - return result - - -def format_date_labels(ax, rot): - # mini version of autofmt_xdate - try: - for label in ax.get_xticklabels(): - label.set_ha('right') - label.set_rotation(rot) - fig = ax.get_figure() - fig.subplots_adjust(bottom=0.2) - except Exception: # pragma: no cover - pass - - -def scatter_plot(data, x, y, by=None, ax=None, figsize=None, grid=False, - **kwargs): - """ - Make a scatter plot from two DataFrame columns - - Parameters - ---------- - data : DataFrame - x : Column name for the x-axis values - y : Column name for the y-axis values - ax : Matplotlib axis object - figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid - kwargs : other plotting keyword arguments - To be passed to scatter function - - Returns - ------- - fig : matplotlib.Figure - """ - import matplotlib.pyplot as plt - - kwargs.setdefault('edgecolors', 'none') - - def plot_group(group, ax): - xvals = group[x].values - yvals = group[y].values - ax.scatter(xvals, yvals, **kwargs) - ax.grid(grid) - - if by is not None: - fig = _grouped_plot(plot_group, data, by=by, figsize=figsize, ax=ax) - else: - if ax is None: - fig = plt.figure() - ax = fig.add_subplot(111) - else: - fig = ax.get_figure() - plot_group(data, ax) - ax.set_ylabel(pprint_thing(y)) - ax.set_xlabel(pprint_thing(x)) - - ax.grid(grid) - - return fig - - -def hist_frame(data, column=None, by=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, ax=None, sharex=False, - sharey=False, figsize=None, layout=None, bins=10, **kwds): - """ - Draw histogram of the DataFrame's series using matplotlib / pylab. - - Parameters - ---------- - data : DataFrame - column : string or sequence - If passed, will be used to limit data to a subset of columns - by : object, optional - If passed, then used to form histograms for separate groups - grid : boolean, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - ax : matplotlib axes object, default None - sharex : boolean, default True if ax is None else False - In case subplots=True, share x axis and set some x axis labels to - invisible; defaults to True if ax is None otherwise False if an ax - is passed in; Be aware, that passing in both an ax and sharex=True - will alter all x axis labels for all subplots in a figure! - sharey : boolean, default False - In case subplots=True, share y axis and set some y axis labels to - invisible - figsize : tuple - The size of the figure to create in inches by default - layout : tuple, optional - Tuple of (rows, columns) for the layout of the histograms - bins : integer, default 10 - Number of histogram bins to be used - kwds : other plotting keyword arguments - To be passed to hist function - """ - - if by is not None: - axes = grouped_hist(data, column=column, by=by, ax=ax, grid=grid, - figsize=figsize, sharex=sharex, sharey=sharey, - layout=layout, bins=bins, xlabelsize=xlabelsize, - xrot=xrot, ylabelsize=ylabelsize, - yrot=yrot, **kwds) - return axes - - if column is not None: - if not isinstance(column, (list, np.ndarray, Index)): - column = [column] - data = data[column] - data = data._get_numeric_data() - naxes = len(data.columns) - - fig, axes = _subplots(naxes=naxes, ax=ax, squeeze=False, - sharex=sharex, sharey=sharey, figsize=figsize, - layout=layout) - _axes = _flatten(axes) - - for i, col in enumerate(_try_sort(data.columns)): - ax = _axes[i] - ax.hist(data[col].dropna().values, bins=bins, **kwds) - ax.set_title(col) - ax.grid(grid) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - fig.subplots_adjust(wspace=0.3, hspace=0.3) - - return axes - - -def hist_series(self, by=None, ax=None, grid=True, xlabelsize=None, - xrot=None, ylabelsize=None, yrot=None, figsize=None, - bins=10, **kwds): - """ - Draw histogram of the input series using matplotlib - - Parameters - ---------- - by : object, optional - If passed, then used to form histograms for separate groups - ax : matplotlib axis object - If not passed, uses gca() - grid : boolean, default True - Whether to show axis grid lines - xlabelsize : int, default None - If specified changes the x-axis label size - xrot : float, default None - rotation of x axis labels - ylabelsize : int, default None - If specified changes the y-axis label size - yrot : float, default None - rotation of y axis labels - figsize : tuple, default None - figure size in inches by default - bins: integer, default 10 - Number of histogram bins to be used - kwds : keywords - To be passed to the actual plotting function - - Notes - ----- - See matplotlib documentation online for more on this - - """ - import matplotlib.pyplot as plt - - if by is None: - if kwds.get('layout', None) is not None: - raise ValueError("The 'layout' keyword is not supported when " - "'by' is None") - # hack until the plotting interface is a bit more unified - fig = kwds.pop('figure', plt.gcf() if plt.get_fignums() else - plt.figure(figsize=figsize)) - if (figsize is not None and tuple(figsize) != - tuple(fig.get_size_inches())): - fig.set_size_inches(*figsize, forward=True) - if ax is None: - ax = fig.gca() - elif ax.get_figure() != fig: - raise AssertionError('passed axis not bound to passed figure') - values = self.dropna().values - - ax.hist(values, bins=bins, **kwds) - ax.grid(grid) - axes = np.array([ax]) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - - else: - if 'figure' in kwds: - raise ValueError("Cannot pass 'figure' when using the " - "'by' argument, since a new 'Figure' instance " - "will be created") - axes = grouped_hist(self, by=by, ax=ax, grid=grid, figsize=figsize, - bins=bins, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot, **kwds) - - if hasattr(axes, 'ndim'): - if axes.ndim == 1 and len(axes) == 1: - return axes[0] - return axes - - -def grouped_hist(data, column=None, by=None, ax=None, bins=50, figsize=None, - layout=None, sharex=False, sharey=False, rot=90, grid=True, - xlabelsize=None, xrot=None, ylabelsize=None, yrot=None, - **kwargs): - """ - Grouped histogram - - Parameters - ---------- - data: Series/DataFrame - column: object, optional - by: object, optional - ax: axes, optional - bins: int, default 50 - figsize: tuple, optional - layout: optional - sharex: boolean, default False - sharey: boolean, default False - rot: int, default 90 - grid: bool, default True - kwargs: dict, keyword arguments passed to matplotlib.Axes.hist - - Returns - ------- - axes: collection of Matplotlib Axes - """ - def plot_group(group, ax): - ax.hist(group.dropna().values, bins=bins, **kwargs) - - xrot = xrot or rot - - fig, axes = _grouped_plot(plot_group, data, column=column, - by=by, sharex=sharex, sharey=sharey, ax=ax, - figsize=figsize, layout=layout, rot=rot) - - _set_ticks_props(axes, xlabelsize=xlabelsize, xrot=xrot, - ylabelsize=ylabelsize, yrot=yrot) - - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, - hspace=0.5, wspace=0.3) - return axes - - -def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, - rot=0, grid=True, ax=None, figsize=None, - layout=None, **kwds): - """ - Make box plots from DataFrameGroupBy data. - - Parameters - ---------- - grouped : Grouped DataFrame - subplots : - * ``False`` - no subplots will be used - * ``True`` - create a subplot for each group - column : column name or list of names, or vector - Can be any valid input to groupby - fontsize : int or string - rot : label rotation angle - grid : Setting this to True will show the grid - ax : Matplotlib axis object, default None - figsize : A tuple (width, height) in inches - layout : tuple (optional) - (rows, columns) for the layout of the plot - kwds : other plotting keyword arguments to be passed to matplotlib boxplot - function - - Returns - ------- - dict of key/value = group key/DataFrame.boxplot return value - or DataFrame.boxplot return value in case subplots=figures=False - - Examples - -------- - >>> import pandas - >>> import numpy as np - >>> import itertools - >>> - >>> tuples = [t for t in itertools.product(range(1000), range(4))] - >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1']) - >>> data = np.random.randn(len(index),4) - >>> df = pandas.DataFrame(data, columns=list('ABCD'), index=index) - >>> - >>> grouped = df.groupby(level='lvl1') - >>> boxplot_frame_groupby(grouped) - >>> - >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1) - >>> boxplot_frame_groupby(grouped, subplots=False) - """ - if subplots is True: - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, squeeze=False, - ax=ax, sharex=False, sharey=True, - figsize=figsize, layout=layout) - axes = _flatten(axes) - - ret = Series() - for (key, group), ax in zip(grouped, axes): - d = group.boxplot(ax=ax, column=column, fontsize=fontsize, - rot=rot, grid=grid, **kwds) - ax.set_title(pprint_thing(key)) - ret.loc[key] = d - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, - right=0.9, wspace=0.2) - else: - from pandas.tools.concat import concat - keys, frames = zip(*grouped) - if grouped.axis == 0: - df = concat(frames, keys=keys, axis=1) - else: - if len(frames) > 1: - df = frames[0].join(frames[1::]) - else: - df = frames[0] - ret = df.boxplot(column=column, fontsize=fontsize, rot=rot, - grid=grid, ax=ax, figsize=figsize, - layout=layout, **kwds) - return ret - - -def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True, - figsize=None, sharex=True, sharey=True, layout=None, - rot=0, ax=None, **kwargs): - from pandas import DataFrame - - if figsize == 'default': - # allowed to specify mpl default with 'default' - warnings.warn("figsize='default' is deprecated. Specify figure" - "size by tuple instead", FutureWarning, stacklevel=4) - figsize = None - - grouped = data.groupby(by) - if column is not None: - grouped = grouped[column] - - naxes = len(grouped) - fig, axes = _subplots(naxes=naxes, figsize=figsize, - sharex=sharex, sharey=sharey, ax=ax, - layout=layout) - - _axes = _flatten(axes) - - for i, (key, group) in enumerate(grouped): - ax = _axes[i] - if numeric_only and isinstance(group, DataFrame): - group = group._get_numeric_data() - plotf(group, ax, **kwargs) - ax.set_title(pprint_thing(key)) - - return fig, axes - - -def _grouped_plot_by_column(plotf, data, columns=None, by=None, - numeric_only=True, grid=False, - figsize=None, ax=None, layout=None, - return_type=None, **kwargs): - grouped = data.groupby(by) - if columns is None: - if not isinstance(by, (list, tuple)): - by = [by] - columns = data._get_numeric_data().columns.difference(by) - naxes = len(columns) - fig, axes = _subplots(naxes=naxes, sharex=True, sharey=True, - figsize=figsize, ax=ax, layout=layout) - - _axes = _flatten(axes) - - result = Series() - ax_values = [] - - for i, col in enumerate(columns): - ax = _axes[i] - gp_col = grouped[col] - keys, values = zip(*gp_col) - re_plotf = plotf(keys, values, ax, **kwargs) - ax.set_title(col) - ax.set_xlabel(pprint_thing(by)) - ax_values.append(re_plotf) - ax.grid(grid) - - result = Series(ax_values, index=columns) - - # Return axes in multiplot case, maybe revisit later # 985 - if return_type is None: - result = axes - - byline = by[0] if len(by) == 1 else by - fig.suptitle('Boxplot grouped by %s' % byline) - fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2) - - return result - - -def table(ax, data, rowLabels=None, colLabels=None, - **kwargs): - """ - Helper function to convert DataFrame and Series to matplotlib.table - - Parameters - ---------- - `ax`: Matplotlib axes object - `data`: DataFrame or Series - data for table contents - `kwargs`: keywords, optional - keyword arguments which passed to matplotlib.table.table. - If `rowLabels` or `colLabels` is not specified, data index or column - name will be used. - - Returns - ------- - matplotlib table object - """ - from pandas import DataFrame - if isinstance(data, Series): - data = DataFrame(data, columns=[data.name]) - elif isinstance(data, DataFrame): - pass - else: - raise ValueError('Input data must be DataFrame or Series') - - if rowLabels is None: - rowLabels = data.index - - if colLabels is None: - colLabels = data.columns - - cellText = data.values - - import matplotlib.table - table = matplotlib.table.table(ax, cellText=cellText, - rowLabels=rowLabels, - colLabels=colLabels, **kwargs) - return table - - -def _get_layout(nplots, layout=None, layout_type='box'): - if layout is not None: - if not isinstance(layout, (tuple, list)) or len(layout) != 2: - raise ValueError('Layout must be a tuple of (rows, columns)') - - nrows, ncols = layout - - # Python 2 compat - ceil_ = lambda x: int(ceil(x)) - if nrows == -1 and ncols > 0: - layout = nrows, ncols = (ceil_(float(nplots) / ncols), ncols) - elif ncols == -1 and nrows > 0: - layout = nrows, ncols = (nrows, ceil_(float(nplots) / nrows)) - elif ncols <= 0 and nrows <= 0: - msg = "At least one dimension of layout must be positive" - raise ValueError(msg) - - if nrows * ncols < nplots: - raise ValueError('Layout of %sx%s must be larger than ' - 'required size %s' % (nrows, ncols, nplots)) - - return layout - - if layout_type == 'single': - return (1, 1) - elif layout_type == 'horizontal': - return (1, nplots) - elif layout_type == 'vertical': - return (nplots, 1) - - layouts = {1: (1, 1), 2: (1, 2), 3: (2, 2), 4: (2, 2)} - try: - return layouts[nplots] - except KeyError: - k = 1 - while k ** 2 < nplots: - k += 1 - - if (k - 1) * k >= nplots: - return k, (k - 1) - else: - return k, k - -# copied from matplotlib/pyplot.py and modified for pandas.plotting - - -def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, - subplot_kw=None, ax=None, layout=None, layout_type='box', - **fig_kw): - """Create a figure with a set of subplots already made. - - This utility wrapper makes it convenient to create common layouts of - subplots, including the enclosing figure object, in a single call. - - Keyword arguments: - - naxes : int - Number of required axes. Exceeded axes are set invisible. Default is - nrows * ncols. - - sharex : bool - If True, the X axis will be shared amongst all subplots. - - sharey : bool - If True, the Y axis will be shared amongst all subplots. - - squeeze : bool - - If True, extra dimensions are squeezed out from the returned axis object: - - if only one subplot is constructed (nrows=ncols=1), the resulting - single Axis object is returned as a scalar. - - for Nx1 or 1xN subplots, the returned object is a 1-d numpy object - array of Axis objects are returned as numpy 1-d arrays. - - for NxM subplots with N>1 and M>1 are returned as a 2d array. - - If False, no squeezing at all is done: the returned axis object is always - a 2-d array containing Axis instances, even if it ends up being 1x1. - - subplot_kw : dict - Dict with keywords passed to the add_subplot() call used to create each - subplots. - - ax : Matplotlib axis object, optional - - layout : tuple - Number of rows and columns of the subplot grid. - If not specified, calculated from naxes and layout_type - - layout_type : {'box', 'horziontal', 'vertical'}, default 'box' - Specify how to layout the subplot grid. - - fig_kw : Other keyword arguments to be passed to the figure() call. - Note that all keywords not recognized above will be - automatically included here. - - Returns: - - fig, ax : tuple - - fig is the Matplotlib Figure object - - ax can be either a single axis object or an array of axis objects if - more than one subplot was created. The dimensions of the resulting array - can be controlled with the squeeze keyword, see above. - - **Examples:** - - x = np.linspace(0, 2*np.pi, 400) - y = np.sin(x**2) - - # Just a figure and one subplot - f, ax = plt.subplots() - ax.plot(x, y) - ax.set_title('Simple plot') - - # Two subplots, unpack the output array immediately - f, (ax1, ax2) = plt.subplots(1, 2, sharey=True) - ax1.plot(x, y) - ax1.set_title('Sharing Y axis') - ax2.scatter(x, y) - - # Four polar axes - plt.subplots(2, 2, subplot_kw=dict(polar=True)) - """ - import matplotlib.pyplot as plt - - if subplot_kw is None: - subplot_kw = {} - - if ax is None: - fig = plt.figure(**fig_kw) - else: - if is_list_like(ax): - ax = _flatten(ax) - if layout is not None: - warnings.warn("When passing multiple axes, layout keyword is " - "ignored", UserWarning) - if sharex or sharey: - warnings.warn("When passing multiple axes, sharex and sharey " - "are ignored. These settings must be specified " - "when creating axes", UserWarning, - stacklevel=4) - if len(ax) == naxes: - fig = ax[0].get_figure() - return fig, ax - else: - raise ValueError("The number of passed axes must be {0}, the " - "same as the output plot".format(naxes)) - - fig = ax.get_figure() - # if ax is passed and a number of subplots is 1, return ax as it is - if naxes == 1: - if squeeze: - return fig, ax - else: - return fig, _flatten(ax) - else: - warnings.warn("To output multiple subplots, the figure containing " - "the passed axes is being cleared", UserWarning, - stacklevel=4) - fig.clear() - - nrows, ncols = _get_layout(naxes, layout=layout, layout_type=layout_type) - nplots = nrows * ncols - - # Create empty object array to hold all axes. It's easiest to make it 1-d - # so we can just append subplots upon creation, and then - axarr = np.empty(nplots, dtype=object) - - # Create first subplot separately, so we can share it if requested - ax0 = fig.add_subplot(nrows, ncols, 1, **subplot_kw) - - if sharex: - subplot_kw['sharex'] = ax0 - if sharey: - subplot_kw['sharey'] = ax0 - axarr[0] = ax0 - - # Note off-by-one counting because add_subplot uses the MATLAB 1-based - # convention. - for i in range(1, nplots): - kwds = subplot_kw.copy() - # Set sharex and sharey to None for blank/dummy axes, these can - # interfere with proper axis limits on the visible axes if - # they share axes e.g. issue #7528 - if i >= naxes: - kwds['sharex'] = None - kwds['sharey'] = None - ax = fig.add_subplot(nrows, ncols, i + 1, **kwds) - axarr[i] = ax - - if naxes != nplots: - for ax in axarr[naxes:]: - ax.set_visible(False) - - _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey) - - if squeeze: - # Reshape the array to have the final desired dimension (nrow,ncol), - # though discarding unneeded dimensions that equal 1. If we only have - # one subplot, just return it instead of a 1-element array. - if nplots == 1: - axes = axarr[0] - else: - axes = axarr.reshape(nrows, ncols).squeeze() - else: - # returned axis array will be always 2-d, even if nrows=ncols=1 - axes = axarr.reshape(nrows, ncols) - - return fig, axes - - -def _remove_labels_from_axis(axis): - for t in axis.get_majorticklabels(): - t.set_visible(False) - - try: - # set_visible will not be effective if - # minor axis has NullLocator and NullFormattor (default) - import matplotlib.ticker as ticker - if isinstance(axis.get_minor_locator(), ticker.NullLocator): - axis.set_minor_locator(ticker.AutoLocator()) - if isinstance(axis.get_minor_formatter(), ticker.NullFormatter): - axis.set_minor_formatter(ticker.FormatStrFormatter('')) - for t in axis.get_minorticklabels(): - t.set_visible(False) - except Exception: # pragma no cover - raise - axis.get_label().set_visible(False) - - -def _handle_shared_axes(axarr, nplots, naxes, nrows, ncols, sharex, sharey): - if nplots > 1: - - if nrows > 1: - try: - # first find out the ax layout, - # so that we can correctly handle 'gaps" - layout = np.zeros((nrows + 1, ncols + 1), dtype=np.bool) - for ax in axarr: - layout[ax.rowNum, ax.colNum] = ax.get_visible() - - for ax in axarr: - # only the last row of subplots should get x labels -> all - # other off layout handles the case that the subplot is - # the last in the column, because below is no subplot/gap. - if not layout[ax.rowNum + 1, ax.colNum]: - continue - if sharex or len(ax.get_shared_x_axes() - .get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.xaxis) - - except IndexError: - # if gridspec is used, ax.rowNum and ax.colNum may different - # from layout shape. in this case, use last_row logic - for ax in axarr: - if ax.is_last_row(): - continue - if sharex or len(ax.get_shared_x_axes() - .get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.xaxis) - - if ncols > 1: - for ax in axarr: - # only the first column should get y labels -> set all other to - # off as we only have labels in teh first column and we always - # have a subplot there, we can skip the layout test - if ax.is_first_col(): - continue - if sharey or len(ax.get_shared_y_axes().get_siblings(ax)) > 1: - _remove_labels_from_axis(ax.yaxis) - - -def _flatten(axes): - if not is_list_like(axes): - return np.array([axes]) - elif isinstance(axes, (np.ndarray, Index)): - return axes.ravel() - return np.array(axes) - - -def _get_all_lines(ax): - lines = ax.get_lines() - - if hasattr(ax, 'right_ax'): - lines += ax.right_ax.get_lines() - - if hasattr(ax, 'left_ax'): - lines += ax.left_ax.get_lines() - - return lines - - -def _get_xlim(lines): - left, right = np.inf, -np.inf - for l in lines: - x = l.get_xdata(orig=False) - left = min(x[0], left) - right = max(x[-1], right) - return left, right - - -def _set_ticks_props(axes, xlabelsize=None, xrot=None, - ylabelsize=None, yrot=None): - import matplotlib.pyplot as plt - - for ax in _flatten(axes): - if xlabelsize is not None: - plt.setp(ax.get_xticklabels(), fontsize=xlabelsize) - if xrot is not None: - plt.setp(ax.get_xticklabels(), rotation=xrot) - if ylabelsize is not None: - plt.setp(ax.get_yticklabels(), fontsize=ylabelsize) - if yrot is not None: - plt.setp(ax.get_yticklabels(), rotation=yrot) - return axes - - -class BasePlotMethods(PandasObject): - - def __init__(self, data): - self._data = data - - def __call__(self, *args, **kwargs): - raise NotImplementedError - - -class SeriesPlotMethods(BasePlotMethods): - """Series plotting accessor and method - - Examples - -------- - >>> s.plot.line() - >>> s.plot.bar() - >>> s.plot.hist() - - Plotting methods can also be accessed by calling the accessor as a method - with the ``kind`` argument: - ``s.plot(kind='line')`` is equivalent to ``s.plot.line()`` - """ - - def __call__(self, kind='line', ax=None, - figsize=None, use_index=True, title=None, grid=None, - legend=False, style=None, logx=False, logy=False, - loglog=False, xticks=None, yticks=None, - xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - label=None, secondary_y=False, **kwds): - return plot_series(self._data, kind=kind, ax=ax, figsize=figsize, - use_index=use_index, title=title, grid=grid, - legend=legend, style=style, logx=logx, logy=logy, - loglog=loglog, xticks=xticks, yticks=yticks, - xlim=xlim, ylim=ylim, rot=rot, fontsize=fontsize, - colormap=colormap, table=table, yerr=yerr, - xerr=xerr, label=label, secondary_y=secondary_y, - **kwds) - __call__.__doc__ = plot_series.__doc__ - - def line(self, **kwds): - """ - Line plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='line', **kwds) - - def bar(self, **kwds): - """ - Vertical bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='bar', **kwds) - - def barh(self, **kwds): - """ - Horizontal bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='barh', **kwds) - - def box(self, **kwds): - """ - Boxplot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='box', **kwds) - - def hist(self, bins=10, **kwds): - """ - Histogram - - .. versionadded:: 0.17.0 - - Parameters - ---------- - bins: integer, default 10 - Number of histogram bins to be used - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='hist', bins=bins, **kwds) - - def kde(self, **kwds): - """ - Kernel Density Estimate plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='kde', **kwds) - - density = kde - - def area(self, **kwds): - """ - Area plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='area', **kwds) - - def pie(self, **kwds): - """ - Pie chart - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.Series.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='pie', **kwds) - - -class FramePlotMethods(BasePlotMethods): - """DataFrame plotting accessor and method - - Examples - -------- - >>> df.plot.line() - >>> df.plot.scatter('x', 'y') - >>> df.plot.hexbin() - - These plotting methods can also be accessed by calling the accessor as a - method with the ``kind`` argument: - ``df.plot(kind='line')`` is equivalent to ``df.plot.line()`` - """ - - def __call__(self, x=None, y=None, kind='line', ax=None, - subplots=False, sharex=None, sharey=False, layout=None, - figsize=None, use_index=True, title=None, grid=None, - legend=True, style=None, logx=False, logy=False, loglog=False, - xticks=None, yticks=None, xlim=None, ylim=None, - rot=None, fontsize=None, colormap=None, table=False, - yerr=None, xerr=None, - secondary_y=False, sort_columns=False, **kwds): - return plot_frame(self._data, kind=kind, x=x, y=y, ax=ax, - subplots=subplots, sharex=sharex, sharey=sharey, - layout=layout, figsize=figsize, use_index=use_index, - title=title, grid=grid, legend=legend, style=style, - logx=logx, logy=logy, loglog=loglog, xticks=xticks, - yticks=yticks, xlim=xlim, ylim=ylim, rot=rot, - fontsize=fontsize, colormap=colormap, table=table, - yerr=yerr, xerr=xerr, secondary_y=secondary_y, - sort_columns=sort_columns, **kwds) - __call__.__doc__ = plot_frame.__doc__ - - def line(self, x=None, y=None, **kwds): - """ - Line plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='line', x=x, y=y, **kwds) - - def bar(self, x=None, y=None, **kwds): - """ - Vertical bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='bar', x=x, y=y, **kwds) - - def barh(self, x=None, y=None, **kwds): - """ - Horizontal bar plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='barh', x=x, y=y, **kwds) - - def box(self, by=None, **kwds): - """ - Boxplot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - by : string or sequence - Column in the DataFrame to group by. - \*\*kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='box', by=by, **kwds) - - def hist(self, by=None, bins=10, **kwds): - """ - Histogram - - .. versionadded:: 0.17.0 - - Parameters - ---------- - by : string or sequence - Column in the DataFrame to group by. - bins: integer, default 10 - Number of histogram bins to be used - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='hist', by=by, bins=bins, **kwds) - - def kde(self, **kwds): - """ - Kernel Density Estimate plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='kde', **kwds) - - density = kde - - def area(self, x=None, y=None, **kwds): - """ - Area plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='area', x=x, y=y, **kwds) - - def pie(self, y=None, **kwds): - """ - Pie chart - - .. versionadded:: 0.17.0 - - Parameters - ---------- - y : label or position, optional - Column to plot. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='pie', y=y, **kwds) - - def scatter(self, x, y, s=None, c=None, **kwds): - """ - Scatter plot - - .. versionadded:: 0.17.0 - - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - s : scalar or array_like, optional - Size of each point. - c : label or position, optional - Color of each point. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - return self(kind='scatter', x=x, y=y, c=c, s=s, **kwds) +import pandas.plotting as _plotting - def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, - **kwds): - """ - Hexbin plot +# back-compat of public API +# deprecate these functions +m = sys.modules['pandas.tools.plotting'] +for t in [t for t in dir(_plotting) if not t.startswith('_')]: - .. versionadded:: 0.17.0 + def outer(t=t): - Parameters - ---------- - x, y : label or position, optional - Coordinates for each point. - C : label or position, optional - The value at each `(x, y)` point. - reduce_C_function : callable, optional - Function of one argument that reduces all the values in a bin to - a single number (e.g. `mean`, `max`, `sum`, `std`). - gridsize : int, optional - Number of bins. - **kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + def wrapper(*args, **kwargs): + warnings.warn("'pandas.tools.plotting.{t}' is deprecated, " + "import 'pandas.plotting.{t}' instead.".format(t=t), + FutureWarning, stacklevel=2) + return getattr(_plotting, t)(*args, **kwargs) + return wrapper - Returns - ------- - axes : matplotlib.AxesSubplot or np.array of them - """ - if reduce_C_function is not None: - kwds['reduce_C_function'] = reduce_C_function - if gridsize is not None: - kwds['gridsize'] = gridsize - return self(kind='hexbin', x=x, y=y, C=C, **kwds) + setattr(m, t, outer(t)) diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index bc768a8bc5b58..df603c4d880d8 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,1032 +1,11 @@ -from datetime import datetime, timedelta -import datetime as pydt -import numpy as np - -from dateutil.relativedelta import relativedelta - -import matplotlib.units as units -import matplotlib.dates as dates - -from matplotlib.ticker import Formatter, AutoLocator, Locator -from matplotlib.transforms import nonsingular - - -from pandas.types.common import (is_float, is_integer, - is_integer_dtype, - is_float_dtype, - is_datetime64_ns_dtype, - is_period_arraylike, - ) - -from pandas.compat import lrange -import pandas.compat as compat -import pandas._libs.lib as lib -import pandas.core.common as com -from pandas.core.index import Index - -from pandas.core.series import Series -from pandas.tseries.index import date_range -import pandas.tseries.tools as tools -import pandas.tseries.frequencies as frequencies -from pandas.tseries.frequencies import FreqGroup -from pandas.tseries.period import Period, PeriodIndex - -# constants -HOURS_PER_DAY = 24. -MIN_PER_HOUR = 60. -SEC_PER_MIN = 60. - -SEC_PER_HOUR = SEC_PER_MIN * MIN_PER_HOUR -SEC_PER_DAY = SEC_PER_HOUR * HOURS_PER_DAY - -MUSEC_PER_DAY = 1e6 * SEC_PER_DAY - - -def _mpl_le_2_0_0(): - try: - import matplotlib - return matplotlib.compare_versions('2.0.0', matplotlib.__version__) - except ImportError: - return False - - -def register(): - units.registry[lib.Timestamp] = DatetimeConverter() - units.registry[Period] = PeriodConverter() - units.registry[pydt.datetime] = DatetimeConverter() - units.registry[pydt.date] = DatetimeConverter() - units.registry[pydt.time] = TimeConverter() - units.registry[np.datetime64] = DatetimeConverter() - - -def _to_ordinalf(tm): - tot_sec = (tm.hour * 3600 + tm.minute * 60 + tm.second + - float(tm.microsecond / 1e6)) - return tot_sec - - -def time2num(d): - if isinstance(d, compat.string_types): - parsed = tools.to_datetime(d) - if not isinstance(parsed, datetime): - raise ValueError('Could not parse time %s' % d) - return _to_ordinalf(parsed.time()) - if isinstance(d, pydt.time): - return _to_ordinalf(d) - return d - - -class TimeConverter(units.ConversionInterface): - - @staticmethod - def convert(value, unit, axis): - valid_types = (str, pydt.time) - if (isinstance(value, valid_types) or is_integer(value) or - is_float(value)): - return time2num(value) - if isinstance(value, Index): - return value.map(time2num) - if isinstance(value, (list, tuple, np.ndarray, Index)): - return [time2num(x) for x in value] - return value - - @staticmethod - def axisinfo(unit, axis): - if unit != 'time': - return None - - majloc = AutoLocator() - majfmt = TimeFormatter(majloc) - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='time') - - @staticmethod - def default_units(x, axis): - return 'time' - - -# time formatter -class TimeFormatter(Formatter): - - def __init__(self, locs): - self.locs = locs - - def __call__(self, x, pos=0): - fmt = '%H:%M:%S' - s = int(x) - ms = int((x - s) * 1e3) - us = int((x - s) * 1e6 - ms) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - _, h = divmod(h, 24) - if us != 0: - fmt += '.%6f' - elif ms != 0: - fmt += '.%3f' - - return pydt.time(h, m, s, us).strftime(fmt) - - -# Period Conversion - - -class PeriodConverter(dates.DateConverter): - - @staticmethod - def convert(values, units, axis): - if not hasattr(axis, 'freq'): - raise TypeError('Axis must have `freq` set to convert to Periods') - valid_types = (compat.string_types, datetime, - Period, pydt.date, pydt.time) - if (isinstance(values, valid_types) or is_integer(values) or - is_float(values)): - return get_datevalue(values, axis.freq) - if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._values - if isinstance(values, Index): - return values.map(lambda x: get_datevalue(x, axis.freq)) - if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq)._values - if isinstance(values, (list, tuple, np.ndarray, Index)): - return [get_datevalue(x, axis.freq) for x in values] - return values - - -def get_datevalue(date, freq): - if isinstance(date, Period): - return date.asfreq(freq).ordinal - elif isinstance(date, (compat.string_types, datetime, - pydt.date, pydt.time)): - return Period(date, freq).ordinal - elif (is_integer(date) or is_float(date) or - (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): - return date - elif date is None: - return None - raise ValueError("Unrecognizable date '%s'" % date) - - -def _dt_to_float_ordinal(dt): - """ - Convert :mod:`datetime` to the Gregorian date as UTC float days, - preserving hours, minutes, seconds and microseconds. Return value - is a :func:`float`. - """ - if (isinstance(dt, (np.ndarray, Index, Series) - ) and is_datetime64_ns_dtype(dt)): - base = dates.epoch2num(dt.asi8 / 1.0E9) - else: - base = dates.date2num(dt) - return base - - -# Datetime Conversion -class DatetimeConverter(dates.DateConverter): - - @staticmethod - def convert(values, unit, axis): - def try_parse(values): - try: - return _dt_to_float_ordinal(tools.to_datetime(values)) - except Exception: - return values - - if isinstance(values, (datetime, pydt.date)): - return _dt_to_float_ordinal(values) - elif isinstance(values, np.datetime64): - return _dt_to_float_ordinal(lib.Timestamp(values)) - elif isinstance(values, pydt.time): - return dates.date2num(values) - elif (is_integer(values) or is_float(values)): - return values - elif isinstance(values, compat.string_types): - return try_parse(values) - elif isinstance(values, (list, tuple, np.ndarray, Index)): - if isinstance(values, Index): - values = values.values - if not isinstance(values, np.ndarray): - values = com._asarray_tuplesafe(values) - - if is_integer_dtype(values) or is_float_dtype(values): - return values - - try: - values = tools.to_datetime(values) - if isinstance(values, Index): - values = _dt_to_float_ordinal(values) - else: - values = [_dt_to_float_ordinal(x) for x in values] - except Exception: - values = _dt_to_float_ordinal(values) - - return values - - @staticmethod - def axisinfo(unit, axis): - """ - Return the :class:`~matplotlib.units.AxisInfo` for *unit*. - - *unit* is a tzinfo instance or None. - The *axis* argument is required but not used. - """ - tz = unit - - majloc = PandasAutoDateLocator(tz=tz) - majfmt = PandasAutoDateFormatter(majloc, tz=tz) - datemin = pydt.date(2000, 1, 1) - datemax = pydt.date(2010, 1, 1) - - return units.AxisInfo(majloc=majloc, majfmt=majfmt, label='', - default_limits=(datemin, datemax)) - - -class PandasAutoDateFormatter(dates.AutoDateFormatter): - - def __init__(self, locator, tz=None, defaultfmt='%Y-%m-%d'): - dates.AutoDateFormatter.__init__(self, locator, tz, defaultfmt) - # matplotlib.dates._UTC has no _utcoffset called by pandas - if self._tz is dates.UTC: - self._tz._utcoffset = self._tz.utcoffset(None) - - # For mpl > 2.0 the format strings are controlled via rcparams - # so do not mess with them. For mpl < 2.0 change the second - # break point and add a musec break point - if _mpl_le_2_0_0(): - self.scaled[1. / SEC_PER_DAY] = '%H:%M:%S' - self.scaled[1. / MUSEC_PER_DAY] = '%H:%M:%S.%f' - - -class PandasAutoDateLocator(dates.AutoDateLocator): - - def get_locator(self, dmin, dmax): - 'Pick the best locator based on a distance.' - delta = relativedelta(dmax, dmin) - - num_days = (delta.years * 12.0 + delta.months) * 31.0 + delta.days - num_sec = (delta.hours * 60.0 + delta.minutes) * 60.0 + delta.seconds - tot_sec = num_days * 86400. + num_sec - - if abs(tot_sec) < self.minticks: - self._freq = -1 - locator = MilliSecondLocator(self.tz) - locator.set_axis(self.axis) - - locator.set_view_interval(*self.axis.get_view_interval()) - locator.set_data_interval(*self.axis.get_data_interval()) - return locator - - return dates.AutoDateLocator.get_locator(self, dmin, dmax) - - def _get_unit(self): - return MilliSecondLocator.get_unit_generic(self._freq) - - -class MilliSecondLocator(dates.DateLocator): - - UNIT = 1. / (24 * 3600 * 1000) - - def __init__(self, tz): - dates.DateLocator.__init__(self, tz) - self._interval = 1. - - def _get_unit(self): - return self.get_unit_generic(-1) - - @staticmethod - def get_unit_generic(freq): - unit = dates.RRuleLocator.get_unit_generic(freq) - if unit < 0: - return MilliSecondLocator.UNIT - return unit - - def __call__(self): - # if no data have been set, this will tank with a ValueError - try: - dmin, dmax = self.viewlim_to_dt() - except ValueError: - return [] - - if dmin > dmax: - dmax, dmin = dmin, dmax - # We need to cap at the endpoints of valid datetime - - # TODO(wesm) unused? - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - nmax, nmin = dates.date2num((dmax, dmin)) - - num = (nmax - nmin) * 86400 * 1000 - max_millis_ticks = 6 - for interval in [1, 10, 50, 100, 200, 500]: - if num <= interval * (max_millis_ticks - 1): - self._interval = interval - break - else: - # We went through the whole loop without breaking, default to 1 - self._interval = 1000. - - estimate = (nmax - nmin) / (self._get_unit() * self._get_interval()) - - if estimate > self.MAXTICKS * 2: - raise RuntimeError(('MillisecondLocator estimated to generate %d ' - 'ticks from %s to %s: exceeds Locator.MAXTICKS' - '* 2 (%d) ') % - (estimate, dmin, dmax, self.MAXTICKS * 2)) - - freq = '%dL' % self._get_interval() - tz = self.tz.tzname(None) - st = _from_ordinal(dates.date2num(dmin)) # strip tz - ed = _from_ordinal(dates.date2num(dmax)) - all_dates = date_range(start=st, end=ed, freq=freq, tz=tz).asobject - - try: - if len(all_dates) > 0: - locs = self.raise_if_exceeds(dates.date2num(all_dates)) - return locs - except Exception: # pragma: no cover - pass - - lims = dates.date2num([dmin, dmax]) - return lims - - def _get_interval(self): - return self._interval - - def autoscale(self): - """ - Set the view limits to include the data range. - """ - dmin, dmax = self.datalim_to_dt() - if dmin > dmax: - dmax, dmin = dmin, dmax - - # We need to cap at the endpoints of valid datetime - - # TODO(wesm): unused? - - # delta = relativedelta(dmax, dmin) - # try: - # start = dmin - delta - # except ValueError: - # start = _from_ordinal(1.0) - - # try: - # stop = dmax + delta - # except ValueError: - # # The magic number! - # stop = _from_ordinal(3652059.9999999) - - dmin, dmax = self.datalim_to_dt() - - vmin = dates.date2num(dmin) - vmax = dates.date2num(dmax) - - return self.nonsingular(vmin, vmax) - - -def _from_ordinal(x, tz=None): - ix = int(x) - dt = datetime.fromordinal(ix) - remainder = float(x) - ix - hour, remainder = divmod(24 * remainder, 1) - minute, remainder = divmod(60 * remainder, 1) - second, remainder = divmod(60 * remainder, 1) - microsecond = int(1e6 * remainder) - if microsecond < 10: - microsecond = 0 # compensate for rounding errors - dt = datetime(dt.year, dt.month, dt.day, int(hour), int(minute), - int(second), microsecond) - if tz is not None: - dt = dt.astimezone(tz) - - if microsecond > 999990: # compensate for rounding errors - dt += timedelta(microseconds=1e6 - microsecond) - - return dt - -# Fixed frequency dynamic tick locators and formatters - -# ------------------------------------------------------------------------- -# --- Locators --- -# ------------------------------------------------------------------------- - - -def _get_default_annual_spacing(nyears): - """ - Returns a default spacing between consecutive ticks for annual data. - """ - if nyears < 11: - (min_spacing, maj_spacing) = (1, 1) - elif nyears < 20: - (min_spacing, maj_spacing) = (1, 2) - elif nyears < 50: - (min_spacing, maj_spacing) = (1, 5) - elif nyears < 100: - (min_spacing, maj_spacing) = (5, 10) - elif nyears < 200: - (min_spacing, maj_spacing) = (5, 25) - elif nyears < 600: - (min_spacing, maj_spacing) = (10, 50) - else: - factor = nyears // 1000 + 1 - (min_spacing, maj_spacing) = (factor * 20, factor * 100) - return (min_spacing, maj_spacing) - - -def period_break(dates, period): - """ - Returns the indices where the given period changes. - - Parameters - ---------- - dates : PeriodIndex - Array of intervals to monitor. - period : string - Name of the period to monitor. - """ - current = getattr(dates, period) - previous = getattr(dates - 1, period) - return np.nonzero(current - previous)[0] - - -def has_level_label(label_flags, vmin): - """ - Returns true if the ``label_flags`` indicate there is at least one label - for this level. - - if the minimum view limit is not an exact integer, then the first tick - label won't be shown, so we must adjust for that. - """ - if label_flags.size == 0 or (label_flags.size == 1 and - label_flags[0] == 0 and - vmin % 1 > 0.0): - return False - else: - return True - - -def _daily_finder(vmin, vmax, freq): - periodsperday = -1 - - if freq >= FreqGroup.FR_HR: - if freq == FreqGroup.FR_NS: - periodsperday = 24 * 60 * 60 * 1000000000 - elif freq == FreqGroup.FR_US: - periodsperday = 24 * 60 * 60 * 1000000 - elif freq == FreqGroup.FR_MS: - periodsperday = 24 * 60 * 60 * 1000 - elif freq == FreqGroup.FR_SEC: - periodsperday = 24 * 60 * 60 - elif freq == FreqGroup.FR_MIN: - periodsperday = 24 * 60 - elif freq == FreqGroup.FR_HR: - periodsperday = 24 - else: # pragma: no cover - raise ValueError("unexpected frequency: %s" % freq) - periodsperyear = 365 * periodsperday - periodspermonth = 28 * periodsperday - - elif freq == FreqGroup.FR_BUS: - periodsperyear = 261 - periodspermonth = 19 - elif freq == FreqGroup.FR_DAY: - periodsperyear = 365 - periodspermonth = 28 - elif frequencies.get_freq_group(freq) == FreqGroup.FR_WK: - periodsperyear = 52 - periodspermonth = 3 - else: # pragma: no cover - raise ValueError("unexpected frequency") - - # save this for later usage - vmin_orig = vmin - - (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), - Period(ordinal=int(vmax), freq=freq)) - span = vmax.ordinal - vmin.ordinal + 1 - dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) - # Initialize the output - info = np.zeros(span, - dtype=[('val', np.int64), ('maj', bool), - ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_._values - info['fmt'][:] = '' - info['maj'][[0, -1]] = True - # .. and set some shortcuts - info_maj = info['maj'] - info_min = info['min'] - info_fmt = info['fmt'] - - def first_label(label_flags): - if (label_flags[0] == 0) and (label_flags.size > 1) and \ - ((vmin_orig % 1) > 0.0): - return label_flags[1] - else: - return label_flags[0] - - # Case 1. Less than a month - if span <= periodspermonth: - day_start = period_break(dates_, 'day') - month_start = period_break(dates_, 'month') - - def _hour_finder(label_interval, force_year_start): - _hour = dates_.hour - _prev_hour = (dates_ - 1).hour - hour_start = (_hour - _prev_hour) != 0 - info_maj[day_start] = True - info_min[hour_start & (_hour % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - if force_year_start and not has_level_label(year_start, vmin_orig): - info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y' - - def _minute_finder(label_interval): - hour_start = period_break(dates_, 'hour') - _minute = dates_.minute - _prev_minute = (dates_ - 1).minute - minute_start = (_minute - _prev_minute) != 0 - info_maj[hour_start] = True - info_min[minute_start & (_minute % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M' - info_fmt[day_start] = '%H:%M\n%d-%b' - info_fmt[year_start] = '%H:%M\n%d-%b\n%Y' - - def _second_finder(label_interval): - minute_start = period_break(dates_, 'minute') - _second = dates_.second - _prev_second = (dates_ - 1).second - second_start = (_second - _prev_second) != 0 - info['maj'][minute_start] = True - info['min'][second_start & (_second % label_interval == 0)] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[second_start & (_second % - label_interval == 0)] = '%H:%M:%S' - info_fmt[day_start] = '%H:%M:%S\n%d-%b' - info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y' - - if span < periodsperday / 12000.0: - _second_finder(1) - elif span < periodsperday / 6000.0: - _second_finder(2) - elif span < periodsperday / 2400.0: - _second_finder(5) - elif span < periodsperday / 1200.0: - _second_finder(10) - elif span < periodsperday / 800.0: - _second_finder(15) - elif span < periodsperday / 400.0: - _second_finder(30) - elif span < periodsperday / 150.0: - _minute_finder(1) - elif span < periodsperday / 70.0: - _minute_finder(2) - elif span < periodsperday / 24.0: - _minute_finder(5) - elif span < periodsperday / 12.0: - _minute_finder(15) - elif span < periodsperday / 6.0: - _minute_finder(30) - elif span < periodsperday / 2.5: - _hour_finder(1, False) - elif span < periodsperday / 1.5: - _hour_finder(2, False) - elif span < periodsperday * 1.25: - _hour_finder(3, False) - elif span < periodsperday * 2.5: - _hour_finder(6, True) - elif span < periodsperday * 4: - _hour_finder(12, True) - else: - info_maj[month_start] = True - info_min[day_start] = True - year_start = period_break(dates_, 'year') - info_fmt = info['fmt'] - info_fmt[day_start] = '%d' - info_fmt[month_start] = '%d\n%b' - info_fmt[year_start] = '%d\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(day_start)] = '%d\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '%d\n%b\n%Y' - - # Case 2. Less than three months - elif span <= periodsperyear // 4: - month_start = period_break(dates_, 'month') - info_maj[month_start] = True - if freq < FreqGroup.FR_HR: - info['min'] = True - else: - day_start = period_break(dates_, 'day') - info['min'][day_start] = True - week_start = period_break(dates_, 'week') - year_start = period_break(dates_, 'year') - info_fmt[week_start] = '%d' - info_fmt[month_start] = '\n\n%b' - info_fmt[year_start] = '\n\n%b\n%Y' - if not has_level_label(year_start, vmin_orig): - if not has_level_label(month_start, vmin_orig): - info_fmt[first_label(week_start)] = '\n\n%b\n%Y' - else: - info_fmt[first_label(month_start)] = '\n\n%b\n%Y' - # Case 3. Less than 14 months ............... - elif span <= 1.15 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - week_start = period_break(dates_, 'week') - info_maj[month_start] = True - info_min[week_start] = True - info_min[year_start] = False - info_min[month_start] = False - info_fmt[month_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - if not has_level_label(year_start, vmin_orig): - info_fmt[first_label(month_start)] = '%b\n%Y' - # Case 4. Less than 2.5 years ............... - elif span <= 2.5 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - month_start = period_break(dates_, 'month') - info_maj[quarter_start] = True - info_min[month_start] = True - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 4. Less than 4 years ................. - elif span <= 4 * periodsperyear: - year_start = period_break(dates_, 'year') - month_start = period_break(dates_, 'month') - info_maj[year_start] = True - info_min[month_start] = True - info_min[year_start] = False - - month_break = dates_[month_start].month - jan_or_jul = month_start[(month_break == 1) | (month_break == 7)] - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - # Case 5. Less than 11 years ................ - elif span <= 11 * periodsperyear: - year_start = period_break(dates_, 'year') - quarter_start = period_break(dates_, 'quarter') - info_maj[year_start] = True - info_min[quarter_start] = True - info_min[year_start] = False - info_fmt[year_start] = '%Y' - # Case 6. More than 12 years ................ - else: - year_start = period_break(dates_, 'year') - year_break = dates_[year_start].year - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(year_break % maj_anndef == 0)] - info_maj[major_idx] = True - minor_idx = year_start[(year_break % min_anndef == 0)] - info_min[minor_idx] = True - info_fmt[major_idx] = '%Y' - - return info - - -def _monthly_finder(vmin, vmax, freq): - periodsperyear = 12 - - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - # Initialize the output - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - dates_ = info['val'] - info['fmt'] = '' - year_start = (dates_ % 12 == 0).nonzero()[0] - info_maj = info['maj'] - info_fmt = info['fmt'] - - if span <= 1.15 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = '%b' - info_fmt[year_start] = '%b\n%Y' - - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = '%b\n%Y' - - elif span <= 2.5 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - # TODO: Check the following : is it really info['fmt'] ? - info['fmt'][quarter_start] = True - info['min'] = True - - info_fmt[quarter_start] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 4 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - jan_or_jul = (dates_ % 12 == 0) | (dates_ % 12 == 6) - info_fmt[jan_or_jul] = '%b' - info_fmt[year_start] = '%b\n%Y' - - elif span <= 11 * periodsperyear: - quarter_start = (dates_ % 3 == 0).nonzero() - info_maj[year_start] = True - info['min'][quarter_start] = True - - info_fmt[year_start] = '%Y' - - else: - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - years = dates_[year_start] // 12 + 1 - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - - info_fmt[major_idx] = '%Y' - - return info - - -def _quarterly_finder(vmin, vmax, freq): - periodsperyear = 4 - vmin_orig = vmin - (vmin, vmax) = (int(vmin), int(vmax)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - info_maj = info['maj'] - info_fmt = info['fmt'] - year_start = (dates_ % 4 == 0).nonzero()[0] - - if span <= 3.5 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - - info_fmt[:] = 'Q%q' - info_fmt[year_start] = 'Q%q\n%F' - if not has_level_label(year_start, vmin_orig): - if dates_.size > 1: - idx = 1 - else: - idx = 0 - info_fmt[idx] = 'Q%q\n%F' - - elif span <= 11 * periodsperyear: - info_maj[year_start] = True - info['min'] = True - info_fmt[year_start] = '%F' - - else: - years = dates_[year_start] // 4 + 1 - nyears = span / periodsperyear - (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears) - major_idx = year_start[(years % maj_anndef == 0)] - info_maj[major_idx] = True - info['min'][year_start[(years % min_anndef == 0)]] = True - info_fmt[major_idx] = '%F' - - return info - - -def _annual_finder(vmin, vmax, freq): - (vmin, vmax) = (int(vmin), int(vmax + 1)) - span = vmax - vmin + 1 - - info = np.zeros(span, - dtype=[('val', int), ('maj', bool), ('min', bool), - ('fmt', '|S8')]) - info['val'] = np.arange(vmin, vmax + 1) - info['fmt'] = '' - dates_ = info['val'] - - (min_anndef, maj_anndef) = _get_default_annual_spacing(span) - major_idx = dates_ % maj_anndef == 0 - info['maj'][major_idx] = True - info['min'][(dates_ % min_anndef == 0)] = True - info['fmt'][major_idx] = '%Y' - - return info - - -def get_finder(freq): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - fgroup = frequencies.get_freq_group(freq) - - if fgroup == FreqGroup.FR_ANN: - return _annual_finder - elif fgroup == FreqGroup.FR_QTR: - return _quarterly_finder - elif freq == FreqGroup.FR_MTH: - return _monthly_finder - elif ((freq >= FreqGroup.FR_BUS) or fgroup == FreqGroup.FR_WK): - return _daily_finder - else: # pragma: no cover - errmsg = "Unsupported frequency: %s" % (freq) - raise NotImplementedError(errmsg) - - -class TimeSeries_DateLocator(Locator): - """ - Locates the ticks along an axis controlled by a :class:`Series`. - - Parameters - ---------- - freq : {var} - Valid frequency specifier. - minor_locator : {False, True}, optional - Whether the locator is for minor ticks (True) or not. - dynamic_mode : {True, False}, optional - Whether the locator should work in dynamic mode. - base : {int}, optional - quarter : {int}, optional - month : {int}, optional - day : {int}, optional - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.freq = freq - self.base = base - (self.quarter, self.month, self.day) = (quarter, month, day) - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _get_default_locs(self, vmin, vmax): - "Returns the default locations of ticks." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - - locator = self.plot_obj.date_axis_info - - if self.isminor: - return np.compress(locator['min'], locator['val']) - return np.compress(locator['maj'], locator['val']) - - def __call__(self): - 'Return the locations of the ticks.' - # axis calls Locator.set_axis inside set_m_formatter - vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - vmin, vmax = vi - if vmax < vmin: - vmin, vmax = vmax, vmin - if self.isdynamic: - locs = self._get_default_locs(vmin, vmax) - else: # pragma: no cover - base = self.base - (d, m) = divmod(vmin, base) - vmin = (d + 1) * base - locs = lrange(vmin, vmax + 1, base) - return locs - - def autoscale(self): - """ - Sets the view limits to the nearest multiples of base that contain the - data. - """ - # requires matplotlib >= 0.98.0 - (vmin, vmax) = self.axis.get_data_interval() - - locs = self._get_default_locs(vmin, vmax) - (vmin, vmax) = locs[[0, -1]] - if vmin == vmax: - vmin -= 1 - vmax += 1 - return nonsingular(vmin, vmax) - -# ------------------------------------------------------------------------- -# --- Formatter --- -# ------------------------------------------------------------------------- - - -class TimeSeries_DateFormatter(Formatter): - """ - Formats the ticks along an axis controlled by a :class:`PeriodIndex`. - - Parameters - ---------- - freq : {int, string} - Valid frequency specifier. - minor_locator : {False, True} - Whether the current formatter should apply to minor ticks (True) or - major ticks (False). - dynamic_mode : {True, False} - Whether the formatter works in dynamic mode or not. - """ - - def __init__(self, freq, minor_locator=False, dynamic_mode=True, - plot_obj=None): - if isinstance(freq, compat.string_types): - freq = frequencies.get_freq(freq) - self.format = None - self.freq = freq - self.locs = [] - self.formatdict = None - self.isminor = minor_locator - self.isdynamic = dynamic_mode - self.offset = 0 - self.plot_obj = plot_obj - self.finder = get_finder(freq) - - def _set_default_format(self, vmin, vmax): - "Returns the default ticks spacing." - - if self.plot_obj.date_axis_info is None: - self.plot_obj.date_axis_info = self.finder(vmin, vmax, self.freq) - info = self.plot_obj.date_axis_info - - if self.isminor: - format = np.compress(info['min'] & np.logical_not(info['maj']), - info) - else: - format = np.compress(info['maj'], info) - self.formatdict = dict([(x, f) for (x, _, _, f) in format]) - return self.formatdict - - def set_locs(self, locs): - 'Sets the locations of the ticks' - # don't actually use the locs. This is just needed to work with - # matplotlib. Force to use vmin, vmax - self.locs = locs - - (vmin, vmax) = vi = tuple(self.axis.get_view_interval()) - if vi != self.plot_obj.view_interval: - self.plot_obj.date_axis_info = None - self.plot_obj.view_interval = vi - if vmax < vmin: - (vmin, vmax) = (vmax, vmin) - self._set_default_format(vmin, vmax) - - def __call__(self, x, pos=0): - if self.formatdict is None: - return '' - else: - fmt = self.formatdict.pop(x, '') - return Period(ordinal=int(x), freq=self.freq).strftime(fmt) - - -class TimeSeries_TimedeltaFormatter(Formatter): - """ - Formats the ticks along an axis controlled by a :class:`TimedeltaIndex`. - """ - - @staticmethod - def format_timedelta_ticks(x, pos, n_decimals): - """ - Convert seconds to 'D days HH:MM:SS.F' - """ - s, ns = divmod(x, 1e9) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - d, h = divmod(h, 24) - decimals = int(ns * 10**(n_decimals - 9)) - s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) - if n_decimals > 0: - s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) - if d != 0: - s = '{:d} days '.format(int(d)) + s - return s - - def __call__(self, x, pos=0): - (vmin, vmax) = tuple(self.axis.get_view_interval()) - n_decimals = int(np.ceil(np.log10(100 * 1e9 / (vmax - vmin)))) - if n_decimals > 9: - n_decimals = 9 - return self.format_timedelta_ticks(x, pos, n_decimals) +# flake8: noqa + +from pandas.plotting._converter import (register, time2num, + TimeConverter, TimeFormatter, + PeriodConverter, get_datevalue, + DatetimeConverter, + PandasAutoDateFormatter, + PandasAutoDateLocator, + MilliSecondLocator, get_finder, + TimeSeries_DateLocator, + TimeSeries_DateFormatter) diff --git a/pandas/tseries/plotting.py b/pandas/tseries/plotting.py index 4eddf54701889..302016907635d 100644 --- a/pandas/tseries/plotting.py +++ b/pandas/tseries/plotting.py @@ -1,344 +1,3 @@ -""" -Period formatters and locators adapted from scikits.timeseries by -Pierre GF Gerard-Marchant & Matt Knox -""" +# flake8: noqa -# TODO: Use the fact that axis can have units to simplify the process - -import numpy as np - -from matplotlib import pylab -from pandas.tseries.period import Period -from pandas.tseries.offsets import DateOffset -import pandas.tseries.frequencies as frequencies -from pandas.tseries.index import DatetimeIndex -from pandas.tseries.period import PeriodIndex -from pandas.tseries.tdi import TimedeltaIndex -from pandas.formats.printing import pprint_thing -import pandas.compat as compat - -from pandas.tseries.converter import (TimeSeries_DateLocator, - TimeSeries_DateFormatter, - TimeSeries_TimedeltaFormatter) - -# --------------------------------------------------------------------- -# Plotting functions and monkey patches - - -def tsplot(series, plotf, ax=None, **kwargs): - """ - Plots a Series on the given Matplotlib axes or the current axes - - Parameters - ---------- - axes : Axes - series : Series - - Notes - _____ - Supports same kwargs as Axes.plot - - """ - # Used inferred freq is possible, need a test case for inferred - if ax is None: - import matplotlib.pyplot as plt - ax = plt.gca() - - freq, series = _maybe_resample(series, ax, kwargs) - - # Set ax with freq info - _decorate_axes(ax, freq, kwargs) - ax._plot_data.append((series, plotf, kwargs)) - lines = plotf(ax, series.index._mpl_repr(), series.values, **kwargs) - - # set date formatter, locators and rescale limits - format_dateaxis(ax, ax.freq, series.index) - return lines - - -def _maybe_resample(series, ax, kwargs): - # resample against axes freq if necessary - freq, ax_freq = _get_freq(ax, series) - - if freq is None: # pragma: no cover - raise ValueError('Cannot use dynamic axis without frequency info') - - # Convert DatetimeIndex to PeriodIndex - if isinstance(series.index, DatetimeIndex): - series = series.to_period(freq=freq) - - if ax_freq is not None and freq != ax_freq: - if frequencies.is_superperiod(freq, ax_freq): # upsample input - series = series.copy() - series.index = series.index.asfreq(ax_freq, how='s') - freq = ax_freq - elif _is_sup(freq, ax_freq): # one is weekly - how = kwargs.pop('how', 'last') - series = getattr(series.resample('D'), how)().dropna() - series = getattr(series.resample(ax_freq), how)().dropna() - freq = ax_freq - elif frequencies.is_subperiod(freq, ax_freq) or _is_sub(freq, ax_freq): - _upsample_others(ax, freq, kwargs) - ax_freq = freq - else: # pragma: no cover - raise ValueError('Incompatible frequency conversion') - return freq, series - - -def _is_sub(f1, f2): - return ((f1.startswith('W') and frequencies.is_subperiod('D', f2)) or - (f2.startswith('W') and frequencies.is_subperiod(f1, 'D'))) - - -def _is_sup(f1, f2): - return ((f1.startswith('W') and frequencies.is_superperiod('D', f2)) or - (f2.startswith('W') and frequencies.is_superperiod(f1, 'D'))) - - -def _upsample_others(ax, freq, kwargs): - legend = ax.get_legend() - lines, labels = _replot_ax(ax, freq, kwargs) - _replot_ax(ax, freq, kwargs) - - other_ax = None - if hasattr(ax, 'left_ax'): - other_ax = ax.left_ax - if hasattr(ax, 'right_ax'): - other_ax = ax.right_ax - - if other_ax is not None: - rlines, rlabels = _replot_ax(other_ax, freq, kwargs) - lines.extend(rlines) - labels.extend(rlabels) - - if (legend is not None and kwargs.get('legend', True) and - len(lines) > 0): - title = legend.get_title().get_text() - if title == 'None': - title = None - ax.legend(lines, labels, loc='best', title=title) - - -def _replot_ax(ax, freq, kwargs): - data = getattr(ax, '_plot_data', None) - - # clear current axes and data - ax._plot_data = [] - ax.clear() - - _decorate_axes(ax, freq, kwargs) - - lines = [] - labels = [] - if data is not None: - for series, plotf, kwds in data: - series = series.copy() - idx = series.index.asfreq(freq, how='S') - series.index = idx - ax._plot_data.append((series, plotf, kwds)) - - # for tsplot - if isinstance(plotf, compat.string_types): - from pandas.tools.plotting import _plot_klass - plotf = _plot_klass[plotf]._plot - - lines.append(plotf(ax, series.index._mpl_repr(), - series.values, **kwds)[0]) - labels.append(pprint_thing(series.name)) - - return lines, labels - - -def _decorate_axes(ax, freq, kwargs): - """Initialize axes for time-series plotting""" - if not hasattr(ax, '_plot_data'): - ax._plot_data = [] - - ax.freq = freq - xaxis = ax.get_xaxis() - xaxis.freq = freq - if not hasattr(ax, 'legendlabels'): - ax.legendlabels = [kwargs.get('label', None)] - else: - ax.legendlabels.append(kwargs.get('label', None)) - ax.view_interval = None - ax.date_axis_info = None - - -def _get_ax_freq(ax): - """ - Get the freq attribute of the ax object if set. - Also checks shared axes (eg when using secondary yaxis, sharex=True - or twinx) - """ - ax_freq = getattr(ax, 'freq', None) - if ax_freq is None: - # check for left/right ax in case of secondary yaxis - if hasattr(ax, 'left_ax'): - ax_freq = getattr(ax.left_ax, 'freq', None) - elif hasattr(ax, 'right_ax'): - ax_freq = getattr(ax.right_ax, 'freq', None) - if ax_freq is None: - # check if a shared ax (sharex/twinx) has already freq set - shared_axes = ax.get_shared_x_axes().get_siblings(ax) - if len(shared_axes) > 1: - for shared_ax in shared_axes: - ax_freq = getattr(shared_ax, 'freq', None) - if ax_freq is not None: - break - return ax_freq - - -def _get_freq(ax, series): - # get frequency from data - freq = getattr(series.index, 'freq', None) - if freq is None: - freq = getattr(series.index, 'inferred_freq', None) - - ax_freq = _get_ax_freq(ax) - - # use axes freq if no data freq - if freq is None: - freq = ax_freq - - # get the period frequency - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - - freq = frequencies.get_period_alias(freq) - return freq, ax_freq - - -def _use_dynamic_x(ax, data): - freq = _get_index_freq(data) - ax_freq = _get_ax_freq(ax) - - if freq is None: # convert irregular if axes has freq info - freq = ax_freq - else: # do not use tsplot if irregular was plotted first - if (ax_freq is None) and (len(ax.get_lines()) > 0): - return False - - if freq is None: - return False - - if isinstance(freq, DateOffset): - freq = freq.rule_code - else: - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - if freq is None: - return False - - # hack this for 0.10.1, creating more technical debt...sigh - if isinstance(data.index, DatetimeIndex): - base = frequencies.get_freq(freq) - x = data.index - if (base <= frequencies.FreqGroup.FR_DAY): - return x[:1].is_normalized - return Period(x[0], freq).to_timestamp(tz=x.tz) == x[0] - return True - - -def _get_index_freq(data): - freq = getattr(data.index, 'freq', None) - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if freq == 'B': - weekdays = np.unique(data.index.dayofweek) - if (5 in weekdays) or (6 in weekdays): - freq = None - return freq - - -def _maybe_convert_index(ax, data): - # tsplot converts automatically, but don't want to convert index - # over and over for DataFrames - if isinstance(data.index, DatetimeIndex): - freq = getattr(data.index, 'freq', None) - - if freq is None: - freq = getattr(data.index, 'inferred_freq', None) - if isinstance(freq, DateOffset): - freq = freq.rule_code - - if freq is None: - freq = _get_ax_freq(ax) - - if freq is None: - raise ValueError('Could not get frequency alias for plotting') - - freq = frequencies.get_base_alias(freq) - freq = frequencies.get_period_alias(freq) - - data = data.to_period(freq=freq) - return data - - -# Patch methods for subplot. Only format_dateaxis is currently used. -# Do we need the rest for convenience? - -def format_timedelta_ticks(x, pos, n_decimals): - """ - Convert seconds to 'D days HH:MM:SS.F' - """ - s, ns = divmod(x, 1e9) - m, s = divmod(s, 60) - h, m = divmod(m, 60) - d, h = divmod(h, 24) - decimals = int(ns * 10**(n_decimals - 9)) - s = r'{:02d}:{:02d}:{:02d}'.format(int(h), int(m), int(s)) - if n_decimals > 0: - s += '.{{:0{:0d}d}}'.format(n_decimals).format(decimals) - if d != 0: - s = '{:d} days '.format(int(d)) + s - return s - - -def format_dateaxis(subplot, freq, index): - """ - Pretty-formats the date axis (x-axis). - - Major and minor ticks are automatically set for the frequency of the - current underlying series. As the dynamic mode is activated by - default, changing the limits of the x axis will intelligently change - the positions of the ticks. - """ - - # handle index specific formatting - # Note: DatetimeIndex does not use this - # interface. DatetimeIndex uses matplotlib.date directly - if isinstance(index, PeriodIndex): - - majlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minlocator = TimeSeries_DateLocator(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_locator(majlocator) - subplot.xaxis.set_minor_locator(minlocator) - - majformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=False, - plot_obj=subplot) - minformatter = TimeSeries_DateFormatter(freq, dynamic_mode=True, - minor_locator=True, - plot_obj=subplot) - subplot.xaxis.set_major_formatter(majformatter) - subplot.xaxis.set_minor_formatter(minformatter) - - # x and y coord info - subplot.format_coord = lambda t, y: ( - "t = {0} y = {1:8f}".format(Period(ordinal=int(t), freq=freq), y)) - - elif isinstance(index, TimedeltaIndex): - subplot.xaxis.set_major_formatter( - TimeSeries_TimedeltaFormatter()) - else: - raise TypeError('index type not supported') - - pylab.draw_if_interactive() +from pandas.plotting._timeseries import tsplot diff --git a/pandas/util/doctools.py b/pandas/util/doctools.py index 6df6444aeafab..cbc9518b96416 100644 --- a/pandas/util/doctools.py +++ b/pandas/util/doctools.py @@ -131,7 +131,7 @@ def _make_table(self, ax, df, title, height=None): ax.set_visible(False) return - import pandas.tools.plotting as plotting + import pandas.plotting as plotting idx_nlevels = df.index.nlevels col_nlevels = df.columns.nlevels diff --git a/setup.py b/setup.py index 6707af7eb0908..f8882e748927b 100755 --- a/setup.py +++ b/setup.py @@ -649,6 +649,7 @@ def pxd(name): 'pandas.io.msgpack', 'pandas._libs', 'pandas.formats', + 'pandas.plotting', 'pandas.sparse', 'pandas.stats', 'pandas.util',