From a3c38fe82ea5a26d6beb98b91cf6473b9896b7d2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 5 Dec 2015 11:41:17 -0600 Subject: [PATCH] ENH: display_format for style Closes https://github.com/pydata/pandas/issues/11692 Closes https://github.com/pydata/pandas/issues/12134 Closes https://github.com/pydata/pandas/issues/12125 This adds a `.format` method to Styler for formatting the display value (the actual text) of each scalar value. In the process of cleaning up the template, I close #12134 (spurious 0) and #12125 (KeyError from using iloc improperly) cherry pick test from #12126 only allow str formatting for now fix tests for new spec formatter callable update notebook --- doc/source/api.rst | 1 + doc/source/whatsnew/v0.18.0.txt | 1 + pandas/core/style.py | 148 ++++++++++++++++++++++------ pandas/tests/test_style.py | 165 +++++++++++++++++++++++--------- 4 files changed, 240 insertions(+), 75 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index c572aa9ae2e03..59f0f0a82a892 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1820,6 +1820,7 @@ Style Application Styler.apply Styler.applymap + Styler.format Styler.set_precision Styler.set_table_styles Styler.set_caption diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index c420b34db7ac8..35b1ee54ff683 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -392,6 +392,7 @@ Other enhancements values it contains (:issue:`11597`) - ``Series`` gained an ``is_unique`` attribute (:issue:`11946`) - ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`). 
+- Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`) - ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`) - ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`) - Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`). For further details see :ref:`here <io.bigquery_authentication>` diff --git a/pandas/core/style.py b/pandas/core/style.py index a5a42c2bb47a7..15fcec118e7d4 100644 --- a/pandas/core/style.py +++ b/pandas/core/style.py @@ -3,10 +3,11 @@ DataFrames and Series. """ from functools import partial +from itertools import product from contextlib import contextmanager from uuid import uuid1 import copy -from collections import defaultdict +from collections import defaultdict, MutableMapping try: from jinja2 import Template @@ -18,7 +19,8 @@ import numpy as np import pandas as pd -from pandas.compat import lzip +from pandas.compat import lzip, range +import pandas.core.common as com from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice try: import matplotlib.pyplot as plt @@ -117,11 +119,7 @@ class Styler(object): {% for c in r %} <{{c.type}} id="T_{{uuid}}{{c.id}}" class="{{c.class}}"> - {% if c.value is number %} - {{c.value|round(precision)}} - {% else %} - {{c.value}} - {% endif %} + {{ c.display_value }} {% endfor %} {% endfor %} @@ -152,6 +150,15 @@ def __init__(self, data, precision=None, table_styles=None, uuid=None, precision = pd.options.display.precision self.precision = precision self.table_attributes = table_attributes + # display_funcs maps (row, col) -> formatting function + + def default_display_func(x): + if com.is_float(x): + return '{:>.{precision}g}'.format(x, precision=self.precision) + else: + return x + + self._display_funcs = defaultdict(lambda: default_display_func) def _repr_html_(self): """Hooks into Jupyter notebook rich display system.""" @@ -199,10 
+206,12 @@ def _translate(self): "class": " ".join([BLANK_CLASS])}] * n_rlvls for c in range(len(clabels[0])): cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] - cs.extend( - cell_context.get("col_headings", {}).get(r, {}).get(c, [])) + cs.extend(cell_context.get( + "col_headings", {}).get(r, {}).get(c, [])) + value = clabels[r][c] row_es.append({"type": "th", - "value": clabels[r][c], + "value": value, + "display_value": value, "class": " ".join(cs)}) head.append(row_es) @@ -231,15 +240,22 @@ def _translate(self): cell_context.get("row_headings", {}).get(r, {}).get(c, [])) row_es = [{"type": "th", "value": rlabels[r][c], - "class": " ".join(cs)} for c in range(len(rlabels[r]))] + "class": " ".join(cs), + "display_value": rlabels[r][c]} + for c in range(len(rlabels[r]))] for c, col in enumerate(self.data.columns): cs = [DATA_CLASS, "row%s" % r, "col%s" % c] cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) - row_es.append({"type": "td", - "value": self.data.iloc[r][c], - "class": " ".join(cs), - "id": "_".join(cs[1:])}) + formatter = self._display_funcs[(r, c)] + value = self.data.iloc[r, c] + row_es.append({ + "type": "td", + "value": value, + "class": " ".join(cs), + "id": "_".join(cs[1:]), + "display_value": formatter(value) + }) props = [] for x in ctx[r, c]: # have to handle empty styles like [''] @@ -255,6 +271,71 @@ def _translate(self): precision=precision, table_styles=table_styles, caption=caption, table_attributes=self.table_attributes) + def format(self, formatter, subset=None): + """ + Format the text display value of cells. + + .. versionadded:: 0.18.0 + + Parameters + ---------- + formatter: str, callable, or dict + subset: IndexSlice + An argument to DataFrame.loc that restricts which elements + ``formatter`` is applied to. 
+ + Returns + ------- + self : Styler + + Notes + ----- + + ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where + ``a`` is one of + + - str: this will be wrapped in: ``a.format(x)`` + - callable: called with the value of an individual cell + + The default display value for numeric values is the "general" (``g``) + format with ``pd.options.display.precision`` precision. + + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) + >>> df.style.format("{:.2%}") + >>> df['c'] = ['a', 'b', 'c', 'd'] + >>> df.style.format({'c': str.upper}) + """ + if subset is None: + row_locs = range(len(self.data)) + col_locs = range(len(self.data.columns)) + else: + subset = _non_reducing_slice(subset) + if len(subset) == 1: + subset = subset, self.data.columns + + sub_df = self.data.loc[subset] + row_locs = self.data.index.get_indexer_for(sub_df.index) + col_locs = self.data.columns.get_indexer_for(sub_df.columns) + + if isinstance(formatter, MutableMapping): + for col, col_formatter in formatter.items(): + # formatter must be callable, so '{}' are converted to lambdas + col_formatter = _maybe_wrap_formatter(col_formatter) + col_num = self.data.columns.get_indexer_for([col])[0] + + for row_num in row_locs: + self._display_funcs[(row_num, col_num)] = col_formatter + else: + # single scalar to format all cells with + locs = product(*(row_locs, col_locs)) + for i, j in locs: + formatter = _maybe_wrap_formatter(formatter) + self._display_funcs[(i, j)] = formatter + return self + def render(self): """ Render the built up styles to HTML @@ -376,7 +457,7 @@ def apply(self, func, axis=0, subset=None, **kwargs): Returns ------- - self + self : Styler Notes ----- @@ -415,7 +496,7 @@ def applymap(self, func, subset=None, **kwargs): Returns ------- - self + self : Styler """ self._todo.append((lambda instance: getattr(instance, '_applymap'), @@ -434,7 +515,7 @@ def set_precision(self, precision): Returns ------- - self + self : Styler """ 
self.precision = precision return self @@ -453,7 +534,7 @@ def set_table_attributes(self, attributes): Returns ------- - self + self : Styler """ self.table_attributes = attributes return self @@ -489,7 +570,7 @@ def use(self, styles): Returns ------- - self + self : Styler See Also -------- @@ -510,7 +591,7 @@ def set_uuid(self, uuid): Returns ------- - self + self : Styler """ self.uuid = uuid return self @@ -527,7 +608,7 @@ def set_caption(self, caption): Returns ------- - self + self : Styler """ self.caption = caption return self @@ -550,7 +631,7 @@ def set_table_styles(self, table_styles): Returns ------- - self + self : Styler Examples -------- @@ -583,7 +664,7 @@ def highlight_null(self, null_color='red'): Returns ------- - self + self : Styler """ self.applymap(self._highlight_null, null_color=null_color) return self @@ -610,7 +691,7 @@ def background_gradient(self, cmap='PuBu', low=0, high=0, axis=0, Returns ------- - self + self : Styler Notes ----- @@ -695,7 +776,7 @@ def bar(self, subset=None, axis=0, color='#d65f5f', width=100): Returns ------- - self + self : Styler """ subset = _maybe_numeric_slice(self.data, subset) subset = _non_reducing_slice(subset) @@ -720,7 +801,7 @@ def highlight_max(self, subset=None, color='yellow', axis=0): Returns ------- - self + self : Styler """ return self._highlight_handler(subset=subset, color=color, axis=axis, max_=True) @@ -742,7 +823,7 @@ def highlight_min(self, subset=None, color='yellow', axis=0): Returns ------- - self + self : Styler """ return self._highlight_handler(subset=subset, color=color, axis=axis, max_=False) @@ -771,3 +852,14 @@ def _highlight_extrema(data, color='yellow', max_=True): extrema = data == data.min().min() return pd.DataFrame(np.where(extrema, attr, ''), index=data.index, columns=data.columns) + + +def _maybe_wrap_formatter(formatter): + if com.is_string_like(formatter): + return lambda x: formatter.format(x) + elif callable(formatter): + return formatter + else: + msg = "Expected a 
template string or callable, got {} instead".format( + formatter) + raise TypeError(msg) diff --git a/pandas/tests/test_style.py b/pandas/tests/test_style.py index 9a427cb26520c..ef5a966d65545 100644 --- a/pandas/tests/test_style.py +++ b/pandas/tests/test_style.py @@ -136,9 +136,9 @@ def test_index_name(self): expected = [[{'class': 'blank', 'type': 'th', 'value': ''}, {'class': 'col_heading level0 col0', 'type': 'th', - 'value': 'B'}, + 'value': 'B', 'display_value': 'B'}, {'class': 'col_heading level0 col1', 'type': 'th', - 'value': 'C'}], + 'value': 'C', 'display_value': 'C'}], [{'class': 'col_heading level2 col0', 'type': 'th', 'value': 'A'}, {'class': 'blank', 'type': 'th', 'value': ''}, @@ -154,7 +154,7 @@ def test_multiindex_name(self): expected = [[{'class': 'blank', 'type': 'th', 'value': ''}, {'class': 'blank', 'type': 'th', 'value': ''}, {'class': 'col_heading level0 col0', 'type': 'th', - 'value': 'C'}], + 'value': 'C', 'display_value': 'C'}], [{'class': 'col_heading level2 col0', 'type': 'th', 'value': 'A'}, {'class': 'col_heading level2 col1', 'type': 'th', @@ -163,6 +163,12 @@ def test_multiindex_name(self): self.assertEqual(result['head'], expected) + def test_numeric_columns(self): + # https://github.com/pydata/pandas/issues/12125 + # smoke test for _translate + df = pd.DataFrame({0: [1, 2, 3]}) + df.style._translate() + def test_apply_axis(self): df = pd.DataFrame({'A': [0, 0], 'B': [1, 1]}) f = lambda x: ['val: %s' % x.max() for v in x] @@ -263,53 +269,51 @@ def test_bar(self): def test_bar_0points(self): df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) result = df.style.bar()._compute().ctx - expected = { - (0, 0): ['width: 10em', ' height: 80%'], - (0, 1): ['width: 10em', ' height: 80%'], - (0, 2): ['width: 10em', ' height: 80%'], - (1, 0): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, ' - 'transparent 0%)'], - (1, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, 
' - 'transparent 0%)'], - (1, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, ' - 'transparent 0%)'], - (2, 0): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, ' - 'transparent 0%)'], - (2, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, ' - 'transparent 0%)'], - (2, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, ' - 'transparent 0%)']} + expected = {(0, 0): ['width: 10em', ' height: 80%'], + (0, 1): ['width: 10em', ' height: 80%'], + (0, 2): ['width: 10em', ' height: 80%'], + (1, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%,' + ' transparent 0%)'], + (1, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%,' + ' transparent 0%)'], + (1, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%,' + ' transparent 0%)'], + (2, 0): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%' + ', transparent 0%)'], + (2, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%' + ', transparent 0%)'], + (2, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%' + ', transparent 0%)']} self.assertEqual(result, expected) result = df.style.bar(axis=1)._compute().ctx - expected = { - (0, 0): ['width: 10em', ' height: 80%'], - (0, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, ' - 'transparent 0%)'], - (0, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, ' - 'transparent 0%)'], - (1, 0): ['width: 10em', ' height: 80%'], - (1, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, ' - 'transparent 0%)'], - (1, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, ' - 'transparent 
0%)'], - (2, 0): ['width: 10em', ' height: 80%'], - (2, 1): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 50.0%, ' - 'transparent 0%)'], - (2, 2): ['width: 10em', ' height: 80%', - 'background: linear-gradient(90deg,#d65f5f 100.0%, ' - 'transparent 0%)']} + expected = {(0, 0): ['width: 10em', ' height: 80%'], + (0, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%,' + ' transparent 0%)'], + (0, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%' + ', transparent 0%)'], + (1, 0): ['width: 10em', ' height: 80%'], + (1, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%' + ', transparent 0%)'], + (1, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%' + ', transparent 0%)'], + (2, 0): ['width: 10em', ' height: 80%'], + (2, 1): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 50.0%' + ', transparent 0%)'], + (2, 2): ['width: 10em', ' height: 80%', + 'background: linear-gradient(90deg,#d65f5f 100.0%' + ', transparent 0%)']} self.assertEqual(result, expected) def test_highlight_null(self, null_color='red'): @@ -444,6 +448,73 @@ def test_export(self): self.assertEqual(style1._todo, style2._todo) style2.render() + def test_display_format(self): + df = pd.DataFrame(np.random.random(size=(2, 2))) + ctx = df.style.format("{:0.1f}")._translate() + + self.assertTrue(all(['display_value' in c for c in row] + for row in ctx['body'])) + self.assertTrue(all([len(c['display_value']) <= 3 for c in row[1:]] + for row in ctx['body'])) + self.assertTrue( + len(ctx['body'][0][1]['display_value'].lstrip('-')) <= 3) + + def test_display_format_raises(self): + df = pd.DataFrame(np.random.randn(2, 2)) + with tm.assertRaises(TypeError): + df.style.format(5) + with tm.assertRaises(TypeError): + df.style.format(True) + + def test_display_subset(self): + df = pd.DataFrame([[.1234, .1234], 
[1.1234, 1.1234]], + columns=['a', 'b']) + ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"}, + subset=pd.IndexSlice[0, :])._translate() + expected = '0.1' + self.assertEqual(ctx['body'][0][1]['display_value'], expected) + self.assertEqual(ctx['body'][1][1]['display_value'], '1.1234') + self.assertEqual(ctx['body'][0][2]['display_value'], '12.34%') + + raw_11 = '1.1234' + ctx = df.style.format("{:0.1f}", + subset=pd.IndexSlice[0, :])._translate() + self.assertEqual(ctx['body'][0][1]['display_value'], expected) + self.assertEqual(ctx['body'][1][1]['display_value'], raw_11) + + ctx = df.style.format("{:0.1f}", + subset=pd.IndexSlice[0, :])._translate() + self.assertEqual(ctx['body'][0][1]['display_value'], expected) + self.assertEqual(ctx['body'][1][1]['display_value'], raw_11) + + ctx = df.style.format("{:0.1f}", + subset=pd.IndexSlice['a'])._translate() + self.assertEqual(ctx['body'][0][1]['display_value'], expected) + self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234') + + ctx = df.style.format("{:0.1f}", + subset=pd.IndexSlice[0, 'a'])._translate() + self.assertEqual(ctx['body'][0][1]['display_value'], expected) + self.assertEqual(ctx['body'][1][1]['display_value'], raw_11) + + ctx = df.style.format("{:0.1f}", + subset=pd.IndexSlice[[0, 1], ['a']])._translate() + self.assertEqual(ctx['body'][0][1]['display_value'], expected) + self.assertEqual(ctx['body'][1][1]['display_value'], '1.1') + self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234') + self.assertEqual(ctx['body'][1][2]['display_value'], '1.1234') + + def test_display_dict(self): + df = pd.DataFrame([[.1234, .1234], [1.1234, 1.1234]], + columns=['a', 'b']) + ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"})._translate() + self.assertEqual(ctx['body'][0][1]['display_value'], '0.1') + self.assertEqual(ctx['body'][0][2]['display_value'], '12.34%') + df['c'] = ['aaa', 'bbb'] + ctx = df.style.format({"a": "{:0.1f}", "c": str.upper})._translate() + 
self.assertEqual(ctx['body'][0][1]['display_value'], '0.1') + self.assertEqual(ctx['body'][0][3]['display_value'], 'AAA') + @tm.mplskip class TestStylerMatplotlibDep(TestCase):