From a3c38fe82ea5a26d6beb98b91cf6473b9896b7d2 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Sat, 5 Dec 2015 11:41:17 -0600
Subject: [PATCH] ENH: display_format for style

Closes https://github.com/pydata/pandas/issues/11692
Closes https://github.com/pydata/pandas/issues/12134
Closes https://github.com/pydata/pandas/issues/12125

This adds a `.format` method to Styler for formatting the display value
(the actual text) of each scalar value.

In the process of cleaning up the template, I close #12134 (spurious 0)
and #12125 (KeyError from using iloc improperly)

cherry pick test from #12126

only allow str formatting for now

fix tests for new spec

formatter callable

update notebook
---
 doc/source/api.rst              |   1 +
 doc/source/whatsnew/v0.18.0.txt |   1 +
 pandas/core/style.py            | 148 ++++++++++++++++++++++------
 pandas/tests/test_style.py      | 165 +++++++++++++++++++++++---------
 4 files changed, 240 insertions(+), 75 deletions(-)
diff --git a/doc/source/api.rst b/doc/source/api.rst
index c572aa9ae2e03..59f0f0a82a892 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -1820,6 +1820,7 @@ Style Application
 
    Styler.apply
    Styler.applymap
+   Styler.format
    Styler.set_precision
    Styler.set_table_styles
    Styler.set_caption
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index c420b34db7ac8..35b1ee54ff683 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -392,6 +392,7 @@ Other enhancements
   values it contains (:issue:`11597`)
 - ``Series`` gained an ``is_unique`` attribute (:issue:`11946`)
 - ``DataFrame.quantile`` and ``Series.quantile`` now accept ``interpolation`` keyword (:issue:`10174`).
+- Added ``DataFrame.style.format`` for more flexible formatting of cell values (:issue:`11692`)
 - ``DataFrame.select_dtypes`` now allows the ``np.float16`` typecode (:issue:`11990`)
 - ``pivot_table()`` now accepts most iterables for the ``values`` parameter (:issue:`12017`)
 - Added Google ``BigQuery`` service account authentication support, which enables authentication on remote servers. (:issue:`11881`). For further details see :ref:`here <io.bigquery_authentication>`
diff --git a/pandas/core/style.py b/pandas/core/style.py
index a5a42c2bb47a7..15fcec118e7d4 100644
--- a/pandas/core/style.py
+++ b/pandas/core/style.py
@@ -3,10 +3,11 @@
 DataFrames and Series.
 """
 from functools import partial
+from itertools import product
 from contextlib import contextmanager
 from uuid import uuid1
 import copy
-from collections import defaultdict
+from collections import defaultdict, MutableMapping
 
 try:
     from jinja2 import Template
@@ -18,7 +19,8 @@
 
 import numpy as np
 import pandas as pd
-from pandas.compat import lzip
+from pandas.compat import lzip, range
+import pandas.core.common as com
 from pandas.core.indexing import _maybe_numeric_slice, _non_reducing_slice
 try:
     import matplotlib.pyplot as plt
@@ -117,11 +119,7 @@ class Styler(object):
             <tr>
                 {% for c in r %}
                 <{{c.type}} id="T_{{uuid}}{{c.id}}" class="{{c.class}}">
-                    {% if c.value is number %}
-                        {{c.value|round(precision)}}
-                    {% else %}
-                        {{c.value}}
-                    {% endif %}
+                    {{ c.display_value }}
                 {% endfor %}
             </tr>
             {% endfor %}
@@ -152,6 +150,15 @@ def __init__(self, data, precision=None, table_styles=None, uuid=None,
             precision = pd.options.display.precision
         self.precision = precision
         self.table_attributes = table_attributes
+        # display_funcs maps (row, col) -> formatting function
+
+        def default_display_func(x):
+            if com.is_float(x):
+                return '{:>.{precision}g}'.format(x, precision=self.precision)
+            else:
+                return x
+
+        self._display_funcs = defaultdict(lambda: default_display_func)
 
     def _repr_html_(self):
         """Hooks into Jupyter notebook rich display system."""
@@ -199,10 +206,12 @@ def _translate(self):
                        "class": " ".join([BLANK_CLASS])}] * n_rlvls
             for c in range(len(clabels[0])):
                 cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c]
-                cs.extend(
-                    cell_context.get("col_headings", {}).get(r, {}).get(c, []))
+                cs.extend(cell_context.get(
+                    "col_headings", {}).get(r, {}).get(c, []))
+                value = clabels[r][c]
                 row_es.append({"type": "th",
-                               "value": clabels[r][c],
+                               "value": value,
+                               "display_value": value,
                                "class": " ".join(cs)})
             head.append(row_es)
 
@@ -231,15 +240,22 @@ def _translate(self):
                 cell_context.get("row_headings", {}).get(r, {}).get(c, []))
             row_es = [{"type": "th",
                        "value": rlabels[r][c],
-                       "class": " ".join(cs)} for c in range(len(rlabels[r]))]
+                       "class": " ".join(cs),
+                       "display_value": rlabels[r][c]}
+                      for c in range(len(rlabels[r]))]
 
             for c, col in enumerate(self.data.columns):
                 cs = [DATA_CLASS, "row%s" % r, "col%s" % c]
                 cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
-                row_es.append({"type": "td",
-                               "value": self.data.iloc[r][c],
-                               "class": " ".join(cs),
-                               "id": "_".join(cs[1:])})
+                formatter = self._display_funcs[(r, c)]
+                value = self.data.iloc[r, c]
+                row_es.append({
+                    "type": "td",
+                    "value": value,
+                    "class": " ".join(cs),
+                    "id": "_".join(cs[1:]),
+                    "display_value": formatter(value)
+                })
                 props = []
                 for x in ctx[r, c]:
                     # have to handle empty styles like ['']
@@ -255,6 +271,71 @@ def _translate(self):
                     precision=precision, table_styles=table_styles,
                     caption=caption, table_attributes=self.table_attributes)
 
+    def format(self, formatter, subset=None):
+        """
+        Format the text display value of cells.
+
+        .. versionadded:: 0.18.0
+
+        Parameters
+        ----------
+        formatter: str, callable, or dict
+        subset: IndexSlice
+            An argument to DataFrame.loc that restricts which elements
+            ``formatter`` is applied to.
+
+        Returns
+        -------
+        self : Styler
+
+        Notes
+        -----
+
+        ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where
+        ``a`` is one of
+
+        - str: this will be wrapped in: ``a.format(x)``
+        - callable: called with the value of an individual cell
+
+        The default display value for float values is the "general" (``g``)
+        format with ``pd.options.display.precision`` precision.
+
+        Examples
+        --------
+
+        >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b'])
+        >>> df.style.format("{:.2%}")
+        >>> df['c'] = ['a', 'b', 'c', 'd']
+        >>> df.style.format({'c': str.upper})
+        """
+        if subset is None:
+            row_locs = range(len(self.data))
+            col_locs = range(len(self.data.columns))
+        else:
+            subset = _non_reducing_slice(subset)
+            if len(subset) == 1:
+                subset = subset, self.data.columns
+
+            sub_df = self.data.loc[subset]
+            row_locs = self.data.index.get_indexer_for(sub_df.index)
+            col_locs = self.data.columns.get_indexer_for(sub_df.columns)
+
+        if isinstance(formatter, MutableMapping):
+            for col, col_formatter in formatter.items():
+                # formatter must be callable, so '{}' are converted to lambdas
+                col_formatter = _maybe_wrap_formatter(col_formatter)
+                col_num = self.data.columns.get_indexer_for([col])[0]
+
+                for row_num in row_locs:
+                    self._display_funcs[(row_num, col_num)] = col_formatter
+        else:
+            # single formatter for every selected cell; wrap it once up
+            # front so an invalid formatter raises even for an empty selection
+            formatter = _maybe_wrap_formatter(formatter)
+            for i, j in product(row_locs, col_locs):
+                self._display_funcs[(i, j)] = formatter
+        return self
+
     def render(self):
         """
         Render the built up styles to HTML
@@ -376,7 +457,7 @@ def apply(self, func, axis=0, subset=None, **kwargs):
 
         Returns
         -------
-        self
+        self : Styler
 
         Notes
         -----
@@ -415,7 +496,7 @@ def applymap(self, func, subset=None, **kwargs):
 
         Returns
         -------
-        self
+        self : Styler
 
         """
         self._todo.append((lambda instance: getattr(instance, '_applymap'),
@@ -434,7 +515,7 @@ def set_precision(self, precision):
 
         Returns
         -------
-        self
+        self : Styler
         """
         self.precision = precision
         return self
@@ -453,7 +534,7 @@ def set_table_attributes(self, attributes):
 
         Returns
         -------
-        self
+        self : Styler
         """
         self.table_attributes = attributes
         return self
@@ -489,7 +570,7 @@ def use(self, styles):
 
         Returns
         -------
-        self
+        self : Styler
 
         See Also
         --------
@@ -510,7 +591,7 @@ def set_uuid(self, uuid):
 
         Returns
         -------
-        self
+        self : Styler
         """
         self.uuid = uuid
         return self
@@ -527,7 +608,7 @@ def set_caption(self, caption):
 
         Returns
         -------
-        self
+        self : Styler
         """
         self.caption = caption
         return self
@@ -550,7 +631,7 @@ def set_table_styles(self, table_styles):
 
         Returns
         -------
-        self
+        self : Styler
 
         Examples
         --------
@@ -583,7 +664,7 @@ def highlight_null(self, null_color='red'):
 
         Returns
         -------
-        self
+        self : Styler
         """
         self.applymap(self._highlight_null, null_color=null_color)
         return self
@@ -610,7 +691,7 @@ def background_gradient(self, cmap='PuBu', low=0, high=0, axis=0,
 
         Returns
         -------
-        self
+        self : Styler
 
         Notes
         -----
@@ -695,7 +776,7 @@ def bar(self, subset=None, axis=0, color='#d65f5f', width=100):
 
         Returns
         -------
-        self
+        self : Styler
         """
         subset = _maybe_numeric_slice(self.data, subset)
         subset = _non_reducing_slice(subset)
@@ -720,7 +801,7 @@ def highlight_max(self, subset=None, color='yellow', axis=0):
 
         Returns
         -------
-        self
+        self : Styler
         """
         return self._highlight_handler(subset=subset, color=color, axis=axis,
                                        max_=True)
@@ -742,7 +823,7 @@ def highlight_min(self, subset=None, color='yellow', axis=0):
 
         Returns
         -------
-        self
+        self : Styler
         """
         return self._highlight_handler(subset=subset, color=color, axis=axis,
                                        max_=False)
@@ -771,3 +852,14 @@ def _highlight_extrema(data, color='yellow', max_=True):
                 extrema = data == data.min().min()
             return pd.DataFrame(np.where(extrema, attr, ''),
                                 index=data.index, columns=data.columns)
+
+
+def _maybe_wrap_formatter(formatter):
+    """Return ``formatter`` as a callable, wrapping template strings."""
+    if com.is_string_like(formatter):
+        return lambda x: formatter.format(x)
+    if callable(formatter):
+        return formatter
+    # neither a template string nor a callable
+    raise TypeError("Expected a template string or callable, "
+                    "got {} instead".format(formatter))
diff --git a/pandas/tests/test_style.py b/pandas/tests/test_style.py
index 9a427cb26520c..ef5a966d65545 100644
--- a/pandas/tests/test_style.py
+++ b/pandas/tests/test_style.py
@@ -136,9 +136,9 @@ def test_index_name(self):
 
         expected = [[{'class': 'blank', 'type': 'th', 'value': ''},
                      {'class': 'col_heading level0 col0', 'type': 'th',
-                      'value': 'B'},
+                      'value': 'B', 'display_value': 'B'},
                      {'class': 'col_heading level0 col1', 'type': 'th',
-                      'value': 'C'}],
+                      'value': 'C', 'display_value': 'C'}],
                     [{'class': 'col_heading level2 col0', 'type': 'th',
                       'value': 'A'},
                      {'class': 'blank', 'type': 'th', 'value': ''},
@@ -154,7 +154,7 @@ def test_multiindex_name(self):
         expected = [[{'class': 'blank', 'type': 'th', 'value': ''},
                      {'class': 'blank', 'type': 'th', 'value': ''},
                      {'class': 'col_heading level0 col0', 'type': 'th',
-                      'value': 'C'}],
+                      'value': 'C', 'display_value': 'C'}],
                     [{'class': 'col_heading level2 col0', 'type': 'th',
                       'value': 'A'},
                      {'class': 'col_heading level2 col1', 'type': 'th',
@@ -163,6 +163,12 @@ def test_multiindex_name(self):
 
         self.assertEqual(result['head'], expected)
 
+    def test_numeric_columns(self):
+        # smoke test: _translate must cope with integer column labels, see
+        # https://github.com/pydata/pandas/issues/12125
+        frame = pd.DataFrame({0: [1, 2, 3]})
+        frame.style._translate()
+
     def test_apply_axis(self):
         df = pd.DataFrame({'A': [0, 0], 'B': [1, 1]})
         f = lambda x: ['val: %s' % x.max() for v in x]
@@ -263,53 +269,51 @@ def test_bar(self):
     def test_bar_0points(self):
         df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
         result = df.style.bar()._compute().ctx
-        expected = {
-            (0, 0): ['width: 10em', ' height: 80%'],
-            (0, 1): ['width: 10em', ' height: 80%'],
-            (0, 2): ['width: 10em', ' height: 80%'],
-            (1, 0): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 50.0%, '
-                     'transparent 0%)'],
-            (1, 1): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 50.0%, '
-                     'transparent 0%)'],
-            (1, 2): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 50.0%, '
-                     'transparent 0%)'],
-            (2, 0): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 100.0%, '
-                     'transparent 0%)'],
-            (2, 1): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 100.0%, '
-                     'transparent 0%)'],
-            (2, 2): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 100.0%, '
-                     'transparent 0%)']}
+        expected = {(0, 0): ['width: 10em', ' height: 80%'],
+                    (0, 1): ['width: 10em', ' height: 80%'],
+                    (0, 2): ['width: 10em', ' height: 80%'],
+                    (1, 0): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 50.0%,'
+                             ' transparent 0%)'],
+                    (1, 1): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 50.0%,'
+                             ' transparent 0%)'],
+                    (1, 2): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 50.0%,'
+                             ' transparent 0%)'],
+                    (2, 0): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 100.0%'
+                             ', transparent 0%)'],
+                    (2, 1): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 100.0%'
+                             ', transparent 0%)'],
+                    (2, 2): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 100.0%'
+                             ', transparent 0%)']}
         self.assertEqual(result, expected)
 
         result = df.style.bar(axis=1)._compute().ctx
-        expected = {
-            (0, 0): ['width: 10em', ' height: 80%'],
-            (0, 1): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 50.0%, '
-                     'transparent 0%)'],
-            (0, 2): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 100.0%, '
-                     'transparent 0%)'],
-            (1, 0): ['width: 10em', ' height: 80%'],
-            (1, 1): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 50.0%, '
-                     'transparent 0%)'],
-            (1, 2): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 100.0%, '
-                     'transparent 0%)'],
-            (2, 0): ['width: 10em', ' height: 80%'],
-            (2, 1): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 50.0%, '
-                     'transparent 0%)'],
-            (2, 2): ['width: 10em', ' height: 80%',
-                     'background: linear-gradient(90deg,#d65f5f 100.0%, '
-                     'transparent 0%)']}
+        expected = {(0, 0): ['width: 10em', ' height: 80%'],
+                    (0, 1): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 50.0%,'
+                             ' transparent 0%)'],
+                    (0, 2): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 100.0%'
+                             ', transparent 0%)'],
+                    (1, 0): ['width: 10em', ' height: 80%'],
+                    (1, 1): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 50.0%'
+                             ', transparent 0%)'],
+                    (1, 2): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 100.0%'
+                             ', transparent 0%)'],
+                    (2, 0): ['width: 10em', ' height: 80%'],
+                    (2, 1): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 50.0%'
+                             ', transparent 0%)'],
+                    (2, 2): ['width: 10em', ' height: 80%',
+                             'background: linear-gradient(90deg,#d65f5f 100.0%'
+                             ', transparent 0%)']}
         self.assertEqual(result, expected)
 
     def test_highlight_null(self, null_color='red'):
@@ -444,6 +448,73 @@ def test_export(self):
         self.assertEqual(style1._todo, style2._todo)
         style2.render()
 
+    def test_display_format(self):
+        df = pd.DataFrame(np.random.random(size=(2, 2)))
+        ctx = df.style.format("{:0.1f}")._translate()
+
+        # flatten the cells first: all() over a generator of non-empty
+        # lists is always True and would make these checks vacuous
+        cells = [c for row in ctx['body'] for c in row]
+        data = [c for row in ctx['body'] for c in row[1:]]
+        self.assertTrue(all('display_value' in c for c in cells))
+        self.assertTrue(all(len(c['display_value']) <= 3 for c in data))
+
+    def test_display_format_raises(self):
+        # only template strings and callables are accepted as formatters
+        df = pd.DataFrame(np.random.randn(2, 2))
+        for bad_formatter in (5, True):
+            with tm.assertRaises(TypeError):
+                df.style.format(bad_formatter)
+
+    def test_display_subset(self):
+        df = pd.DataFrame([[.1234, .1234], [1.1234, 1.1234]],
+                          columns=['a', 'b'])
+        ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"},
+                              subset=pd.IndexSlice[0, :])._translate()
+        expected = '0.1'
+        self.assertEqual(ctx['body'][0][1]['display_value'], expected)
+        self.assertEqual(ctx['body'][1][1]['display_value'], '1.1234')
+        self.assertEqual(ctx['body'][0][2]['display_value'], '12.34%')
+
+        raw_11 = '1.1234'
+        ctx = df.style.format("{:0.1f}",
+                              subset=pd.IndexSlice[0, :])._translate()
+        self.assertEqual(ctx['body'][0][1]['display_value'], expected)
+        self.assertEqual(ctx['body'][1][1]['display_value'], raw_11)
+        # same row selection, but with a callable instead of a template
+        ctx = df.style.format(lambda x: "{:0.1f}".format(x),
+                              subset=pd.IndexSlice[0, :])._translate()
+        self.assertEqual(ctx['body'][0][1]['display_value'], expected)
+        self.assertEqual(ctx['body'][1][1]['display_value'], raw_11)
+
+        ctx = df.style.format("{:0.1f}",
+                              subset=pd.IndexSlice['a'])._translate()
+        self.assertEqual(ctx['body'][0][1]['display_value'], expected)
+        self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234')
+
+        ctx = df.style.format("{:0.1f}",
+                              subset=pd.IndexSlice[0, 'a'])._translate()
+        self.assertEqual(ctx['body'][0][1]['display_value'], expected)
+        self.assertEqual(ctx['body'][1][1]['display_value'], raw_11)
+
+        ctx = df.style.format("{:0.1f}",
+                              subset=pd.IndexSlice[[0, 1], ['a']])._translate()
+        self.assertEqual(ctx['body'][0][1]['display_value'], expected)
+        self.assertEqual(ctx['body'][1][1]['display_value'], '1.1')
+        self.assertEqual(ctx['body'][0][2]['display_value'], '0.1234')
+        self.assertEqual(ctx['body'][1][2]['display_value'], '1.1234')
+
+    def test_display_dict(self):
+        df = pd.DataFrame([[.1234, .1234], [1.1234, 1.1234]],
+                          columns=['a', 'b'])
+        ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"})._translate()
+        self.assertEqual(ctx['body'][0][1]['display_value'], '0.1')
+        self.assertEqual(ctx['body'][0][2]['display_value'], '12.34%')
+        df['c'] = ['aaa', 'bbb']
+        ctx = df.style.format({"a": "{:0.1f}", "c": str.upper})._translate()
+        self.assertEqual(ctx['body'][0][1]['display_value'], '0.1')
+        self.assertEqual(ctx['body'][0][3]['display_value'], 'AAA')
+
 
 @tm.mplskip
 class TestStylerMatplotlibDep(TestCase):