pandas-dev · TomAugspurger · Nov 25, 2019 · Oct 20, 2019 · Oct 21, 2019 · Oct 22, 2019
diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst
@@ -41,6 +41,7 @@ Style application
    Styler.set_caption
    Styler.set_properties
    Styler.set_uuid
+   Styler.set_na_rep
    Styler.clear
    Styler.pipe
 

diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
@@ -67,6 +67,7 @@
     "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n",
     "df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n",
     "               axis=1)\n",
+    "df.iloc[3, 3] = np.nan\n",
     "df.iloc[0, 2] = np.nan"
    ]
   },
@@ -402,6 +403,38 @@
     "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can control the text displayed for missing values with the `na_rep` argument."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.style.format(\"{:.2%}\", na_rep=\"-\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These formatting techniques can be used in combination with styling."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.style.highlight_max().format(None, na_rep=\"-\")"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -659,6 +692,7 @@
     "- precision\n",
     "- captions\n",
     "- table-wide styles\n",
+    "- missing values representation\n",
     "- hiding the index or columns\n",
     "\n",
     "Each of these can be specified in two ways:\n",
@@ -800,6 +834,32 @@
     "We hope to collect some useful ones either in pandas, or preferable in a new package that [builds on top](#Extensibility) the tools here."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Missing values"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can control the default representation of missing values for the entire table through the `set_na_rep` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(df.style\n",
+    "   .set_na_rep(\"FAIL\")\n",
+    "   .format(None, na_rep=\"PASS\", subset=[\"D\"])\n",
+    "   .highlight_null(\"yellow\"))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -122,6 +122,7 @@ Other enhancements
 - Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`)
 - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
 - :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`)
+- :meth:`Styler.format` gained the ``na_rep`` parameter to control how missing values are displayed; :meth:`Styler.set_na_rep` sets the table-wide default (:issue:`21527`, :issue:`28358`)
 - Roundtripping DataFrames with nullable integer or string data types to parquet
   (:meth:`~DataFrame.to_parquet` / :func:`read_parquet`) using the `'pyarrow'` engine
   now preserve those data types with pyarrow >= 1.0.0 (:issue:`20612`).

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -8,7 +8,7 @@
 import copy
 from functools import partial
 from itertools import product
-from typing import Optional
+from typing import Any, Callable, DefaultDict, Dict, List, Optional, Sequence, Tuple
 from uuid import uuid1
 
 import numpy as np
@@ -71,6 +71,11 @@ class Styler:
         The ``id`` takes the form ``T_<uuid>_row<num_row>_col<num_col>``
         where ``<uuid>`` is the unique identifier, ``<num_row>`` is the row
         number and ``<num_col>`` is the column number.
+    na_rep : str, optional
+        Representation for missing values.
+        If ``na_rep`` is None, no special formatting is applied.
+
+        .. versionadded:: 1.0.0
 
     Attributes
     ----------
@@ -126,9 +131,10 @@ def __init__(
         caption=None,
         table_attributes=None,
         cell_ids=True,
+        na_rep: Optional[str] = None,
     ):
-        self.ctx = defaultdict(list)
-        self._todo = []
+        self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list)
+        self._todo: List[Tuple[Callable, Tuple, Dict]] = []
 
         if not isinstance(data, (pd.Series, pd.DataFrame)):
             raise TypeError("``data`` must be a Series or DataFrame")
@@ -149,19 +155,24 @@ def __init__(
         self.precision = precision
         self.table_attributes = table_attributes
         self.hidden_index = False
-        self.hidden_columns = []
+        self.hidden_columns: Sequence[int] = []
         self.cell_ids = cell_ids
+        self.na_rep = na_rep
 
         # display_funcs maps (row, col) -> formatting function
 
         def default_display_func(x):
-            if is_float(x):
+            if self.na_rep is not None and pd.isna(x):
+                return self.na_rep
+            elif is_float(x):
                 display_format = "{0:.{precision}f}".format(x, precision=self.precision)
                 return display_format
             else:
                 return x
 
-        self._display_funcs = defaultdict(lambda: default_display_func)
+        self._display_funcs: DefaultDict[
+            Tuple[int, int], Callable[[Any], str]
+        ] = defaultdict(lambda: default_display_func)
 
     def _repr_html_(self):
         """
@@ -416,16 +427,22 @@ def format_attr(pair):
             table_attributes=table_attr,
         )
 
-    def format(self, formatter, subset=None):
+    def format(self, formatter, subset=None, na_rep: Optional[str] = None):
         """
         Format the text display value of cells.
 
         Parameters
         ----------
-        formatter : str, callable, or dict
+        formatter : str, callable, dict or None
+            If ``formatter`` is None, the default formatter is used.
         subset : IndexSlice
             An argument to ``DataFrame.loc`` that restricts which elements
             ``formatter`` is applied to.
+        na_rep : str, optional
+            Representation for missing values.
+            If ``na_rep`` is None, no special formatting is applied.
+
+            .. versionadded:: 1.0.0
 
         Returns
         -------
@@ -451,6 +468,10 @@ def format(self, formatter, subset=None):
         >>> df['c'] = ['a', 'b', 'c', 'd']
         >>> df.style.format({'c': str.upper})
         """
+        if formatter is None:
+            assert self._display_funcs.default_factory is not None
+            formatter = self._display_funcs.default_factory()
+
         if subset is None:
             row_locs = range(len(self.data))
             col_locs = range(len(self.data.columns))
@@ -466,16 +487,16 @@ def format(self, formatter, subset=None):
         if is_dict_like(formatter):
             for col, col_formatter in formatter.items():
                 # formatter must be callable, so '{}' are converted to lambdas
-                col_formatter = _maybe_wrap_formatter(col_formatter)
+                col_formatter = _maybe_wrap_formatter(col_formatter, na_rep)
                 col_num = self.data.columns.get_indexer_for([col])[0]
 
                 for row_num in row_locs:
                     self._display_funcs[(row_num, col_num)] = col_formatter
         else:
             # single scalar to format all cells with
+            formatter = _maybe_wrap_formatter(formatter, na_rep)
             locs = product(*(row_locs, col_locs))
             for i, j in locs:
-                formatter = _maybe_wrap_formatter(formatter)
                 self._display_funcs[(i, j)] = formatter
         return self
 
@@ -553,6 +574,7 @@ def _copy(self, deepcopy=False):
             caption=self.caption,
             uuid=self.uuid,
             table_styles=self.table_styles,
+            na_rep=self.na_rep,
         )
         if deepcopy:
             styler.ctx = copy.deepcopy(self.ctx)
@@ -896,6 +918,23 @@ def set_table_styles(self, table_styles):
         self.table_styles = table_styles
         return self
 
+    def set_na_rep(self, na_rep: str) -> "Styler":
+        """
+        Set the missing data representation on a Styler.
+
+        .. versionadded:: 1.0.0
+
+        Parameters
+        ----------
+        na_rep : str
+
+        Returns
+        -------
+        self : Styler
+        """
+        self.na_rep = na_rep
+        return self
+
     def hide_index(self):
         """
         Hide any indices from rendering.
@@ -1487,14 +1526,22 @@ def _get_level_lengths(index, hidden_elements=None):
     return non_zero_lengths
 
 
-def _maybe_wrap_formatter(formatter):
+def _maybe_wrap_formatter(formatter, na_rep: Optional[str]):
     if isinstance(formatter, str):
-        return lambda x: formatter.format(x)
+        formatter_func = lambda x: formatter.format(x)
     elif callable(formatter):
-        return formatter
+        formatter_func = formatter
     else:
         msg = (
             "Expected a template string or callable, got {formatter} "
             "instead".format(formatter=formatter)
         )
         raise TypeError(msg)
+
+    if na_rep is None:
+        return formatter_func
+    elif isinstance(na_rep, str):
+        return lambda x: na_rep if pd.isna(x) else formatter_func(x)
+    else:
+        msg = "Expected a string, got {na_rep} instead".format(na_rep=na_rep)
+        raise TypeError(msg)
diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py
@@ -1009,6 +1009,75 @@ def test_bar_bad_align_raises(self):
         with pytest.raises(ValueError):
             df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"])
 
+    def test_format_with_na_rep(self):
+        # GH 21527 28358
+        df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
+
+        ctx = df.style.format(None, na_rep="-")._translate()
+        assert ctx["body"][0][1]["display_value"] == "-"
+        assert ctx["body"][0][2]["display_value"] == "-"
+
+        ctx = df.style.format("{:.2%}", na_rep="-")._translate()
+        assert ctx["body"][0][1]["display_value"] == "-"
+        assert ctx["body"][0][2]["display_value"] == "-"
+        assert ctx["body"][1][1]["display_value"] == "110.00%"
+        assert ctx["body"][1][2]["display_value"] == "120.00%"
+
+        ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate()
+        assert ctx["body"][0][2]["display_value"] == "-"
+        assert ctx["body"][1][2]["display_value"] == "120.00%"
+
+    def test_init_with_na_rep(self):
+        # GH 21527 28358
+        df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
+
+        ctx = Styler(df, na_rep="NA")._translate()
+        assert ctx["body"][0][1]["display_value"] == "NA"
+        assert ctx["body"][0][2]["display_value"] == "NA"
+
+    def test_set_na_rep(self):
+        # GH 21527 28358
+        df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
+
+        ctx = df.style.set_na_rep("NA")._translate()
+        assert ctx["body"][0][1]["display_value"] == "NA"
+        assert ctx["body"][0][2]["display_value"] == "NA"
+
+        ctx = (
+            df.style.set_na_rep("NA")
+            .format(None, na_rep="-", subset=["B"])
+            ._translate()
+        )
+        assert ctx["body"][0][1]["display_value"] == "NA"
+        assert ctx["body"][0][2]["display_value"] == "-"
+
+    def test_format_non_numeric_na(self):
+        # GH 21527 28358
+        df = pd.DataFrame(
+            {
+                "object": [None, np.nan, "foo"],
+                "datetime": [None, pd.NaT, pd.Timestamp("20120101")],
+            }
+        )
+
+        ctx = df.style.set_na_rep("NA")._translate()
+        assert ctx["body"][0][1]["display_value"] == "NA"
+        assert ctx["body"][0][2]["display_value"] == "NA"
+        assert ctx["body"][1][1]["display_value"] == "NA"
+        assert ctx["body"][1][2]["display_value"] == "NA"
+
+        ctx = df.style.format(None, na_rep="-")._translate()
+        assert ctx["body"][0][1]["display_value"] == "-"
+        assert ctx["body"][0][2]["display_value"] == "-"
+        assert ctx["body"][1][1]["display_value"] == "-"
+        assert ctx["body"][1][2]["display_value"] == "-"
+
+    def test_format_with_bad_na_rep(self):
+        # GH 21527 28358
+        df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
+        with pytest.raises(TypeError):
+            df.style.format(None, na_rep=-1)
+
     def test_highlight_null(self, null_color="red"):
         df = pd.DataFrame({"A": [0, np.nan]})
         result = df.style.highlight_null()._compute().ctx