From c42de40c23c58acfb80e8dc840a4448c59d8d2f5 Mon Sep 17 00:00:00 2001 From: Max Chen Date: Mon, 21 Oct 2019 02:02:12 +0800 Subject: [PATCH 1/9] Add built-in function for Styler to format the text displayed for missing values As described in GH #28358, a user who wants to control how NA values are printed while applying styles to the output will have to implement their own formatter. (so that the underlying data will not change and can be used for styling) Since the behavior is common in styling (for reports etc.), suggest to add this shortcut function to enable users to format their NA values as something like '--' or 'Not Available' easily. example usage: `df.style.highlight_max().format_null('--')` --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/io/formats/style.py | 19 +++++++++++++++++++ pandas/tests/io/formats/test_style.py | 8 ++++++++ 3 files changed, 28 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 48c1173a372a7..96b4ce59f958e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -110,6 +110,7 @@ Other enhancements - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) +- :meth:`Styler.format_null` is now added into the built-in functions to help formatting missing values (:issue:`28358`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 6b98eaca9dacc..f93923f91ba8d 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -930,6 +930,25 @@ def hide_columns(self, subset): # A collection of "builtin" styles # ----------------------------------------------------------------------- + def format_null(self, na_rep="-"): + """ + Format the text displayed for missing values. + + .. 
versionadded:: 1.0.0 + + Parameters + ---------- + na_rep : str + + Returns + ------- + self : Styler + """ + self.format( + lambda x: na_rep if pd.isna(x) else self._display_funcs.default_factory()(x) + ) + return self + @staticmethod def _highlight_null(v, null_color): return ( diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 61c163d2cdaac..5f96372e153c9 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -990,6 +990,14 @@ def test_bar_bad_align_raises(self): with pytest.raises(ValueError): df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]) + def test_format_null(self, na_rep="-"): + # GH 28358 + df = pd.DataFrame({"A": [0, np.nan]}) + ctx = df.style.format_null()._translate() + result = ctx["body"][1][1]["display_value"] + expected = "-" + assert result == expected + def test_highlight_null(self, null_color="red"): df = pd.DataFrame({"A": [0, np.nan]}) result = df.style.highlight_null()._compute().ctx From 01632ce591405112ad11a34ffea0742786a0bd74 Mon Sep 17 00:00:00 2001 From: Max Chen Date: Tue, 22 Oct 2019 00:05:12 +0800 Subject: [PATCH 2/9] Add Styler.format_null into user_guide and reference Doc --- doc/source/reference/style.rst | 1 + doc/source/user_guide/style.ipynb | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 3d155535e2585..038e237d73079 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -52,6 +52,7 @@ Builtin styles Styler.highlight_max Styler.highlight_min Styler.highlight_null + Styler.format_null Styler.background_gradient Styler.bar diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 006f928c037bd..c1e1a77afb225 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -492,6 +492,22 @@ "df.style.highlight_max(axis=0)" ] }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "You can format the text displayed for missing values by `.format_null`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style.highlight_max(axis=0).format_null(na_rep='-')" + ] + }, { "cell_type": "markdown", "metadata": {}, From 53b0843f8a8efdcc0eb2a1f686611112f77ba275 Mon Sep 17 00:00:00 2001 From: Max Chen Date: Wed, 23 Oct 2019 07:36:54 +0800 Subject: [PATCH 3/9] Revised to change implementation to integrate with the original Styler.format method Change the implementation to integrate with the original `.format()` method by `na_rep` parameter Add a new table-wise default `na_rep` setting, which can be set through the new `.set_na_rep()` method Also enhanced the `.highlight_null()` method to be able to use `subset` parameter Add a few user guide examples and test cases --- doc/source/reference/style.rst | 2 +- doc/source/user_guide/style.ipynb | 79 +++++++++++++++++++++------ doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/io/formats/style.py | 77 +++++++++++++++++--------- pandas/tests/io/formats/test_style.py | 12 +++- 5 files changed, 126 insertions(+), 46 deletions(-) diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 038e237d73079..24a47336b0522 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -41,6 +41,7 @@ Style application Styler.set_caption Styler.set_properties Styler.set_uuid + Styler.set_na_rep Styler.clear Styler.pipe @@ -52,7 +53,6 @@ Builtin styles Styler.highlight_max Styler.highlight_min Styler.highlight_null - Styler.format_null Styler.background_gradient Styler.bar diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index c1e1a77afb225..1edd7ba2f3f2d 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -67,7 +67,8 @@ "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n", "df = pd.concat([df, 
pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n", " axis=1)\n", - "df.iloc[0, 2] = np.nan" + "df.iloc[0, 2] = np.nan\n", + "df.iloc[3, 3] = np.nan" ] }, { @@ -402,6 +403,38 @@ "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can format the text displayed for missing values by `na_rep`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style.format(\"{:.2%}\", na_rep='-')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These formatting techniques can be used in combination with styling." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.style.highlight_max(axis=0).format(na_rep='-')" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -492,22 +525,6 @@ "df.style.highlight_max(axis=0)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can format the text displayed for missing values by `.format_null`." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "df.style.highlight_max(axis=0).format_null(na_rep='-')" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -675,6 +692,7 @@ "- precision\n", "- captions\n", "- table-wide styles\n", + "- missing values representation\n", "- hiding the index or columns\n", "\n", "Each of these can be specified in two ways:\n", @@ -816,6 +834,33 @@ "We hope to collect some useful ones either in pandas, or preferable in a new package that [builds on top](#Extensibility) the tools here." ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Missing values" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can control the default missing values representation for this table through the `set_na_rep` method." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(df.style\n", + " .set_na_rep('BAD')\n", + " .highlight_null('red')\n", + " .format(na_rep='GOOD', subset=['D'])\n", + " .highlight_null('green', subset=['D']))" + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 96b4ce59f958e..310763672b0d5 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -110,7 +110,7 @@ Other enhancements - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) -- :meth:`Styler.format_null` is now added into the built-in functions to help formatting missing values (:issue:`28358`) +- Added ``na_rep`` parameters to :meth:`DataFrame.style` and :meth:`Styler.format` to help formatting missing values (:issue:`28358`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f93923f91ba8d..062a48e1ae263 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -71,6 +71,9 @@ class Styler: The ``id`` takes the form ``T__row_col`` where ```` is the unique identifier, ```` is the row number and ```` is the column number. + na_rep : str or None, default None + Representation for missing values. 
+ If ``na_rep`` is None, no special formatting is applied Attributes ---------- @@ -126,6 +129,7 @@ def __init__( caption=None, table_attributes=None, cell_ids=True, + na_rep=None, ): self.ctx = defaultdict(list) self._todo = [] @@ -151,11 +155,14 @@ def __init__( self.hidden_index = False self.hidden_columns = [] self.cell_ids = cell_ids + self.na_rep = na_rep # display_funcs maps (row, col) -> formatting function def default_display_func(x): - if is_float(x): + if self.na_rep is not None and pd.isna(x): + return self.na_rep + elif is_float(x): return "{:>.{precision}g}".format(x, precision=self.precision) else: return x @@ -415,16 +422,20 @@ def format_attr(pair): table_attributes=table_attr, ) - def format(self, formatter, subset=None): + def format(self, formatter=None, subset=None, na_rep=None): """ Format the text display value of cells. Parameters ---------- - formatter : str, callable, or dict + formatter : str, callable, dict or None + If ``formatter`` is None, the default formatter is used subset : IndexSlice An argument to ``DataFrame.loc`` that restricts which elements ``formatter`` is applied to. + na_rep : str or None, default None + Representation for missing values. 
+ If ``na_rep`` is None, no special formatting is applied Returns ------- @@ -450,6 +461,9 @@ def format(self, formatter, subset=None): >>> df['c'] = ['a', 'b', 'c', 'd'] >>> df.style.format({'c': str.upper}) """ + if formatter is None: + formatter = self._display_funcs.default_factory() + if subset is None: row_locs = range(len(self.data)) col_locs = range(len(self.data.columns)) @@ -466,15 +480,17 @@ def format(self, formatter, subset=None): for col, col_formatter in formatter.items(): # formatter must be callable, so '{}' are converted to lambdas col_formatter = _maybe_wrap_formatter(col_formatter) + col_formatter = _maybe_wrap_na_formatter(col_formatter, na_rep) col_num = self.data.columns.get_indexer_for([col])[0] for row_num in row_locs: self._display_funcs[(row_num, col_num)] = col_formatter else: # single scalar to format all cells with + formatter = _maybe_wrap_formatter(formatter) + formatter = _maybe_wrap_na_formatter(formatter, na_rep) locs = product(*(row_locs, col_locs)) for i, j in locs: - formatter = _maybe_wrap_formatter(formatter) self._display_funcs[(i, j)] = formatter return self @@ -554,6 +570,7 @@ def _copy(self, deepcopy=False): caption=self.caption, uuid=self.uuid, table_styles=self.table_styles, + na_rep=self.na_rep, ) if deepcopy: styler.ctx = copy.deepcopy(self.ctx) @@ -892,6 +909,23 @@ def set_table_styles(self, table_styles): self.table_styles = table_styles return self + def set_na_rep(self, na_rep): + """ + Set the missing data representation on a Styler. + + .. versionadded:: 1.0.0 + + Parameters + ---------- + na_rep : str + + Returns + ------- + self : Styler + """ + self.na_rep = na_rep + return self + def hide_index(self): """ Hide any indices from rendering. @@ -930,44 +964,27 @@ def hide_columns(self, subset): # A collection of "builtin" styles # ----------------------------------------------------------------------- - def format_null(self, na_rep="-"): - """ - Format the text displayed for missing values. - - .. 
versionadded:: 1.0.0 - - Parameters - ---------- - na_rep : str - - Returns - ------- - self : Styler - """ - self.format( - lambda x: na_rep if pd.isna(x) else self._display_funcs.default_factory()(x) - ) - return self - @staticmethod def _highlight_null(v, null_color): return ( "background-color: {color}".format(color=null_color) if pd.isna(v) else "" ) - def highlight_null(self, null_color="red"): + def highlight_null(self, null_color="red", subset=None): """ Shade the background ``null_color`` for missing values. Parameters ---------- null_color : str + subset : IndexSlice, default None + A valid slice for ``data`` to limit the style application to. Returns ------- self : Styler """ - self.applymap(self._highlight_null, null_color=null_color) + self.applymap(self._highlight_null, null_color=null_color, subset=subset) return self def background_gradient( @@ -1498,3 +1515,13 @@ def _maybe_wrap_formatter(formatter): "instead".format(formatter=formatter) ) raise TypeError(msg) + + +def _maybe_wrap_na_formatter(formatter, na_rep): + if na_rep is None: + return formatter + elif is_string_like(na_rep): + return lambda x: na_rep if pd.isna(x) else formatter(x) + else: + msg = "Expected a string, got {na_rep} instead".format(na_rep=na_rep) + raise TypeError(msg) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 5f96372e153c9..85e813ef5fd71 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -990,10 +990,18 @@ def test_bar_bad_align_raises(self): with pytest.raises(ValueError): df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]) - def test_format_null(self, na_rep="-"): + def test_set_na_rep(self): # GH 28358 df = pd.DataFrame({"A": [0, np.nan]}) - ctx = df.style.format_null()._translate() + ctx = df.style.set_na_rep("-")._translate() + result = ctx["body"][1][1]["display_value"] + expected = "-" + assert result == expected + + def test_format_with_na_rep(self): + # GH 
28358 + df = pd.DataFrame({"A": [0, np.nan]}) + ctx = df.style.format(na_rep="-")._translate() result = ctx["body"][1][1]["display_value"] expected = "-" assert result == expected From da3cb43c1cc1d0e0541ba52249425fdb6aebae0d Mon Sep 17 00:00:00 2001 From: Max Chen Date: Wed, 23 Oct 2019 20:29:20 +0800 Subject: [PATCH 4/9] add more tests for styling with NA values --- doc/source/user_guide/style.ipynb | 2 +- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/tests/io/formats/test_style.py | 38 ++++++++++++++++++--------- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 1edd7ba2f3f2d..00acdb02aa352 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -845,7 +845,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can control the default missing values representation for this table through the `set_na_rep` method." + "You can control the default missing values representation for the entire table through `set_na_rep` method." ] }, { diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 1cd0599fe6f5e..621c10f2f3edd 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -110,7 +110,7 @@ Other enhancements - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`) - :meth:`read_stata` can read Stata 119 dta files. 
(:issue:`28250`) - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) -- Added ``na_rep`` parameters to :meth:`DataFrame.style` and :meth:`Styler.format` to help formatting missing values (:issue:`28358`) +- Added ``na_rep`` argument to :meth:`DataFrame.style` and :meth:`Styler.format` to help formatting missing values (:issue:`28358`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 38591101b2101..6fb8d052d6d7b 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -990,21 +990,35 @@ def test_bar_bad_align_raises(self): with pytest.raises(ValueError): df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]) - def test_set_na_rep(self): - # GH 28358 - df = pd.DataFrame({"A": [0, np.nan]}) - ctx = df.style.set_na_rep("-")._translate() - result = ctx["body"][1][1]["display_value"] - expected = "-" - assert result == expected - def test_format_with_na_rep(self): # GH 28358 - df = pd.DataFrame({"A": [0, np.nan]}) + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + ctx = df.style.format(na_rep="-")._translate() - result = ctx["body"][1][1]["display_value"] - expected = "-" - assert result == expected + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + + ctx = df.style.format("{:.2%}", na_rep="-")._translate() + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "110.00%" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate() + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "120.00%" + + def test_set_na_rep(self): + # GH 28358 + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = 
df.style.set_na_rep("NA")._translate() + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + + ctx = df.style.set_na_rep("NA").format(na_rep="-", subset=["B"])._translate() + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "-" def test_highlight_null(self, null_color="red"): df = pd.DataFrame({"A": [0, np.nan]}) From b86bdc61fb73de7b5dc700c851585f849b1ceae0 Mon Sep 17 00:00:00 2001 From: Max Chen Date: Sun, 3 Nov 2019 00:19:40 +0800 Subject: [PATCH 5/9] revision based on the requested changes 1. keep formatter as mandatory in `.format` method 2. annotate the new method `.set_na_rep` 3. remove changes in `.highlight_null` to another PR 4. minor refinement to the whats new and user guide --- doc/source/user_guide/style.ipynb | 15 +++++++-------- doc/source/whatsnew/v1.0.0.rst | 3 ++- pandas/io/formats/style.py | 14 ++++++++------ pandas/tests/io/formats/test_style.py | 12 ++++++++---- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb index 00acdb02aa352..ebd469f53d2ce 100644 --- a/doc/source/user_guide/style.ipynb +++ b/doc/source/user_guide/style.ipynb @@ -67,8 +67,8 @@ "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n", "df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n", " axis=1)\n", - "df.iloc[0, 2] = np.nan\n", - "df.iloc[3, 3] = np.nan" + "df.iloc[3, 3] = np.nan\n", + "df.iloc[0, 2] = np.nan" ] }, { @@ -416,7 +416,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.style.format(\"{:.2%}\", na_rep='-')" + "df.style.format(\"{:.2%}\", na_rep=\"-\")" ] }, { @@ -432,7 +432,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.style.highlight_max(axis=0).format(na_rep='-')" + "df.style.highlight_max().format(None, na_rep=\"-\")" ] }, { @@ -855,10 +855,9 @@ "outputs": [], "source": [ "(df.style\n", - " .set_na_rep('BAD')\n", - " 
.highlight_null('red')\n", - " .format(na_rep='GOOD', subset=['D'])\n", - " .highlight_null('green', subset=['D']))" + " .set_na_rep(\"FAIL\")\n", + " .format(None, na_rep=\"PASS\", subset=[\"D\"])\n", + " .highlight_null(\"yellow\"))" ] }, { diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 856baab79b5b0..a1bc26df18f65 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -112,7 +112,8 @@ Other enhancements - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`) - Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) -- Added ``na_rep`` argument to :meth:`DataFrame.style` and :meth:`Styler.format` to help formatting missing values (:issue:`28358`) +- :class:`Styler` added :meth:`Styler.set_na_rep` method to set default missing values representation for the entire table. + :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 065a8261e7f2e..96e7ed62412f6 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -75,6 +75,8 @@ class Styler: Representation for missing values. If ``na_rep`` is None, no special formatting is applied + .. versionadded:: 1.0.0 + Attributes ---------- env : Jinja2 jinja2.Environment @@ -423,7 +425,7 @@ def format_attr(pair): table_attributes=table_attr, ) - def format(self, formatter=None, subset=None, na_rep=None): + def format(self, formatter, subset=None, na_rep=None): """ Format the text display value of cells. @@ -438,6 +440,8 @@ def format(self, formatter=None, subset=None, na_rep=None): Representation for missing values. If ``na_rep`` is None, no special formatting is applied + .. 
versionadded:: 1.0.0 + Returns ------- self : Styler @@ -913,7 +917,7 @@ def set_table_styles(self, table_styles): self.table_styles = table_styles return self - def set_na_rep(self, na_rep): + def set_na_rep(self, na_rep: str) -> "Styler": """ Set the missing data representation on a Styler. @@ -974,21 +978,19 @@ def _highlight_null(v, null_color): "background-color: {color}".format(color=null_color) if pd.isna(v) else "" ) - def highlight_null(self, null_color="red", subset=None): + def highlight_null(self, null_color="red"): """ Shade the background ``null_color`` for missing values. Parameters ---------- null_color : str - subset : IndexSlice, default None - A valid slice for ``data`` to limit the style application to. Returns ------- self : Styler """ - self.applymap(self._highlight_null, null_color=null_color, subset=subset) + self.applymap(self._highlight_null, null_color=null_color) return self def background_gradient( diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 6fb8d052d6d7b..4e63d6d972210 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -991,10 +991,10 @@ def test_bar_bad_align_raises(self): df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"]) def test_format_with_na_rep(self): - # GH 28358 + # GH 21527 28358 df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) - ctx = df.style.format(na_rep="-")._translate() + ctx = df.style.format(None, na_rep="-")._translate() assert ctx["body"][0][1]["display_value"] == "-" assert ctx["body"][0][2]["display_value"] == "-" @@ -1009,14 +1009,18 @@ def test_format_with_na_rep(self): assert ctx["body"][1][2]["display_value"] == "120.00%" def test_set_na_rep(self): - # GH 28358 + # GH 21527 28358 df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) ctx = df.style.set_na_rep("NA")._translate() assert ctx["body"][0][1]["display_value"] == "NA" assert ctx["body"][0][2]["display_value"] == "NA" 
- ctx = df.style.set_na_rep("NA").format(na_rep="-", subset=["B"])._translate() + ctx = ( + df.style.set_na_rep("NA") + .format(None, na_rep="-", subset=["B"]) + ._translate() + ) assert ctx["body"][0][1]["display_value"] == "NA" assert ctx["body"][0][2]["display_value"] == "-" From a1e9a9ef20b052a7278ce8a327bcea02d77442ac Mon Sep 17 00:00:00 2001 From: Max Chen Date: Sun, 3 Nov 2019 23:16:17 +0800 Subject: [PATCH 6/9] add tests, enhance doc string and formatter wrapping --- pandas/io/formats/style.py | 22 +++++++++----------- pandas/tests/io/formats/test_style.py | 29 +++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 96e7ed62412f6..6e17d5858985d 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -71,7 +71,7 @@ class Styler: The ``id`` takes the form ``T__row_col`` where ```` is the unique identifier, ```` is the row number and ```` is the column number. - na_rep : str or None, default None + na_rep : str, optional Representation for missing values. If ``na_rep`` is None, no special formatting is applied @@ -436,7 +436,7 @@ def format(self, formatter, subset=None, na_rep=None): subset : IndexSlice An argument to ``DataFrame.loc`` that restricts which elements ``formatter`` is applied to. - na_rep : str or None, default None + na_rep : str, optional Representation for missing values. 
If ``na_rep`` is None, no special formatting is applied @@ -484,16 +484,14 @@ def format(self, formatter, subset=None, na_rep=None): if is_dict_like(formatter): for col, col_formatter in formatter.items(): # formatter must be callable, so '{}' are converted to lambdas - col_formatter = _maybe_wrap_formatter(col_formatter) - col_formatter = _maybe_wrap_na_formatter(col_formatter, na_rep) + col_formatter = _maybe_wrap_formatter(col_formatter, na_rep) col_num = self.data.columns.get_indexer_for([col])[0] for row_num in row_locs: self._display_funcs[(row_num, col_num)] = col_formatter else: # single scalar to format all cells with - formatter = _maybe_wrap_formatter(formatter) - formatter = _maybe_wrap_na_formatter(formatter, na_rep) + formatter = _maybe_wrap_formatter(formatter, na_rep) locs = product(*(row_locs, col_locs)) for i, j in locs: self._display_funcs[(i, j)] = formatter @@ -1507,11 +1505,11 @@ def _get_level_lengths(index, hidden_elements=None): return non_zero_lengths -def _maybe_wrap_formatter(formatter): +def _maybe_wrap_formatter(formatter, na_rep): if is_string_like(formatter): - return lambda x: formatter.format(x) + formatter_func = lambda x: formatter.format(x) elif callable(formatter): - return formatter + formatter_func = formatter else: msg = ( "Expected a template string or callable, got {formatter} " @@ -1519,12 +1517,10 @@ def _maybe_wrap_formatter(formatter): ) raise TypeError(msg) - -def _maybe_wrap_na_formatter(formatter, na_rep): if na_rep is None: - return formatter + return formatter_func elif is_string_like(na_rep): - return lambda x: na_rep if pd.isna(x) else formatter(x) + return lambda x: na_rep if pd.isna(x) else formatter_func(x) else: msg = "Expected a string, got {na_rep} instead".format(na_rep=na_rep) raise TypeError(msg) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 4e63d6d972210..9e8e3162efb3e 100644 --- a/pandas/tests/io/formats/test_style.py +++ 
b/pandas/tests/io/formats/test_style.py @@ -1008,6 +1008,14 @@ def test_format_with_na_rep(self): assert ctx["body"][0][2]["display_value"] == "-" assert ctx["body"][1][2]["display_value"] == "120.00%" + def test_init_with_na_rep(self): + # GH 21527 28358 + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + + ctx = Styler(df, na_rep="NA")._translate() + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + def test_set_na_rep(self): # GH 21527 28358 df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) @@ -1024,6 +1032,27 @@ def test_set_na_rep(self): assert ctx["body"][0][1]["display_value"] == "NA" assert ctx["body"][0][2]["display_value"] == "-" + def test_format_non_numeric_na(self): + # GH 21527 28358 + df = pd.DataFrame( + { + "object": [None, np.nan, "foo"], + "datetime": [None, pd.NaT, pd.Timestamp("20120101")], + } + ) + + ctx = df.style.set_na_rep("NA")._translate() + assert ctx["body"][0][1]["display_value"] == "NA" + assert ctx["body"][0][2]["display_value"] == "NA" + assert ctx["body"][1][1]["display_value"] == "NA" + assert ctx["body"][1][2]["display_value"] == "NA" + + ctx = df.style.format(None, na_rep="-")._translate() + assert ctx["body"][0][1]["display_value"] == "-" + assert ctx["body"][0][2]["display_value"] == "-" + assert ctx["body"][1][1]["display_value"] == "-" + assert ctx["body"][1][2]["display_value"] == "-" + def test_highlight_null(self, null_color="red"): df = pd.DataFrame({"A": [0, np.nan]}) result = df.style.highlight_null()._compute().ctx From 3d4cfd033e2e65c9972735b3b386231b183ef808 Mon Sep 17 00:00:00 2001 From: Max Chen Date: Tue, 12 Nov 2019 22:28:51 +0800 Subject: [PATCH 7/9] add type-hint and xfail test --- doc/source/whatsnew/v1.0.0.rst | 3 +-- pandas/io/formats/style.py | 19 +++++++++++-------- pandas/tests/io/formats/test_style.py | 6 ++++++ 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst 
b/doc/source/whatsnew/v1.0.0.rst index 7029a41b49595..83283b5e228b2 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -114,8 +114,7 @@ Other enhancements - Added ``encoding`` argument to :meth:`DataFrame.to_string` for non-ascii text (:issue:`28766`) - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`) - :meth:`Styler.background_gradient` now accepts ``vmin`` and ``vmax`` arguments (:issue:`12145`) -- :class:`Styler` added :meth:`Styler.set_na_rep` method to set default missing values representation for the entire table. - :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`) +- :meth:`Styler.format` added the ``na_rep`` parameter to help format the missing values (:issue:`21527`, :issue:`28358`) Build Changes ^^^^^^^^^^^^^ diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index d59e500cddb34..95367d0f3a365 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -8,7 +8,7 @@ import copy from functools import partial from itertools import product -from typing import Optional +from typing import Any, Callable, DefaultDict, Dict, List, Optional, Sequence, Tuple from uuid import uuid1 import numpy as np @@ -131,10 +131,10 @@ def __init__( caption=None, table_attributes=None, cell_ids=True, - na_rep=None, + na_rep: Optional[str] = None, ): - self.ctx = defaultdict(list) - self._todo = [] + self.ctx = defaultdict(list) # type: DefaultDict[Tuple[int, int], List[str]] + self._todo = [] # type: List[Tuple[Callable, Tuple, Dict]] if not isinstance(data, (pd.Series, pd.DataFrame)): raise TypeError("``data`` must be a Series or DataFrame") @@ -155,7 +155,7 @@ def __init__( self.precision = precision self.table_attributes = table_attributes self.hidden_index = False - self.hidden_columns = [] + self.hidden_columns = [] # type: Sequence[int] self.cell_ids = cell_ids self.na_rep = na_rep @@ -170,7 
+170,9 @@ def default_display_func(x): else: return x - self._display_funcs = defaultdict(lambda: default_display_func) + self._display_funcs = defaultdict( + lambda: default_display_func + ) # type: DefaultDict[Tuple[int, int], Callable[[Any], str]] def _repr_html_(self): """ @@ -425,7 +427,7 @@ def format_attr(pair): table_attributes=table_attr, ) - def format(self, formatter, subset=None, na_rep=None): + def format(self, formatter, subset=None, na_rep: Optional[str] = None): """ Format the text display value of cells. @@ -467,6 +469,7 @@ def format(self, formatter, subset=None, na_rep=None): >>> df.style.format({'c': str.upper}) """ if formatter is None: + assert self._display_funcs.default_factory is not None formatter = self._display_funcs.default_factory() if subset is None: @@ -1523,7 +1526,7 @@ def _get_level_lengths(index, hidden_elements=None): return non_zero_lengths -def _maybe_wrap_formatter(formatter, na_rep): +def _maybe_wrap_formatter(formatter, na_rep: Optional[str]): if isinstance(formatter, str): formatter_func = lambda x: formatter.format(x) elif callable(formatter): diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index e237d76ad076e..05a8c332779c1 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1072,6 +1072,12 @@ def test_format_non_numeric_na(self): assert ctx["body"][1][1]["display_value"] == "-" assert ctx["body"][1][2]["display_value"] == "-" + @pytest.mark.xfail + def test_format_with_bad_na_rep(self): + # GH 21527 28358 + df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) + df.style.format(None, na_rep=-1) + def test_highlight_null(self, null_color="red"): df = pd.DataFrame({"A": [0, np.nan]}) result = df.style.highlight_null()._compute().ctx From bd99db9b633a7cf32bd23501ee37a6ab27b64fea Mon Sep 17 00:00:00 2001 From: Max Chen Date: Tue, 19 Nov 2019 22:25:24 +0800 Subject: [PATCH 8/9] revise test using pytest.raises --- 
pandas/tests/io/formats/test_style.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 05a8c332779c1..adb16414d4c7c 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1072,11 +1072,11 @@ def test_format_non_numeric_na(self): assert ctx["body"][1][1]["display_value"] == "-" assert ctx["body"][1][2]["display_value"] == "-" - @pytest.mark.xfail def test_format_with_bad_na_rep(self): # GH 21527 28358 df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) - df.style.format(None, na_rep=-1) + with pytest.raises(TypeError): + df.style.format(None, na_rep=-1) def test_highlight_null(self, null_color="red"): df = pd.DataFrame({"A": [0, np.nan]}) From 79353593021ccca7a450926ef9c62dd1475d0bfe Mon Sep 17 00:00:00 2001 From: Max Chen Date: Sun, 24 Nov 2019 22:36:52 +0800 Subject: [PATCH 9/9] using py36 syntax for annotations --- pandas/io/formats/style.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 95367d0f3a365..a3467f75e86c6 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -133,8 +133,8 @@ def __init__( cell_ids=True, na_rep: Optional[str] = None, ): - self.ctx = defaultdict(list) # type: DefaultDict[Tuple[int, int], List[str]] - self._todo = [] # type: List[Tuple[Callable, Tuple, Dict]] + self.ctx: DefaultDict[Tuple[int, int], List[str]] = defaultdict(list) + self._todo: List[Tuple[Callable, Tuple, Dict]] = [] if not isinstance(data, (pd.Series, pd.DataFrame)): raise TypeError("``data`` must be a Series or DataFrame") @@ -155,7 +155,7 @@ def __init__( self.precision = precision self.table_attributes = table_attributes self.hidden_index = False - self.hidden_columns = [] # type: Sequence[int] + self.hidden_columns: Sequence[int] = [] self.cell_ids = cell_ids self.na_rep = na_rep @@ -170,9 
+170,9 @@ def default_display_func(x): else: return x - self._display_funcs = defaultdict( - lambda: default_display_func - ) # type: DefaultDict[Tuple[int, int], Callable[[Any], str]] + self._display_funcs: DefaultDict[ + Tuple[int, int], Callable[[Any], str] + ] = defaultdict(lambda: default_display_func) def _repr_html_(self): """