pandas-dev · TomAugspurger · Nov 25, 2019 · Oct 20, 2019 · Oct 21, 2019 · Oct 22, 2019
diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst
@@ -41,6 +41,7 @@ Style application
    Styler.set_caption
    Styler.set_properties
    Styler.set_uuid
+   Styler.set_na_rep
    Styler.clear
    Styler.pipe
 

diff --git a/doc/source/user_guide/style.ipynb b/doc/source/user_guide/style.ipynb
@@ -67,7 +67,8 @@
     "df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n",
     "df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n",
     "               axis=1)\n",
-    "df.iloc[0, 2] = np.nan"
+    "df.iloc[0, 2] = np.nan\n",
+    "df.iloc[3, 3] = np.nan"
    ]
   },
   {
@@ -402,6 +403,38 @@
     "df.style.format({\"B\": lambda x: \"±{:.2f}\".format(abs(x))})"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can format the text displayed for missing values with the `na_rep` argument."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.style.format(\"{:.2%}\", na_rep='-')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "These formatting techniques can be used in combination with styling."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df.style.highlight_max(axis=0).format(na_rep='-')"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -659,6 +692,7 @@
     "- precision\n",
     "- captions\n",
     "- table-wide styles\n",
+    "- missing values representation\n",
     "- hiding the index or columns\n",
     "\n",
     "Each of these can be specified in two ways:\n",
@@ -800,6 +834,33 @@
     "We hope to collect some useful ones either in pandas, or preferable in a new package that [builds on top](#Extensibility) the tools here."
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Missing values"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can control the default missing values representation for the entire table through the `set_na_rep` method."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(df.style\n",
+    "   .set_na_rep('BAD')\n",
+    "   .highlight_null('red')\n",
+    "   .format(na_rep='GOOD', subset=['D'])\n",
+    "   .highlight_null('green', subset=['D']))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -110,6 +110,7 @@ Other enhancements
 - :meth:`DataFrame.to_json` now accepts an ``indent`` integer argument to enable pretty printing of JSON output (:issue:`12004`)
 - :meth:`read_stata` can read Stata 119 dta files. (:issue:`28250`)
 - Added ``encoding`` argument to :func:`DataFrame.to_html` for non-ascii text (:issue:`28663`)
+- Added ``na_rep`` argument to :class:`~pandas.io.formats.style.Styler` and :meth:`Styler.format`, and a new :meth:`Styler.set_na_rep` method, to help format missing values (:issue:`28358`)
 
 Build Changes
 ^^^^^^^^^^^^^

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -71,6 +71,9 @@ class Styler:
         The ``id`` takes the form ``T_<uuid>_row<num_row>_col<num_col>``
         where ``<uuid>`` is the unique identifier, ``<num_row>`` is the row
         number and ``<num_col>`` is the column number.
+    na_rep : str or None, default None
+        Representation for missing values.
+        If ``na_rep`` is None, no special formatting is applied.
 
     Attributes
     ----------
@@ -126,6 +129,7 @@ def __init__(
         caption=None,
         table_attributes=None,
         cell_ids=True,
+        na_rep=None,
     ):
         self.ctx = defaultdict(list)
         self._todo = []
@@ -151,11 +155,14 @@ def __init__(
         self.hidden_index = False
         self.hidden_columns = []
         self.cell_ids = cell_ids
+        self.na_rep = na_rep
 
         # display_funcs maps (row, col) -> formatting function
 
         def default_display_func(x):
-            if is_float(x):
+            if self.na_rep is not None and pd.isna(x):
+                return self.na_rep
+            elif is_float(x):
                 display_format = "{0:.{precision}f}".format(x, precision=self.precision)
                 return display_format
             else:
@@ -416,16 +423,20 @@ def format_attr(pair):
             table_attributes=table_attr,
         )
 
-    def format(self, formatter, subset=None):
+    def format(self, formatter=None, subset=None, na_rep=None):
         """
         Format the text display value of cells.
 
         Parameters
         ----------
-        formatter : str, callable, or dict
+        formatter : str, callable, dict or None
+            If ``formatter`` is None, the default formatter is used.
         subset : IndexSlice
             An argument to ``DataFrame.loc`` that restricts which elements
             ``formatter`` is applied to.
+        na_rep : str or None, default None
+            Representation for missing values.
+            If ``na_rep`` is None, no special formatting is applied.
 
         Returns
         -------
@@ -451,6 +462,9 @@ def format(self, formatter, subset=None):
         >>> df['c'] = ['a', 'b', 'c', 'd']
         >>> df.style.format({'c': str.upper})
         """
+        if formatter is None:
+            formatter = self._display_funcs.default_factory()
+
         if subset is None:
             row_locs = range(len(self.data))
             col_locs = range(len(self.data.columns))
@@ -467,15 +481,17 @@ def format(self, formatter, subset=None):
             for col, col_formatter in formatter.items():
                 # formatter must be callable, so '{}' are converted to lambdas
                 col_formatter = _maybe_wrap_formatter(col_formatter)
+                col_formatter = _maybe_wrap_na_formatter(col_formatter, na_rep)
                 col_num = self.data.columns.get_indexer_for([col])[0]
 
                 for row_num in row_locs:
                     self._display_funcs[(row_num, col_num)] = col_formatter
         else:
             # single scalar to format all cells with
+            formatter = _maybe_wrap_formatter(formatter)
+            formatter = _maybe_wrap_na_formatter(formatter, na_rep)
             locs = product(*(row_locs, col_locs))
             for i, j in locs:
-                formatter = _maybe_wrap_formatter(formatter)
                 self._display_funcs[(i, j)] = formatter
         return self
 
@@ -553,6 +569,7 @@ def _copy(self, deepcopy=False):
             caption=self.caption,
             uuid=self.uuid,
             table_styles=self.table_styles,
+            na_rep=self.na_rep,
         )
         if deepcopy:
             styler.ctx = copy.deepcopy(self.ctx)
@@ -891,6 +908,23 @@ def set_table_styles(self, table_styles):
         self.table_styles = table_styles
         return self
 
+    def set_na_rep(self, na_rep: str) -> "Styler":
+        """
+        Set the missing data representation on a Styler.
+
+        .. versionadded:: 1.0.0
+
+        Parameters
+        ----------
+        na_rep : str
+
+        Returns
+        -------
+        self : Styler
+        """
+        self.na_rep = na_rep
+        return self
+
     def hide_index(self):
         """
         Hide any indices from rendering.
@@ -935,19 +969,21 @@ def _highlight_null(v, null_color):
             "background-color: {color}".format(color=null_color) if pd.isna(v) else ""
         )
 
-    def highlight_null(self, null_color="red"):
+    def highlight_null(self, null_color="red", subset=None):
         """
         Shade the background ``null_color`` for missing values.
 
         Parameters
         ----------
         null_color : str
+        subset : IndexSlice, default None
+            A valid slice for ``data`` to limit the style application to.
 
         Returns
         -------
         self : Styler
         """
-        self.applymap(self._highlight_null, null_color=null_color)
+        self.applymap(self._highlight_null, null_color=null_color, subset=subset)
         return self
 
     def background_gradient(
@@ -1475,3 +1511,13 @@ def _maybe_wrap_formatter(formatter):
             "instead".format(formatter=formatter)
         )
         raise TypeError(msg)
+
+
+def _maybe_wrap_na_formatter(formatter, na_rep):
+    if na_rep is None:
+        return formatter
+    elif is_string_like(na_rep):
+        return lambda x: na_rep if pd.isna(x) else formatter(x)
+    else:
+        msg = "Expected a string, got {na_rep} instead".format(na_rep=na_rep)
+        raise TypeError(msg)
diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py
@@ -990,6 +990,36 @@ def test_bar_bad_align_raises(self):
         with pytest.raises(ValueError):
             df.style.bar(align="poorly", color=["#d65f5f", "#5fba7d"])
 
+    def test_format_with_na_rep(self):
+        # GH 28358
+        df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
+
+        ctx = df.style.format(na_rep="-")._translate()
+        assert ctx["body"][0][1]["display_value"] == "-"
+        assert ctx["body"][0][2]["display_value"] == "-"
+
+        ctx = df.style.format("{:.2%}", na_rep="-")._translate()
+        assert ctx["body"][0][1]["display_value"] == "-"
+        assert ctx["body"][0][2]["display_value"] == "-"
+        assert ctx["body"][1][1]["display_value"] == "110.00%"
+        assert ctx["body"][1][2]["display_value"] == "120.00%"
+
+        ctx = df.style.format("{:.2%}", na_rep="-", subset=["B"])._translate()
+        assert ctx["body"][0][2]["display_value"] == "-"
+        assert ctx["body"][1][2]["display_value"] == "120.00%"
+
+    def test_set_na_rep(self):
+        # GH 28358
+        df = pd.DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"])
+
+        ctx = df.style.set_na_rep("NA")._translate()
+        assert ctx["body"][0][1]["display_value"] == "NA"
+        assert ctx["body"][0][2]["display_value"] == "NA"
+
+        ctx = df.style.set_na_rep("NA").format(na_rep="-", subset=["B"])._translate()
+        assert ctx["body"][0][1]["display_value"] == "NA"
+        assert ctx["body"][0][2]["display_value"] == "-"
+
     def test_highlight_null(self, null_color="red"):
         df = pd.DataFrame({"A": [0, np.nan]})
         result = df.style.highlight_null()._compute().ctx