DOC: Improve replace docstring (pandas-dev#18100)

jreback · Feb 4, 2018 · bc1d027 · bc1d027
1 parent 56dbaae
commit bc1d027
Show file tree

Hide file tree

Showing 3 changed files with 187 additions and 41 deletions.
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3080,6 +3080,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
                                   inplace=inplace, limit=limit,
                                   downcast=downcast, **kwargs)
 
+    @Appender(_shared_docs['replace'] % _shared_doc_kwargs)
+    def replace(self, to_replace=None, value=None, inplace=False, limit=None,
+                regex=False, method='pad', axis=None):
+        return super(DataFrame, self).replace(to_replace=to_replace,
+                                              value=value, inplace=inplace,
+                                              limit=limit, regex=regex,
+                                              method=method, axis=axis)
+
     @Appender(_shared_docs['shift'] % _shared_doc_kwargs)
     def shift(self, periods=1, freq=None, axis=0):
         return super(DataFrame, self).shift(periods=periods, freq=freq,

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -69,6 +69,10 @@
 
 
 def _single_replace(self, to_replace, method, inplace, limit):
+    """
+    Replaces values in a Series using the fill method specified when no
+    replacement value is given in the replace method.
+    """
     if self.ndim != 1:
         raise TypeError('cannot replace {0} with method {1} on a {2}'
                         .format(to_replace, method, type(self).__name__))
@@ -4787,94 +4791,111 @@ def bfill(self, axis=None, inplace=False, limit=None, downcast=None):
         return self.fillna(method='bfill', axis=axis, inplace=inplace,
                            limit=limit, downcast=downcast)
 
-    def replace(self, to_replace=None, value=None, inplace=False, limit=None,
-                regex=False, method='pad', axis=None):
-        """
+    _shared_docs['replace'] = ("""
         Replace values given in 'to_replace' with 'value'.
 
         Parameters
         ----------
         to_replace : str, regex, list, dict, Series, numeric, or None
 
-            * str or regex:
+            * numeric, str or regex:
 
-                - str: string exactly matching `to_replace` will be replaced
-                  with `value`
-                - regex: regexs matching `to_replace` will be replaced with
-                  `value`
+                - numeric: numeric values equal to ``to_replace`` will be
+                  replaced with ``value``
+                - str: string exactly matching ``to_replace`` will be replaced
+                  with ``value``
+                - regex: regexes matching ``to_replace`` will be replaced with
+                  ``value``
 
             * list of str, regex, or numeric:
 
-                - First, if `to_replace` and `value` are both lists, they
+                - First, if ``to_replace`` and ``value`` are both lists, they
                   **must** be the same length.
                 - Second, if ``regex=True`` then all of the strings in **both**
                   lists will be interpreted as regexs otherwise they will match
-                  directly. This doesn't matter much for `value` since there
+                  directly. This doesn't matter much for ``value`` since there
                   are only a few possible substitution regexes you can use.
-                - str and regex rules apply as above.
+                - str, regex and numeric rules apply as above.
 
             * dict:
 
-                - Nested dictionaries, e.g., {'a': {'b': nan}}, are read as
-                  follows: look in column 'a' for the value 'b' and replace it
-                  with nan. You can nest regular expressions as well. Note that
+                - Dicts can be used to specify different replacement values
+                  for different existing values. For example,
+                  {'a': 'b', 'y': 'z'} replaces the value 'a' with 'b' and
+                  'y' with 'z'. To use a dict in this way the ``value``
+                  parameter should be ``None``.
+                - For a DataFrame a dict can specify that different values
+                  should be replaced in different columns. For example,
+                  {'a': 1, 'b': 'z'} looks for the value 1 in column 'a' and
+                  the value 'z' in column 'b' and replaces these values with
+                  whatever is specified in ``value``. The ``value`` parameter
+                  should not be ``None`` in this case. You can treat this as a
+                  special case of passing two lists except that you are
+                  specifying the column to search in.
+                - For a DataFrame nested dictionaries, e.g.,
+                  {'a': {'b': np.nan}}, are read as follows: look in column 'a'
+                  for the value 'b' and replace it with NaN. The ``value``
+                  parameter should be ``None`` to use a nested dict in this
+                  way. You can nest regular expressions as well. Note that
                   column names (the top-level dictionary keys in a nested
                   dictionary) **cannot** be regular expressions.
-                - Keys map to column names and values map to substitution
-                  values. You can treat this as a special case of passing two
-                  lists except that you are specifying the column to search in.
 
             * None:
 
                 - This means that the ``regex`` argument must be a string,
                   compiled regular expression, or list, dict, ndarray or Series
-                  of such elements. If `value` is also ``None`` then this
+                  of such elements. If ``value`` is also ``None`` then this
                   **must** be a nested dictionary or ``Series``.
 
             See the examples section for examples of each of these.
         value : scalar, dict, list, str, regex, default None
-            Value to use to fill holes (e.g. 0), alternately a dict of values
-            specifying which value to use for each column (columns not in the
-            dict will not be filled). Regular expressions, strings and lists or
-            dicts of such objects are also allowed.
+            Value to replace any values matching ``to_replace`` with.
+            For a DataFrame a dict of values can be used to specify which
+            value to use for each column (columns not in the dict will not be
+            filled). Regular expressions, strings and lists or dicts of such
+            objects are also allowed.
         inplace : boolean, default False
             If True, in place. Note: this will modify any
             other views on this object (e.g. a column from a DataFrame).
             Returns the caller if this is True.
         limit : int, default None
             Maximum size gap to forward or backward fill
-        regex : bool or same types as `to_replace`, default False
-            Whether to interpret `to_replace` and/or `value` as regular
-            expressions. If this is ``True`` then `to_replace` *must* be a
-            string. Otherwise, `to_replace` must be ``None`` because this
-            parameter will be interpreted as a regular expression or a list,
-            dict, or array of regular expressions.
+        regex : bool or same types as ``to_replace``, default False
+            Whether to interpret ``to_replace`` and/or ``value`` as regular
+            expressions. If this is ``True`` then ``to_replace`` *must* be a
+            string. Alternatively, this could be a regular expression or a
+            list, dict, or array of regular expressions in which case
+            ``to_replace`` must be ``None``.
         method : string, optional, {'pad', 'ffill', 'bfill'}
             The method to use when for replacement, when ``to_replace`` is a
             ``list``.
 
         See Also
         --------
-        NDFrame.reindex
-        NDFrame.asfreq
-        NDFrame.fillna
+        %(klass)s.fillna : Fill NA/NaN values
+        %(klass)s.where : Replace values based on boolean condition
 
         Returns
         -------
-        filled : NDFrame
+        filled : %(klass)s
 
         Raises
         ------
         AssertionError
-            * If `regex` is not a ``bool`` and `to_replace` is not ``None``.
+            * If ``regex`` is not a ``bool`` and ``to_replace`` is not
+              ``None``.
         TypeError
-            * If `to_replace` is a ``dict`` and `value` is not a ``list``,
+            * If ``to_replace`` is a ``dict`` and ``value`` is not a ``list``,
               ``dict``, ``ndarray``, or ``Series``
-            * If `to_replace` is ``None`` and `regex` is not compilable into a
-              regular expression or is a list, dict, ndarray, or Series.
+            * If ``to_replace`` is ``None`` and ``regex`` is not compilable
+              into a regular expression or is a list, dict, ndarray, or
+              Series.
+            * When replacing multiple ``bool`` or ``datetime64`` objects and
+              the arguments to ``to_replace`` do not match the type of the
+              value being replaced
         ValueError
-            * If `to_replace` and `value` are ``list`` s or ``ndarray`` s, but
-              they are not the same length.
+            * If a ``list`` or an ``ndarray`` is passed to ``to_replace`` and
+              ``value`` but they are not the same length.
 
         Notes
         -----
@@ -4883,12 +4904,121 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None,
         * Regular expressions will only substitute on strings, meaning you
           cannot provide, for example, a regular expression matching floating
           point numbers and expect the columns in your frame that have a
-          numeric dtype to be matched. However, if those floating point numbers
-          *are* strings, then you can do this.
+          numeric dtype to be matched. However, if those floating point
+          numbers *are* strings, then you can do this.
         * This method has *a lot* of options. You are encouraged to experiment
           and play with this method to gain intuition about how it works.
 
-        """
+        Examples
+        --------
+
+        >>> s = pd.Series([0, 1, 2, 3, 4])
+        >>> s.replace(0, 5)
+        0    5
+        1    1
+        2    2
+        3    3
+        4    4
+        dtype: int64
+        >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
+        ...                    'B': [5, 6, 7, 8, 9],
+        ...                    'C': ['a', 'b', 'c', 'd', 'e']})
+        >>> df.replace(0, 5)
+           A  B  C
+        0  5  5  a
+        1  1  6  b
+        2  2  7  c
+        3  3  8  d
+        4  4  9  e
+
+        >>> df.replace([0, 1, 2, 3], 4)
+           A  B  C
+        0  4  5  a
+        1  4  6  b
+        2  4  7  c
+        3  4  8  d
+        4  4  9  e
+        >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
+           A  B  C
+        0  4  5  a
+        1  3  6  b
+        2  2  7  c
+        3  1  8  d
+        4  4  9  e
+        >>> s.replace([1, 2], method='bfill')
+        0    0
+        1    3
+        2    3
+        3    3
+        4    4
+        dtype: int64
+
+        >>> df.replace({0: 10, 1: 100})
+             A  B  C
+        0   10  5  a
+        1  100  6  b
+        2    2  7  c
+        3    3  8  d
+        4    4  9  e
+        >>> df.replace({'A': 0, 'B': 5}, 100)
+             A    B  C
+        0  100  100  a
+        1    1    6  b
+        2    2    7  c
+        3    3    8  d
+        4    4    9  e
+        >>> df.replace({'A': {0: 100, 4: 400}})
+             A  B  C
+        0  100  5  a
+        1    1  6  b
+        2    2  7  c
+        3    3  8  d
+        4  400  9  e
+
+        >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
+        ...                    'B': ['abc', 'bar', 'xyz']})
+        >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
+              A    B
+        0   new  abc
+        1   foo  new
+        2  bait  xyz
+        >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
+              A    B
+        0   new  abc
+        1   foo  bar
+        2  bait  xyz
+        >>> df.replace(regex=r'^ba.$', value='new')
+              A    B
+        0   new  abc
+        1   foo  new
+        2  bait  xyz
+        >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
+              A    B
+        0   new  abc
+        1   xyz  new
+        2  bait  xyz
+        >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
+              A    B
+        0   new  abc
+        1   new  new
+        2  bait  xyz
+
+        Note that when replacing multiple ``bool`` or ``datetime64`` objects,
+        the data types in the ``to_replace`` parameter must match the data
+        type of the value being replaced:
+
+        >>> df = pd.DataFrame({'A': [True, False, True],
+        ...                    'B': [False, True, False]})
+        >>> df.replace({'a string': 'new value', True: False})  # raises
+        TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
+
+        This raises a ``TypeError`` because one of the ``dict`` keys is not of
+        the correct type for replacement.
+    """)
+
+    @Appender(_shared_docs['replace'] % _shared_doc_kwargs)
+    def replace(self, to_replace=None, value=None, inplace=False, limit=None,
+                regex=False, method='pad', axis=None):
         inplace = validate_bool_kwarg(inplace, 'inplace')
         if not is_bool(regex) and to_replace is not None:
             raise AssertionError("'to_replace' must be 'None' if 'regex' is "

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2671,6 +2671,14 @@ def fillna(self, value=None, method=None, axis=None, inplace=False,
                                           limit=limit, downcast=downcast,
                                           **kwargs)
 
+    @Appender(generic._shared_docs['replace'] % _shared_doc_kwargs)
+    def replace(self, to_replace=None, value=None, inplace=False, limit=None,
+                regex=False, method='pad', axis=None):
+        return super(Series, self).replace(to_replace=to_replace, value=value,
+                                           inplace=inplace, limit=limit,
+                                           regex=regex, method=method,
+                                           axis=axis)
+
     @Appender(generic._shared_docs['shift'] % _shared_doc_kwargs)
     def shift(self, periods=1, freq=None, axis=0):
         return super(Series, self).shift(periods=periods, freq=freq, axis=axis)