From bfb868b36892ceda6bfee6f0d99b7d957672e365 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 13:58:10 -0600 Subject: [PATCH 1/7] API: Validate keyword arguments to fillna Closes https://github.com/pandas-dev/pandas/issues/19682 --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/arrays/categorical.py | 5 +++- pandas/core/generic.py | 12 +++------ pandas/tests/categorical/test_missing.py | 20 ++++++++++++++ pandas/util/_validators.py | 34 ++++++++++++++++++++++++ 5 files changed, 62 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 72f63a4da0f4d..d665bf089af47 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -570,6 +570,7 @@ Other API Changes - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. "" instead of "" (:issue:`19403`) - :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index bcf9cb7646704..9856c85dd17f8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -40,7 +40,7 @@ Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.io.formats.terminal import get_terminal_size -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core.config import get_option from .base import ExtensionArray @@ -1607,6 +1607,9 @@ def fillna(self, value=None, method=None, limit=None): ------- filled : Categorical with NA/NaN filled """ + value, method = validate_fillna_kwargs( + value, method, validate_scalar_dict_value=False + ) if value is None: value = np.nan diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 35f866c9e7d58..d0e4194004f33 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -54,7 +54,7 @@ import pandas.core.nanops as nanops from pandas.util._decorators import (Appender, Substitution, deprecate_kwarg) -from pandas.util._validators import validate_bool_kwarg +from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs from pandas.core import config # goal is to be able to define the docs close to function, while still being @@ -4694,10 +4694,8 @@ def infer_objects(self): def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None, downcast=None): inplace = validate_bool_kwarg(inplace, 'inplace') + value, method = validate_fillna_kwargs(value, method) - if isinstance(value, (list, tuple)): - raise TypeError('"value" parameter must be a scalar or dict, but ' - 'you passed a "{0}"'.format(type(value).__name__)) self._consolidate_inplace() # set the default here, so functions examining the signaure @@ -4708,8 +4706,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, method = missing.clean_fill_method(method) from pandas import DataFrame if value is None: - if method is None: - raise ValueError('must specify a fill method or value') + if self._is_mixed_type and axis == 1: if inplace: raise NotImplementedError() @@ -4743,9 +4740,6 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, coerce=True, downcast=downcast) else: - if method is not None: - raise ValueError('cannot specify both a fill method and value') - if len(self._get_axis(axis)) == 0: return self diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py index 79758dee5cfda..c3851fc9319fc 100644 --- a/pandas/tests/categorical/test_missing.py +++ b/pandas/tests/categorical/test_missing.py @@ -53,3 +53,23 @@ def test_set_item_nan(self): exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(cat, exp) + + def test_fillna_raises(self): + # https://github.com/pandas-dev/pandas/issues/19682 + + cat = Categorical([1, 2, 3]) + + xpr = "Cannot specify both 'value' and 'method'." + + with tm.assert_raises_regex(ValueError, xpr): + cat.fillna(value=1, method='ffill') + + xpr = "Must specify a fill 'value' or 'method'." + + with tm.assert_raises_regex(ValueError, xpr): + cat.fillna() + + xpr = "Invalid fill method. Expecting .* bad" + + with tm.assert_raises_regex(ValueError, xpr): + cat.fillna(method='bad') diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index b30ffc7416f92..e91e169d0e9dc 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -320,3 +320,37 @@ def validate_axis_style_args(data, args, kwargs, arg_name, method_name): msg = "Cannot specify all of '{}', 'index', 'columns'." raise TypeError(msg.format(arg_name)) return out + + +def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): + """Validate the keyword arguments to 'fillna'. + + This checks that exactly one of 'value' and 'method' is specified. + If 'method' is specified, this validates that it's a valid method. + + Parameters + ---------- + value, method : object + The 'value' and 'method' keyword arguments for 'fillna'. + + Returns + ------- + value, method : object + These aren't modified in any way. Just validated. + """ + from pandas.core.missing import clean_fill_method + + if value is None and method is None: + raise ValueError("Must specify a fill 'value' or 'method'.") + elif value is None and method is not None: + clean_fill_method(method) + + elif value is not None and method is None: + if validate_scalar_dict_value and isinstance(value, (list, tuple)): + raise TypeError('"value" parameter must be a scalar or dict, but ' + 'you passed a "{0}"'.format(type(value).__name__)) + + elif value is not None and method is not None: + raise ValueError("Cannot specify both 'value' and 'method'.") + + return value, method From dddc7cfe15a5fe4a08e7ec26b4227d8654a224c2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 14:56:41 -0600 Subject: [PATCH 2/7] Return cleaned value --- pandas/util/_validators.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index e91e169d0e9dc..0d88a0942ff7c 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -336,14 +336,13 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): Returns ------- value, method : object - These aren't modified in any way. Just validated. """ from pandas.core.missing import clean_fill_method if value is None and method is None: raise ValueError("Must specify a fill 'value' or 'method'.") elif value is None and method is not None: - clean_fill_method(method) + method = clean_fill_method(method) elif value is not None and method is None: if validate_scalar_dict_value and isinstance(value, (list, tuple)): From f810f67163b661bb534c7646b2df9369cf8eba40 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Feb 2018 05:28:26 -0600 Subject: [PATCH 3/7] Docstring --- pandas/util/_validators.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 0d88a0942ff7c..2ea83b7b85284 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -332,6 +332,9 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): ---------- value, method : object The 'value' and 'method' keyword arguments for 'fillna'. + validate_scalar_dict_value : bool, default True + Whether to validate that 'value' is a scalar or dict; specifically + that it is not a list or tuple. Returns ------- From c280eaa7fcbfb806d7651370c0e985282fadd4d6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 15 Feb 2018 08:36:49 -0600 Subject: [PATCH 4/7] Parametrized --- pandas/tests/categorical/test_missing.py | 28 ++++++++++-------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/pandas/tests/categorical/test_missing.py b/pandas/tests/categorical/test_missing.py index c3851fc9319fc..fca5573547071 100644 --- a/pandas/tests/categorical/test_missing.py +++ b/pandas/tests/categorical/test_missing.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import numpy as np +import pytest import pandas.util.testing as tm from pandas import (Categorical, Index, isna) @@ -54,22 +55,17 @@ def test_set_item_nan(self): exp = Categorical([1, np.nan, 3], categories=[1, 2, 3]) tm.assert_categorical_equal(cat, exp) - def test_fillna_raises(self): + @pytest.mark.parametrize('fillna_kwargs, msg', [ + (dict(value=1, method='ffill'), + "Cannot specify both 'value' and 'method'."), + (dict(), + "Must specify a fill 'value' or 'method'."), + (dict(method='bad'), + "Invalid fill method. Expecting .* bad"), + ]) + def test_fillna_raises(self, fillna_kwargs, msg): # https://github.com/pandas-dev/pandas/issues/19682 - cat = Categorical([1, 2, 3]) - xpr = "Cannot specify both 'value' and 'method'." - - with tm.assert_raises_regex(ValueError, xpr): - cat.fillna(value=1, method='ffill') - - xpr = "Must specify a fill 'value' or 'method'." - - with tm.assert_raises_regex(ValueError, xpr): - cat.fillna() - - xpr = "Invalid fill method. Expecting .* bad" - - with tm.assert_raises_regex(ValueError, xpr): - cat.fillna(method='bad') + with tm.assert_raises_regex(ValueError, msg): + cat.fillna(**fillna_kwargs) From 6c09f0832c423b118d81a2908fd4d8828f3806f0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Feb 2018 08:02:23 -0600 Subject: [PATCH 5/7] Moved tz merge whatsnew --- doc/source/whatsnew/v0.23.0.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 9ff1d44fe9667..8ec88dfd12711 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -555,6 +555,7 @@ Datetimelike API Changes - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'``(:issue:`18808`) - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`) - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (issue:`18817`) +- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) .. _whatsnew_0230.api.other: @@ -575,7 +576,6 @@ Other API Changes - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) - :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) - :func:`pandas.merge` now raises a ``ValueError`` when trying to merge on incompatible data types (:issue:`9780`) -- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) - Refactored ``setup.py`` to use ``find_packages`` instead of explicitly listing out all subpackages (:issue:`18535`) - Rearranged the order of keyword arguments in :func:`read_excel()` to align with :func:`read_csv()` (:issue:`16672`) @@ -590,7 +590,6 @@ Other API Changes object frequency is ``None`` (:issue:`19147`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. "" instead of "" (:issue:`19403`) -- :func:`pandas.merge` provides a more informative error message when trying to merge on timezone-aware and timezone-naive columns (:issue:`15800`) - ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) .. _whatsnew_0230.deprecations: From 90f8fdef2990a25a8773bfc855770e4bbcfbfe38 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 20 Feb 2018 08:02:58 -0600 Subject: [PATCH 6/7] Docstring grammar --- pandas/util/_validators.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 2ea83b7b85284..a96563051e7de 100644 --- a/pandas/util/_validators.py +++ b/pandas/util/_validators.py @@ -333,8 +333,8 @@ def validate_fillna_kwargs(value, method, validate_scalar_dict_value=True): value, method : object The 'value' and 'method' keyword arguments for 'fillna'. validate_scalar_dict_value : bool, default True - Whether to validate that 'value' is a scalar or dict; specifically - that it is not a list or tuple. + Whether to validate that 'value' is a scalar or dict. Specifically, + validate that it is not a list or tuple. Returns ------- From dc1f9606204e126723867024fbf35f953dcf31c7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 21 Feb 2018 06:51:59 -0600 Subject: [PATCH 7/7] fixup! Merge remote-tracking branch 'upstream/master' into validate-fillna --- doc/source/whatsnew/v0.23.0.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 4a10746a90533..54fbbb12c2917 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -588,12 +588,8 @@ Other API Changes - ``KeyError`` now raises instead of ``ValueError`` in :meth:`~DataFrame.drop`, :meth:`~Panel.drop`, :meth:`~Series.drop`, :meth:`~Index.drop` when dropping a non-existent element in an axis with duplicates (:issue:`19186`) - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) -<<<<<<< HEAD -- :class:`DateOffset` objects render more simply, e.g. "" instead of "" (:issue:`19403`) -- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) -======= - :class:`DateOffset` objects render more simply, e.g. ```` instead of ```` (:issue:`19403`) ->>>>>>> upstream/master +- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`) .. _whatsnew_0230.deprecations: