diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 8482eef552c17..def49a641a0ff 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -373,7 +373,7 @@ To get the boolean mask where values are ``nan`` .. ipython:: python - pd.isnull(df1) + pd.isna(df1) Operations diff --git a/doc/source/api.rst b/doc/source/api.rst index d6053791d6f4b..01f7768c136c7 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -187,8 +187,8 @@ Top-level missing data .. autosummary:: :toctree: generated/ - isnull - notnull + isna + notna Top-level conversions ~~~~~~~~~~~~~~~~~~~~~ @@ -271,8 +271,8 @@ Conversion Series.astype Series.copy - Series.isnull - Series.notnull + Series.isna + Series.notna Indexing, iteration ~~~~~~~~~~~~~~~~~~~ @@ -778,8 +778,8 @@ Conversion DataFrame.astype DataFrame.convert_objects DataFrame.copy - DataFrame.isnull - DataFrame.notnull + DataFrame.isna + DataFrame.notna Indexing, iteration ~~~~~~~~~~~~~~~~~~~ @@ -1096,8 +1096,8 @@ Conversion Panel.astype Panel.copy - Panel.isnull - Panel.notnull + Panel.isna + Panel.notna Getting and setting ~~~~~~~~~~~~~~~~~~~ @@ -1340,8 +1340,8 @@ Missing Values Index.fillna Index.dropna - Index.isnull - Index.notnull + Index.isna + Index.notna Conversion ~~~~~~~~~~ diff --git a/doc/source/basics.rst b/doc/source/basics.rst index d8b1602fb104d..6249c1b258d5d 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -444,7 +444,7 @@ So, for instance, to reproduce :meth:`~DataFrame.combine_first` as above: .. ipython:: python - combiner = lambda x, y: np.where(pd.isnull(x), y, x) + combiner = lambda x, y: np.where(pd.isna(x), y, x) df1.combine(df2, combiner) .. 
_basics.stats: @@ -511,7 +511,7 @@ optional ``level`` parameter which applies only if the object has a :header: "Function", "Description" :widths: 20, 80 - ``count``, Number of non-null observations + ``count``, Number of non-na observations ``sum``, Sum of values ``mean``, Mean of values ``mad``, Mean absolute deviation @@ -541,7 +541,7 @@ will exclude NAs on Series input by default: np.mean(df['one'].values) ``Series`` also has a method :meth:`~Series.nunique` which will return the -number of unique non-null values: +number of unique non-na values: .. ipython:: python diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index ef558381c5e6f..02d7920bc4a84 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -863,14 +863,14 @@ a code of ``-1``. s.cat.codes -Methods for working with missing data, e.g. :meth:`~Series.isnull`, :meth:`~Series.fillna`, +Methods for working with missing data, e.g. :meth:`~Series.isna`, :meth:`~Series.fillna`, :meth:`~Series.dropna`, all work normally: .. ipython:: python s = pd.Series(["a", "b", np.nan], dtype="category") s - pd.isnull(s) + pd.isna(s) s.fillna("a") Differences to R's `factor` diff --git a/doc/source/comparison_with_sas.rst b/doc/source/comparison_with_sas.rst index 875358521173a..33a347de0bf5b 100644 --- a/doc/source/comparison_with_sas.rst +++ b/doc/source/comparison_with_sas.rst @@ -444,13 +444,13 @@ For example, in SAS you could do this to filter missing values. if value_x ^= .; run; -Which doesn't work in in pandas. Instead, the ``pd.isnull`` or ``pd.notnull`` functions +Which doesn't work in in pandas. Instead, the ``pd.isna`` or ``pd.notna`` functions should be used for comparisons. .. 
ipython:: python - outer_join[pd.isnull(outer_join['value_x'])] - outer_join[pd.notnull(outer_join['value_x'])] + outer_join[pd.isna(outer_join['value_x'])] + outer_join[pd.notna(outer_join['value_x'])] pandas also provides a variety of methods to work with missing data - some of which would be challenging to express in SAS. For example, there are methods to @@ -570,7 +570,7 @@ machine's memory, but also that the operations on that data may be faster. If out of core processing is needed, one possibility is the `dask.dataframe `_ -library (currently in development) which +library (currently in development) which provides a subset of pandas functionality for an on-disk ``DataFrame`` Data Interop @@ -578,7 +578,7 @@ Data Interop pandas provides a :func:`read_sas` method that can read SAS data saved in the XPORT or SAS7BDAT binary format. - + .. code-block:: none libname xportout xport 'transport-file.xpt'; @@ -613,4 +613,3 @@ to interop data between SAS and pandas is to serialize to csv. In [9]: %time df = pd.read_csv('big.csv') Wall time: 4.86 s - diff --git a/doc/source/comparison_with_sql.rst b/doc/source/comparison_with_sql.rst index 7962e0e69faa1..2112c7de8c897 100644 --- a/doc/source/comparison_with_sql.rst +++ b/doc/source/comparison_with_sql.rst @@ -101,7 +101,7 @@ Just like SQL's OR and AND, multiple conditions can be passed to a DataFrame usi # tips by parties of at least 5 diners OR bill total was more than $45 tips[(tips['size'] >= 5) | (tips['total_bill'] > 45)] -NULL checking is done using the :meth:`~pandas.Series.notnull` and :meth:`~pandas.Series.isnull` +NULL checking is done using the :meth:`~pandas.Series.notna` and :meth:`~pandas.Series.isna` methods. .. ipython:: python @@ -121,9 +121,9 @@ where ``col2`` IS NULL with the following query: .. ipython:: python - frame[frame['col2'].isnull()] + frame[frame['col2'].isna()] -Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notnull`. 
+Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series.notna`. .. code-block:: sql @@ -133,7 +133,7 @@ Getting items where ``col1`` IS NOT NULL can be done with :meth:`~pandas.Series. .. ipython:: python - frame[frame['col1'].notnull()] + frame[frame['col1'].notna()] GROUP BY diff --git a/doc/source/conf.py b/doc/source/conf.py index 394fa44c30573..1d1cab7dd19a4 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -233,8 +233,8 @@ # https://github.com/pandas-dev/pandas/issues/16186 moved_api_pages = [ - ('pandas.core.common.isnull', 'pandas.isnull'), - ('pandas.core.common.notnull', 'pandas.notnull'), + ('pandas.core.common.isnull', 'pandas.isna'), + ('pandas.core.common.notnull', 'pandas.notna'), ('pandas.core.reshape.get_dummies', 'pandas.get_dummies'), ('pandas.tools.merge.concat', 'pandas.concat'), ('pandas.tools.merge.merge', 'pandas.merge'), diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 11827fe2776cf..a3a90f514f142 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -202,7 +202,7 @@ For many reasons we chose the latter. After years of production use it has proven, at least in my opinion, to be the best decision given the state of affairs in NumPy and Python in general. The special value ``NaN`` (Not-A-Number) is used everywhere as the ``NA`` value, and there are API -functions ``isnull`` and ``notnull`` which can be used across the dtypes to +functions ``isna`` and ``notna`` which can be used across the dtypes to detect NA values. However, it comes with it a couple of trade-offs which I most certainly have diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 37930775885e3..e40b7d460fef8 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -36,7 +36,7 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. 
By "missing" we simply mean -**null** or "not present for whatever reason". Many data sets simply arrive with +**na** or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. Thus, values prior to the start date @@ -63,27 +63,27 @@ to handling missing data. While ``NaN`` is the default missing value marker for reasons of computational speed and convenience, we need to be able to easily detect this value with data of different types: floating point, integer, boolean, and general object. In many cases, however, the Python ``None`` will -arise and we wish to also consider that "missing" or "null". +arise and we wish to also consider that "missing" or "na". .. note:: Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "null" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_null`` option to recover it. + considered to be "na" in computations. This is no longer the case by + default; use the ``mode.use_inf_as_na`` option to recover it. -.. _missing.isnull: +.. _missing.isna: To make detecting missing values easier (and across different array dtypes), -pandas provides the :func:`~pandas.core.common.isnull` and -:func:`~pandas.core.common.notnull` functions, which are also methods on +pandas provides the :func:`isna` and +:func:`notna` functions, which are also methods on ``Series`` and ``DataFrame`` objects: .. ipython:: python df2['one'] - pd.isnull(df2['one']) - df2['four'].notnull() - df2.isnull() + pd.isna(df2['one']) + df2['four'].notna() + df2.isna() .. warning:: @@ -206,7 +206,7 @@ with missing data. 
Filling missing values: fillna ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The **fillna** function can "fill in" NA values with non-null data in a couple +The **fillna** function can "fill in" NA values with non-na data in a couple of ways, which we illustrate: **Replace NA with a scalar value** @@ -220,7 +220,7 @@ of ways, which we illustrate: **Fill gaps forward or backward** Using the same filling arguments as :ref:`reindexing `, we -can propagate non-null values forward or backward: +can propagate non-na values forward or backward: .. ipython:: python @@ -288,7 +288,7 @@ a Series in this case. .. ipython:: python - dff.where(pd.notnull(dff), dff.mean(), axis='columns') + dff.where(pd.notna(dff), dff.mean(), axis='columns') .. _missing_data.dropna: diff --git a/doc/source/options.rst b/doc/source/options.rst index f373705a96f48..f8ef22c4c1d1e 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -421,10 +421,10 @@ mode.chained_assignment warn Raise an exception, warn, or no assignment, The default is warn mode.sim_interactive False Whether to simulate interactive mode for purposes of testing. -mode.use_inf_as_null False True means treat None, NaN, -INF, - INF as null (old way), False means +mode.use_inf_as_na False True means treat None, NaN, -INF, + INF as NA (old way), False means None and NaN are null, but INF, -INF - are not null (new way). + are not NA (new way). compute.use_bottleneck True Use the bottleneck library to accelerate computation if it is installed. compute.use_numexpr True Use the numexpr library to accelerate diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 7c52cf6f450b2..13a5ad4109012 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -92,6 +92,22 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in ... ValueError: Cannot operate inplace if there is no assignment +.. 
_whatsnew_0210.api.na_changes: + +NA naming Changes +^^^^^^^^^^^^^^^^^ + +In order to promote more consistency among the pandas API, we have added additional top-level +functions :func:`isna` and :func:`notna` that are the same as :func:`isnull` and :func:`notnull`. +The naming scheme is now more consistent with methods ``.dropna()`` and ``.fillna()``. Furthermore +in all cases where ``.isnull()`` and ``.notnull()`` methods are defined, these have additional methods +named ``.isna()`` and ``.notna()``, these include the classes `Categorical`, +`Index`, `Series`, and `DataFrame`. (:issue:`15001`). + +The configuration option ``mode.use_inf_as_null`` has been renamed to ``mode.use_inf_as_na`` as well. + +We *may* deprecate the ``.isnull()`` and ``.notnull()`` schemes at some point in the future. + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index aafffbf60f638..0945aec638b1d 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -83,7 +83,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ascending=True, nan_value = {{neg_nan_value}} {{if dtype == 'object'}} - mask = lib.isnullobj(values) + mask = lib.isnaobj(values) {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} @@ -259,7 +259,7 @@ def rank_2d_{{dtype}}(object in_arr, axis=0, ties_method='average', nan_value = {{neg_nan_value}} {{if dtype == 'object'}} - mask = lib.isnullobj2d(values) + mask = lib.isnaobj2d(values) {{elif dtype == 'float64'}} mask = np.isnan(values) {{elif dtype == 'int64'}} diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index f6e574b66a828..0458d4ae9f3de 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -286,7 +286,7 @@ def item_from_zerodim(object val): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj(ndarray arr): +def isnaobj(ndarray arr): cdef Py_ssize_t i, n cdef object val cdef 
ndarray[uint8_t] result @@ -303,7 +303,7 @@ def isnullobj(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj_old(ndarray arr): +def isnaobj_old(ndarray arr): cdef Py_ssize_t i, n cdef object val cdef ndarray[uint8_t] result @@ -320,7 +320,7 @@ def isnullobj_old(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj2d(ndarray arr): +def isnaobj2d(ndarray arr): cdef Py_ssize_t i, j, n, m cdef object val cdef ndarray[uint8_t, ndim=2] result @@ -339,7 +339,7 @@ def isnullobj2d(ndarray arr): @cython.wraparound(False) @cython.boundscheck(False) -def isnullobj2d_old(ndarray arr): +def isnaobj2d_old(ndarray arr): cdef Py_ssize_t i, j, n, m cdef object val cdef ndarray[uint8_t, ndim=2] result diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 9495af87f5c31..ab7f3c3de2131 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -1,7 +1,7 @@ import numpy as np from pandas import compat -from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import is_dtype_equal cdef NUMERIC_TYPES = ( @@ -182,7 +182,7 @@ cpdef assert_almost_equal(a, b, if a == b: # object comparison return True - if isnull(a) and isnull(b): + if isna(a) and isna(b): # nan / None comparison return True if is_comparable_as_number(a) and is_comparable_as_number(b): diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index b490bf787a037..9ebb41c367a90 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -27,7 +27,7 @@ _ensure_float64, _ensure_uint64, _ensure_int64) from pandas.compat.numpy import _np_version_under1p10 -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core import common as com from pandas.compat import string_types @@ -417,7 +417,7 @@ def isin(comps, values): try: values = values.astype('float64', copy=False) comps = 
comps.astype('float64', copy=False) - checknull = isnull(values).any() + checknull = isna(values).any() f = lambda x, y: htable.ismember_float64(x, y, checknull) except (TypeError, ValueError): values = values.astype(object) @@ -616,7 +616,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False, # count, remove nulls (from the index), and but the bins result = ii.value_counts(dropna=dropna) - result = result[result.index.notnull()] + result = result[result.index.notna()] result.index = result.index.astype('interval') result = result.sort_index() @@ -684,9 +684,9 @@ def _value_counts_arraylike(values, dropna): f = getattr(htable, "value_count_{dtype}".format(dtype=ndtype)) keys, counts = f(values, dropna) - mask = isnull(values) + mask = isna(values) if not dropna and mask.any(): - if not isnull(keys).any(): + if not isna(keys).any(): keys = np.insert(keys, 0, np.NaN) counts = np.insert(counts, 0, mask.sum()) @@ -947,7 +947,7 @@ def quantile(x, q, interpolation_method='fraction'): """ x = np.asarray(x) - mask = isnull(x) + mask = isna(x) x = x[~mask] diff --git a/pandas/core/api.py b/pandas/core/api.py index 265fb4004d997..086fedd7d7cf8 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -5,7 +5,7 @@ import numpy as np from pandas.core.algorithms import factorize, unique, value_counts -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, isnull, notna, notnull from pandas.core.categorical import Categorical from pandas.core.groupby import Grouper from pandas.io.formats.format import set_eng_float_format diff --git a/pandas/core/base.py b/pandas/core/base.py index 97c4c8626dcbb..eb785b18bd02b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -6,7 +6,7 @@ from pandas.compat import builtins import numpy as np -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass from 
pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar from pandas.util._validators import validate_bool_kwarg @@ -894,7 +894,7 @@ def argmin(self, axis=None): @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ - return isnull(self).any() + return isna(self).any() def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): @@ -990,7 +990,7 @@ def nunique(self, dropna=True): """ uniqs = self.unique() n = len(uniqs) - if dropna and isnull(uniqs).any(): + if dropna and isna(uniqs).any(): n -= 1 return n diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index afae11163b0dc..3810455521a65 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -10,7 +10,7 @@ from pandas.core.dtypes.generic import ( ABCSeries, ABCIndexClass, ABCCategoricalIndex) -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.cast import ( maybe_infer_to_datetimelike, coerce_indexer_dtype) @@ -290,7 +290,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): # On list with NaNs, int values will be converted to float. Use # "object" dtype to prevent this. In the end objects will be # casted to int/... in the category assignment step. - dtype = 'object' if isnull(values).any() else None + dtype = 'object' if isna(values).any() else None values = _sanitize_array(values, None, dtype=dtype) if categories is None: @@ -561,9 +561,9 @@ def _validate_categories(cls, categories, fastpath=False): categories = _convert_to_list_like(categories) # On categories with NaNs, int values would be converted to # float. Use "object" dtype to prevent this. 
- if isnull(categories).any(): + if isna(categories).any(): without_na = np.array([x for x in categories - if notnull(x)]) + if notna(x)]) with_na = np.array(categories) if with_na.dtype != without_na.dtype: dtype = "object" @@ -941,9 +941,9 @@ def remove_categories(self, removals, inplace=False): new_categories = [c for c in self._categories if c not in removal_set] # GH 10156 - if any(isnull(removals)): - not_included = [x for x in not_included if notnull(x)] - new_categories = [x for x in new_categories if notnull(x)] + if any(isna(removals)): + not_included = [x for x in not_included if notna(x)] + new_categories = [x for x in new_categories if notna(x)] if len(not_included) != 0: raise ValueError("removals must all be in old categories: %s" % @@ -1153,7 +1153,7 @@ def searchsorted(self, value, side='left', sorter=None): return self.codes.searchsorted(values_as_codes, side=side, sorter=sorter) - def isnull(self): + def isna(self): """ Detect missing values @@ -1165,8 +1165,8 @@ def isnull(self): See also -------- - isnull : pandas version - Categorical.notnull : boolean inverse of Categorical.isnull + isna : pandas version + Categorical.notna : boolean inverse of Categorical.isna """ @@ -1175,14 +1175,15 @@ def isnull(self): # String/object and float categories can hold np.nan if self.categories.dtype.kind in ['S', 'O', 'f']: if np.nan in self.categories: - nan_pos = np.where(isnull(self.categories))[0] + nan_pos = np.where(isna(self.categories))[0] # we only have one NA in categories ret = np.logical_or(ret, self._codes == nan_pos) return ret + isnull = isna - def notnull(self): + def notna(self): """ - Reverse of isnull + Reverse of isna Both missing values (-1 in .codes) and NA as a category are detected as null. 
@@ -1193,11 +1194,12 @@ def notnull(self): See also -------- - notnull : pandas version - Categorical.isnull : boolean inverse of Categorical.notnull + notna : pandas version + Categorical.isna : boolean inverse of Categorical.notna """ - return ~self.isnull() + return ~self.isna() + notnull = notna def put(self, *args, **kwargs): """ @@ -1217,8 +1219,8 @@ def dropna(self): ------- valid : Categorical """ - result = self[self.notnull()] - if isnull(result.categories).any(): + result = self[self.notna()] + if isna(result.categories).any(): result = result.remove_categories([np.nan]) return result @@ -1243,12 +1245,10 @@ def value_counts(self, dropna=True): """ from numpy import bincount - from pandas.core.dtypes.missing import isnull - from pandas.core.series import Series - from pandas.core.index import CategoricalIndex + from pandas import isna, Series, CategoricalIndex obj = (self.remove_categories([np.nan]) if dropna and - isnull(self.categories).any() else self) + isna(self.categories).any() else self) code, cat = obj._codes, obj.categories ncat, mask = len(cat), 0 <= code ix, clean = np.arange(ncat), mask.all() @@ -1520,7 +1520,7 @@ def fillna(self, value=None, method=None, limit=None): if self.categories.dtype.kind in ['S', 'O', 'f']: if np.nan in self.categories: values = values.copy() - nan_pos = np.where(isnull(self.categories))[0] + nan_pos = np.where(isna(self.categories))[0] # we only have one NA in categories values[values == nan_pos] = -1 @@ -1534,13 +1534,13 @@ def fillna(self, value=None, method=None, limit=None): else: - if not isnull(value) and value not in self.categories: + if not isna(value) and value not in self.categories: raise ValueError("fill value must be in categories") mask = values == -1 if mask.any(): values = values.copy() - if isnull(value): + if isna(value): values[mask] = -1 else: values[mask] = self.categories.get_loc(value) @@ -1556,7 +1556,7 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None): # filling must always 
be None/nan here # but is passed thru internally - assert isnull(fill_value) + assert isna(fill_value) codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1) result = self._constructor(codes, categories=self.categories, @@ -1720,7 +1720,7 @@ def __setitem__(self, key, value): # no assignments of values not in categories, but it's always ok to set # something to np.nan - if len(to_add) and not isnull(to_add).all(): + if len(to_add) and not isna(to_add).all(): raise ValueError("Cannot setitem on a Categorical with a new " "category, set the categories first") @@ -1763,8 +1763,8 @@ def __setitem__(self, key, value): # https://github.com/pandas-dev/pandas/issues/7820 # float categories do currently return -1 for np.nan, even if np.nan is # included in the index -> "repair" this here - if isnull(rvalue).any() and isnull(self.categories).any(): - nan_pos = np.where(isnull(self.categories))[0] + if isna(rvalue).any() and isna(self.categories).any(): + nan_pos = np.where(isna(self.categories))[0] lindexer[lindexer == -1] = nan_pos lindexer = self._maybe_coerce_indexer(lindexer) diff --git a/pandas/core/common.py b/pandas/core/common.py index 3b09e68c6433a..44cb36b8a3207 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -18,7 +18,7 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import _NS_DTYPE from pandas.core.dtypes.inference import _iterable_not_string -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna, isnull, notnull # noqa from pandas.api import types from pandas.core.dtypes import common @@ -187,7 +187,7 @@ def is_bool_indexer(key): key = np.asarray(_values_from_object(key)) if not lib.is_bool_array(key): - if isnull(key).any(): + if isna(key).any(): raise ValueError('cannot index with vector containing ' 'NA / NaN values') return False diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index ae3001564a62f..91faeabc684aa 100644 --- 
a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -408,9 +408,14 @@ def table_schema_cb(key): cf.register_option('sim_interactive', False, tc_sim_interactive_doc) use_inf_as_null_doc = """ +use_inf_as_null had been deprecated and will be removed in a future version. +Use `use_inf_as_na` instead. +""" + +use_inf_as_na_doc = """ : boolean - True means treat None, NaN, INF, -INF as null (old way), - False means None and NaN are null, but INF, -INF are not null + True means treat None, NaN, INF, -INF as na (old way), + False means None and NaN are null, but INF, -INF are not na (new way). """ @@ -418,14 +423,17 @@ def table_schema_cb(key): # or we'll hit circular deps. -def use_inf_as_null_cb(key): - from pandas.core.dtypes.missing import _use_inf_as_null - _use_inf_as_null(key) +def use_inf_as_na_cb(key): + from pandas.core.dtypes.missing import _use_inf_as_na + _use_inf_as_na(key) -with cf.config_prefix('mode'): - cf.register_option('use_inf_as_null', False, use_inf_as_null_doc, - cb=use_inf_as_null_cb) +cf.register_option('mode.use_inf_as_na', False, use_inf_as_na_doc, + cb=use_inf_as_na_cb) + +cf.deprecate_option('mode.use_inf_as_null', msg=use_inf_as_null_doc, + rkey='mode.use_inf_as_na') + # user warnings chained_assignment = """ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 6532e17695c86..2cb2c9ed84fdc 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -27,7 +27,7 @@ from .dtypes import ExtensionDtype, DatetimeTZDtype, PeriodDtype from .generic import (ABCDatetimeIndex, ABCPeriodIndex, ABCSeries) -from .missing import isnull, notnull +from .missing import isna, notna from .inference import is_list_like _int8_max = np.iinfo(np.int8).max @@ -121,7 +121,7 @@ def trans(x): # noqa arr = np.array([r[0]]) # if we have any nulls, then we are done - if (isnull(arr).any() or + if (isna(arr).any() or not np.allclose(arr, trans(arr).astype(dtype), rtol=0)): return result @@ -131,7 +131,7 @@ def trans(x): # 
noqa return result if (issubclass(result.dtype.type, (np.object_, np.number)) and - notnull(result).all()): + notna(result).all()): new_result = trans(result).astype(dtype) try: if np.allclose(new_result, result, rtol=0): @@ -191,7 +191,7 @@ def maybe_upcast_putmask(result, mask, other): # integer or integer array -> date-like array if is_datetimelike(result.dtype): if is_scalar(other): - if isnull(other): + if isna(other): other = result.dtype.type('nat') elif is_integer(other): other = np.array(other, dtype=result.dtype) @@ -232,13 +232,13 @@ def changeit(): # and its nan and we are changing some values if (is_scalar(other) or (isinstance(other, np.ndarray) and other.ndim < 1)): - if isnull(other): + if isna(other): return changeit() # we have an ndarray and the masking has nans in it else: - if isnull(other[mask]).any(): + if isna(other[mask]).any(): return changeit() try: @@ -268,7 +268,7 @@ def maybe_promote(dtype, fill_value=np.nan): # for now: refuse to upcast datetime64 # (this is because datetime64 will not implicitly upconvert # to object correctly as of numpy 1.6.1) - if isnull(fill_value): + if isna(fill_value): fill_value = iNaT else: if issubclass(dtype.type, np.datetime64): @@ -287,7 +287,7 @@ def maybe_promote(dtype, fill_value=np.nan): else: fill_value = iNaT elif is_datetimetz(dtype): - if isnull(fill_value): + if isna(fill_value): fill_value = iNaT elif is_float(fill_value): if issubclass(dtype.type, np.bool_): @@ -550,7 +550,7 @@ def coerce_to_dtypes(result, dtypes): def conv(r, dtype): try: - if isnull(r): + if isna(r): pass elif dtype == _NS_DTYPE: r = lib.Timestamp(r) @@ -605,7 +605,7 @@ def astype_nansafe(arr, dtype, copy=True): # allow frequency conversions if dtype.kind == 'm': - mask = isnull(arr) + mask = isna(arr) result = arr.astype(dtype).astype(np.float64) result[mask] = np.nan return result @@ -657,7 +657,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, values, 'M8[ns]', errors='coerce') # if we are 
all nans then leave me alone - if not isnull(new_values).all(): + if not isna(new_values).all(): values = new_values else: @@ -672,7 +672,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, new_values = to_timedelta(values, errors='coerce') # if we are all nans then leave me alone - if not isnull(new_values).all(): + if not isna(new_values).all(): values = new_values else: @@ -687,7 +687,7 @@ def maybe_convert_objects(values, convert_dates=True, convert_numeric=True, coerce_numeric=True) # if we are all nans then leave me alone - if not isnull(new_values).all(): + if not isna(new_values).all(): values = new_values except: @@ -749,7 +749,7 @@ def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True, converted = lib.maybe_convert_numeric(values, set(), coerce_numeric=True) # If all NaNs, then do not-alter - values = converted if not isnull(converted).all() else values + values = converted if not isna(converted).all() else values values = values.copy() if copy else values except: pass @@ -851,7 +851,7 @@ def try_timedelta(v): elif inferred_type == 'nat': # if all NaT, return as datetime - if isnull(v).all(): + if isna(v).all(): value = try_datetime(v) else: @@ -902,7 +902,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): # our NaT doesn't support tz's # this will coerce to DatetimeIndex with # a matching dtype below - if is_scalar(value) and isnull(value): + if is_scalar(value) and isna(value): value = [value] elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE): @@ -916,7 +916,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): "dtype [%s]" % dtype) if is_scalar(value): - if value == iNaT or isnull(value): + if value == iNaT or isna(value): value = iNaT else: value = np.array(value, copy=False) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 9913923cb7807..45b8af6277c4c 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ 
-23,7 +23,7 @@ from .inference import is_list_like -def isnull(obj): +def isna(obj): """Detect missing values (NaN in numeric arrays, None/NaN in object arrays) Parameters @@ -33,34 +33,37 @@ def isnull(obj): Returns ------- - isnulled : array-like of bool or bool + isna : array-like of bool or bool Array or bool indicating whether an object is null or if an array is given which of the element is null. See also -------- - pandas.notnull: boolean inverse of pandas.isnull + pandas.notna: boolean inverse of pandas.isna """ - return _isnull(obj) + return _isna(obj) -def _isnull_new(obj): +isnull = isna + + +def _isna_new(obj): if is_scalar(obj): return lib.checknull(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") + raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): - return _isnull_ndarraylike(obj) + return _isna_ndarraylike(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.isnull(func=isnull)) + return obj._constructor(obj._data.isna(func=isna)) elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike(np.asarray(obj)) + return _isna_ndarraylike(np.asarray(obj)) else: return obj is None -def _isnull_old(obj): +def _isna_old(obj): """Detect missing values. Treat None, NaN, INF, -INF as null. 
Parameters @@ -75,22 +78,22 @@ def _isnull_old(obj): return lib.checknull_old(obj) # hack (for now) because MI registers as ndarray elif isinstance(obj, ABCMultiIndex): - raise NotImplementedError("isnull is not defined for MultiIndex") + raise NotImplementedError("isna is not defined for MultiIndex") elif isinstance(obj, (ABCSeries, np.ndarray, ABCIndexClass)): - return _isnull_ndarraylike_old(obj) + return _isna_ndarraylike_old(obj) elif isinstance(obj, ABCGeneric): - return obj._constructor(obj._data.isnull(func=_isnull_old)) + return obj._constructor(obj._data.isna(func=_isna_old)) elif isinstance(obj, list) or hasattr(obj, '__array__'): - return _isnull_ndarraylike_old(np.asarray(obj)) + return _isna_ndarraylike_old(np.asarray(obj)) else: return obj is None -_isnull = _isnull_new +_isna = _isna_new -def _use_inf_as_null(key): - """Option change callback for null/inf behaviour +def _use_inf_as_na(key): + """Option change callback for na/inf behaviour Choose which replacement for numpy.isnan / -numpy.isfinite is used. 
Parameters @@ -111,12 +114,12 @@ def _use_inf_as_null(key): from pandas.core.config import get_option flag = get_option(key) if flag: - globals()['_isnull'] = _isnull_old + globals()['_isna'] = _isna_old else: - globals()['_isnull'] = _isnull_new + globals()['_isna'] = _isna_new -def _isnull_ndarraylike(obj): +def _isna_ndarraylike(obj): values = getattr(obj, 'values', obj) dtype = values.dtype @@ -126,10 +129,10 @@ def _isnull_ndarraylike(obj): from pandas import Categorical if not isinstance(values, Categorical): values = values.values - result = values.isnull() + result = values.isna() elif is_interval_dtype(values): from pandas import IntervalIndex - result = IntervalIndex(obj).isnull() + result = IntervalIndex(obj).isna() else: # Working around NumPy ticket 1542 @@ -139,7 +142,7 @@ def _isnull_ndarraylike(obj): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) - vec = lib.isnullobj(values.ravel()) + vec = lib.isnaobj(values.ravel()) result[...] = vec.reshape(shape) elif needs_i8_conversion(obj): @@ -156,7 +159,7 @@ def _isnull_ndarraylike(obj): return result -def _isnull_ndarraylike_old(obj): +def _isna_ndarraylike_old(obj): values = getattr(obj, 'values', obj) dtype = values.dtype @@ -168,7 +171,7 @@ def _isnull_ndarraylike_old(obj): result = np.zeros(values.shape, dtype=bool) else: result = np.empty(shape, dtype=bool) - vec = lib.isnullobj_old(values.ravel()) + vec = lib.isnaobj_old(values.ravel()) result[:] = vec.reshape(shape) elif is_datetime64_dtype(dtype): @@ -185,7 +188,7 @@ def _isnull_ndarraylike_old(obj): return result -def notnull(obj): +def notna(obj): """Replacement for numpy.isfinite / -numpy.isnan which is suitable for use on object arrays. @@ -196,20 +199,23 @@ def notnull(obj): Returns ------- - isnulled : array-like of bool or bool + notisna : array-like of bool or bool Array or bool indicating whether an object is *not* null or if an array is given which of the element is *not* null. 
See also -------- - pandas.isnull : boolean inverse of pandas.notnull + pandas.isna : boolean inverse of pandas.notna """ - res = isnull(obj) + res = isna(obj) if is_scalar(res): return not res return ~res +notnull = notna + + def is_null_datelike_scalar(other): """ test whether the object is a null datelike, e.g. Nat but guard against passing a non-scalar """ @@ -222,11 +228,11 @@ def is_null_datelike_scalar(other): return other.view('i8') == iNaT elif is_integer(other) and other == iNaT: return True - return isnull(other) + return isna(other) return False -def _is_na_compat(arr, fill_value=np.nan): +def _isna_compat(arr, fill_value=np.nan): """ Parameters ---------- @@ -238,7 +244,7 @@ def _is_na_compat(arr, fill_value=np.nan): True if we can fill using this fill_value """ dtype = arr.dtype - if isnull(fill_value): + if isna(fill_value): return not (is_bool_dtype(dtype) or is_integer_dtype(dtype)) return True @@ -286,7 +292,7 @@ def array_equivalent(left, right, strict_nan=False): if is_string_dtype(left) or is_string_dtype(right): if not strict_nan: - # isnull considers NaN and None to be equivalent. + # isna considers NaN and None to be equivalent. return lib.array_equivalent_object( _ensure_object(left.ravel()), _ensure_object(right.ravel())) @@ -305,7 +311,7 @@ def array_equivalent(left, right, strict_nan=False): # NaNs can occur in float and complex arrays. 
if is_float_dtype(left) or is_complex_dtype(left): - return ((left == right) | (isnull(left) & isnull(right))).all() + return ((left == right) | (isna(left) & isna(right))).all() # numpy will will not allow this type of datetimelike vs integer comparison elif is_datetimelike_v_numeric(left, right): @@ -365,7 +371,7 @@ def _maybe_fill(arr, fill_value=np.nan): """ if we have a compatiable fill_value and arr dtype, then fill """ - if _is_na_compat(arr, fill_value): + if _isna_compat(arr, fill_value): arr.fill(fill_value) return arr diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9a79ca1d4eab1..bd5fe7a99fd20 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -57,7 +57,7 @@ is_iterator, is_sequence, is_named_tuple) -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.common import (_try_sort, _default_index, @@ -3619,8 +3619,8 @@ def _combine_frame(self, other, func, fill_value=None, level=None): def _arith_op(left, right): if fill_value is not None: - left_mask = isnull(left) - right_mask = isnull(right) + left_mask = isna(left) + right_mask = isna(right) left = left.copy() right = right.copy() @@ -3793,8 +3793,8 @@ def combine(self, other, func, fill_value=None, overwrite=True): this_dtype = series.dtype other_dtype = otherSeries.dtype - this_mask = isnull(series) - other_mask = isnull(otherSeries) + this_mask = isna(series) + other_mask = isna(otherSeries) # don't overwrite columns unecessarily # DO propagate if this column is not in the intersection @@ -3873,11 +3873,11 @@ def combiner(x, y, needs_i8_conversion=False): x_values = x.values if hasattr(x, 'values') else x y_values = y.values if hasattr(y, 'values') else y if needs_i8_conversion: - mask = isnull(x) + mask = isna(x) x_values = x_values.view('i8') y_values = y_values.view('i8') else: - mask = isnull(x_values) + mask = isna(x_values) return expressions.where(mask, y_values, x_values, raise_on_error=True) @@ 
-3917,18 +3917,18 @@ def update(self, other, join='left', overwrite=True, filter_func=None, that = other[col].values if filter_func is not None: with np.errstate(all='ignore'): - mask = ~filter_func(this) | isnull(that) + mask = ~filter_func(this) | isna(that) else: if raise_conflict: - mask_this = notnull(that) - mask_that = notnull(this) + mask_this = notna(that) + mask_that = notna(this) if any(mask_this & mask_that): raise ValueError("Data overlaps.") if overwrite: - mask = isnull(that) + mask = isna(that) else: - mask = notnull(this) + mask = notna(this) # don't overwrite columns unecessarily if mask.all(): @@ -5068,7 +5068,7 @@ def cov(self, min_periods=None): idx = cols.copy() mat = numeric_df.values - if notnull(mat).all(): + if notna(mat).all(): if min_periods is not None and min_periods > len(mat): baseCov = np.empty((mat.shape[1], mat.shape[1])) baseCov.fill(np.nan) @@ -5168,9 +5168,9 @@ def count(self, axis=0, level=None, numeric_only=False): result = Series(0, index=frame._get_agg_axis(axis)) else: if frame._is_mixed_type: - result = notnull(frame).sum(axis=axis) + result = notna(frame).sum(axis=axis) else: - counts = notnull(frame.values).sum(axis=axis) + counts = notna(frame.values).sum(axis=axis) result = Series(counts, index=frame._get_agg_axis(axis)) return result.astype('int64') @@ -5189,12 +5189,12 @@ def _count_level(self, level, axis=0, numeric_only=False): self._get_axis_name(axis)) if frame._is_mixed_type: - # Since we have mixed types, calling notnull(frame.values) might + # Since we have mixed types, calling notna(frame.values) might # upcast everything to object - mask = notnull(frame).values + mask = notna(frame).values else: # But use the speedup when we have homogeneous dtypes - mask = notnull(frame.values) + mask = notna(frame.values) if axis == 1: # We're transposing the mask rather than frame to avoid potential @@ -5287,7 +5287,7 @@ def f(x): try: if filter_type is None or filter_type == 'numeric': result = result.astype(np.float64) 
- elif filter_type == 'bool' and notnull(result).all(): + elif filter_type == 'bool' and notna(result).all(): result = result.astype(np.bool_) except (ValueError, TypeError): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f12592feaa4c3..36ecf82ca4b1e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -27,7 +27,7 @@ is_re_compilable, pandas_dtype) from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.generic import ABCSeries, ABCPanel from pandas.core.common import (_values_from_object, @@ -3868,7 +3868,7 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, inplace=inplace, downcast=downcast) elif isinstance(value, DataFrame) and self.ndim == 2: - new_data = self.where(self.notnull(), value) + new_data = self.where(self.notna(), value) else: raise ValueError("invalid fill value with a %s" % type(value)) @@ -4266,7 +4266,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False, else: index = _maybe_transposed_self._get_axis(alt_ax) - if pd.isnull(index).any(): + if isna(index).any(): raise NotImplementedError("Interpolation with NaNs in the index " "has not been implemented. 
Try filling " "those NaNs before interpolating.") @@ -4371,14 +4371,14 @@ def asof(self, where, subset=None): loc -= 1 values = self._values - while loc > 0 and isnull(values[loc]): + while loc > 0 and isna(values[loc]): loc -= 1 return values[loc] if not isinstance(where, Index): where = Index(where) if is_list else Index([where]) - nulls = self.isnull() if is_series else self[subset].isnull().any(1) + nulls = self.isna() if is_series else self[subset].isna().any(1) if nulls.all(): if is_series: return self._constructor(np.nan, index=where, name=self.name) @@ -4401,38 +4401,40 @@ def asof(self, where, subset=None): # ---------------------------------------------------------------------- # Action Methods - _shared_docs['isnull'] = """ - Return a boolean same-sized object indicating if the values are null. + _shared_docs['isna'] = """ + Return a boolean same-sized object indicating if the values are na. See Also -------- - notnull : boolean inverse of isnull + notna : boolean inverse of isna """ - @Appender(_shared_docs['isnull']) - def isnull(self): - return isnull(self).__finalize__(self) + @Appender(_shared_docs['isna']) + def isna(self): + return isna(self).__finalize__(self) + isnull = isna - _shared_docs['isnotnull'] = """ + _shared_docs['notna'] = """ Return a boolean same-sized object indicating if the values are - not null. + not na. 
See Also -------- - isnull : boolean inverse of notnull + isna : boolean inverse of notna """ - @Appender(_shared_docs['isnotnull']) - def notnull(self): - return notnull(self).__finalize__(self) + @Appender(_shared_docs['notna']) + def notna(self): + return notna(self).__finalize__(self) + notnull = notna def _clip_with_scalar(self, lower, upper, inplace=False): - if ((lower is not None and np.any(isnull(lower))) or - (upper is not None and np.any(isnull(upper)))): + if ((lower is not None and np.any(isna(lower))) or + (upper is not None and np.any(isna(upper)))): raise ValueError("Cannot use an NA value as a clip threshold") result = self.values - mask = isnull(result) + mask = isna(result) with np.errstate(all='ignore'): if upper is not None: @@ -4456,7 +4458,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if axis is not None: axis = self._get_axis_number(axis) - if np.any(isnull(threshold)): + if np.any(isna(threshold)): raise ValueError("Cannot use an NA value as a clip threshold") # method is self.le for upper bound and self.ge for lower bound @@ -4465,7 +4467,7 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): return self._clip_with_scalar(None, threshold, inplace=inplace) return self._clip_with_scalar(threshold, None, inplace=inplace) - subset = method(threshold, axis=axis) | isnull(self) + subset = method(threshold, axis=axis) | isna(self) # GH #15390 # In order for where method to work, the threshold must @@ -5340,7 +5342,7 @@ def _align_series(self, other, join='outer', axis=None, level=None, right = other.reindex(join_index, level=level) # fill - fill_na = notnull(fill_value) or (method is not None) + fill_na = notna(fill_value) or (method is not None) if fill_na: left = left.fillna(fill_value, method=method, limit=limit, axis=fill_axis) @@ -6315,7 +6317,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, rs = (data.div(data.shift(periods=periods, freq=freq, axis=axis, **kwargs)) - 1) 
if freq is None: - mask = isnull(_values_from_object(self)) + mask = isna(_values_from_object(self)) np.putmask(rs.values, mask, np.nan) return rs @@ -6677,10 +6679,10 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs): if (skipna and issubclass(y.dtype.type, (np.datetime64, np.timedelta64))): result = accum_func(y, axis) - mask = isnull(self) + mask = isna(self) np.putmask(result, mask, tslib.iNaT) elif skipna and not issubclass(y.dtype.type, (np.integer, np.bool_)): - mask = isnull(self) + mask = isna(self) np.putmask(y, mask, mask_a) result = accum_func(y, axis) np.putmask(result, mask, mask_b) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index daf3381ae4e89..1d7c9c1d48e6d 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -36,7 +36,7 @@ _ensure_categorical, _ensure_float) from pandas.core.dtypes.cast import maybe_downcast_to_dtype -from pandas.core.dtypes.missing import isnull, notnull, _maybe_fill +from pandas.core.dtypes.missing import isna, notna, _maybe_fill from pandas.core.common import (_values_from_object, AbstractMethodError, _default_index) @@ -1167,7 +1167,7 @@ def first_compat(x, axis=0): def first(x): x = np.asarray(x) - x = x[notnull(x)] + x = x[notna(x)] if len(x) == 0: return np.nan return x[0] @@ -1182,7 +1182,7 @@ def last_compat(x, axis=0): def last(x): x = np.asarray(x) - x = x[notnull(x)] + x = x[notna(x)] if len(x) == 0: return np.nan return x[-1] @@ -2356,7 +2356,7 @@ def ngroups(self): @cache_readonly def result_index(self): - if len(self.binlabels) != 0 and isnull(self.binlabels[0]): + if len(self.binlabels) != 0 and isna(self.binlabels[0]): return self.binlabels[1:] return self.binlabels @@ -3113,13 +3113,13 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa wrapper = lambda x: func(x, *args, **kwargs) # Interpret np.nan as False. 
- def true_and_notnull(x, *args, **kwargs): + def true_and_notna(x, *args, **kwargs): b = wrapper(x, *args, **kwargs) - return b and notnull(b) + return b and notna(b) try: indices = [self._get_index(name) for name, group in self - if true_and_notnull(group)] + if true_and_notna(group)] except ValueError: raise TypeError("the filter must return a boolean result") except TypeError: @@ -3141,9 +3141,9 @@ def nunique(self, dropna=True): 'val.dtype must be object, got %s' % val.dtype val, _ = algorithms.factorize(val, sort=False) sorter = np.lexsort((val, ids)) - _isnull = lambda a: a == -1 + _isna = lambda a: a == -1 else: - _isnull = isnull + _isna = isna ids, val = ids[sorter], val[sorter] @@ -3153,7 +3153,7 @@ def nunique(self, dropna=True): inc = np.r_[1, val[1:] != val[:-1]] # 1st item of each group is a new unique observation - mask = _isnull(val) + mask = _isna(val) if dropna: inc[idx] = 1 inc[mask] = 0 @@ -3315,7 +3315,7 @@ def count(self): ids, _, ngroups = self.grouper.group_info val = self.obj.get_values() - mask = (ids != -1) & ~isnull(val) + mask = (ids != -1) & ~isna(val) ids = _ensure_platform_int(ids) out = np.bincount(ids[mask], minlength=ngroups or None) @@ -3869,7 +3869,7 @@ def _choose_path(self, fast_path, slow_path, group): if res.shape == res_fast.shape: res_r = res.values.ravel() res_fast_r = res_fast.values.ravel() - mask = notnull(res_r) + mask = notna(res_r) if (res_r[mask] == res_fast_r[mask]).all(): path = fast_path @@ -3949,8 +3949,8 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa pass # interpret the result of the filter - if is_bool(res) or (is_scalar(res) and isnull(res)): - if res and notnull(res): + if is_bool(res) or (is_scalar(res) and isna(res)): + if res and notna(res): indices.append(self._get_index(name)) else: # non scalars aren't allowed @@ -4203,13 +4203,13 @@ def _apply_to_column_groupbys(self, func): def count(self): """ Compute count of group, excluding missing values """ from functools import partial - 
from pandas.core.dtypes.missing import _isnull_ndarraylike as isnull + from pandas.core.dtypes.missing import _isna_ndarraylike as isna data, _ = self._get_data_to_aggregate() ids, _, ngroups = self.grouper.group_info mask = ids != -1 - val = ((mask & ~isnull(blk.get_values())) for blk in data.blocks) + val = ((mask & ~isna(blk.get_values())) for blk in data.blocks) loc = (blk.mgr_locs for blk in data.blocks) counter = partial(count_level_2d, labels=ids, max_bin=ngroups, axis=1) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bbbc19b36964d..a2ee7b9e29a50 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -14,7 +14,7 @@ from pandas.core.dtypes.generic import ABCSeries, ABCMultiIndex, ABCPeriodIndex -from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -214,7 +214,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, if inferred == 'integer': data = np.array(data, copy=copy, dtype=dtype) elif inferred in ['floating', 'mixed-integer-float']: - if isnull(data).any(): + if isna(data).any(): raise ValueError('cannot convert float ' 'NaN to integer') @@ -613,7 +613,7 @@ def where(self, cond, other=None): values = np.where(cond, self.values, other) dtype = self.dtype - if self._is_numeric_dtype and np.any(isnull(values)): + if self._is_numeric_dtype and np.any(isna(values)): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN values in our output. dtype = None @@ -724,7 +724,7 @@ def _coerce_scalar_to_index(self, item): """ dtype = self.dtype - if self._is_numeric_dtype and isnull(item): + if self._is_numeric_dtype and isna(item): # We can't coerce to the numeric dtype of "self" (unless # it's float) if there are NaN values in our output. 
dtype = None @@ -1810,7 +1810,7 @@ def _assert_take_fillable(self, values, indices, allow_fill=True, def _isnan(self): """ return if each value is nan""" if self._can_hold_na: - return isnull(self) + return isna(self) else: # shouldn't reach to this condition by checking hasnans beforehand values = np.empty(len(self), dtype=np.bool_) @@ -1833,7 +1833,7 @@ def hasnans(self): else: return False - def isnull(self): + def isna(self): """ Detect missing values @@ -1841,29 +1841,31 @@ def isnull(self): Returns ------- - a boolean array of whether my values are null + a boolean array of whether my values are na See also -------- - pandas.isnull : pandas version + pandas.isna : pandas version """ return self._isnan + isnull = isna - def notnull(self): + def notna(self): """ - Reverse of isnull + Reverse of isna .. versionadded:: 0.20.0 Returns ------- - a boolean array of whether my values are not null + a boolean array of whether my values are not na See also -------- - pandas.notnull : pandas version + pandas.notna : pandas version """ - return ~self.isnull() + return ~self.isna() + notnull = notna def putmask(self, mask, value): """ @@ -1911,7 +1913,7 @@ def _format_with_header(self, header, na_rep='NaN', **kwargs): for x in values] # could have nans - mask = isnull(values) + mask = isna(values) if mask.any(): result = np.array(result) result[mask] = na_rep @@ -1949,7 +1951,7 @@ def to_native_types(self, slicer=None, **kwargs): def _format_native_types(self, na_rep='', quoting=None, **kwargs): """ actually format my specific types """ - mask = isnull(self) + mask = isna(self) if not self.is_object() and not quoting: values = np.asarray(self).astype(str) else: @@ -2400,7 +2402,7 @@ def _get_unique_index(self, dropna=False): if dropna: try: if self.hasnans: - values = values[~isnull(values)] + values = values[~isna(values)] except NotImplementedError: pass diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 
cd8559bcca03c..845c71b6c41d8 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -17,7 +17,7 @@ from pandas.core.dtypes.generic import ( ABCIndex, ABCSeries, ABCPeriodIndex, ABCIndexClass) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core import common as com, algorithms from pandas.core.algorithms import checked_add_with_arr from pandas.core.common import AbstractMethodError @@ -857,7 +857,7 @@ def _append_same_dtype(self, to_concat, name): def _ensure_datetimelike_to_i8(other): """ helper for coercing an input scalar or array to i8 """ - if lib.isscalar(other) and isnull(other): + if lib.isscalar(other) and isna(other): other = iNaT elif isinstance(other, ABCIndexClass): # convert tz if needed diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e6bc1790f2992..5a04c550f4502 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -23,7 +23,7 @@ _ensure_int64) from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna import pandas.core.dtypes.concat as _concat from pandas.errors import PerformanceWarning @@ -109,7 +109,7 @@ def wrapper(self, other): isinstance(other, compat.string_types)): other = _to_m8(other, tz=self.tz) result = func(other) - if isnull(other): + if isna(other): result.fill(nat_result) else: if isinstance(other, list): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e6b2bc0953680..aa2ad21ae37fd 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -2,7 +2,7 @@ import numpy as np -from pandas.core.dtypes.missing import notnull, isnull +from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.generic import ABCPeriodIndex from pandas.core.dtypes.dtypes import 
IntervalDtype from pandas.core.dtypes.common import ( @@ -222,8 +222,8 @@ def _validate(self): raise ValueError("invalid options for 'closed': %s" % self.closed) if len(self.left) != len(self.right): raise ValueError('left and right must have the same length') - left_mask = notnull(self.left) - right_mask = notnull(self.right) + left_mask = notna(self.left) + right_mask = notna(self.right) if not (left_mask == right_mask).all(): raise ValueError('missing values must be missing in the same ' 'location both left and right sides') @@ -240,7 +240,7 @@ def hasnans(self): def _isnan(self): """ return if each value is nan""" if self._mask is None: - self._mask = isnull(self.left) + self._mask = isna(self.left) return self._mask @cache_readonly @@ -415,7 +415,7 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): right = [] for d in data: - if isnull(d): + if isna(d): left.append(np.nan) right.append(np.nan) continue diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ed7ca079a07b5..420788f9008cd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -19,7 +19,7 @@ is_iterator, is_list_like, is_scalar) -from pandas.core.dtypes.missing import isnull, array_equivalent +from pandas.core.dtypes.missing import isna, array_equivalent from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.common import (_values_from_object, is_bool_indexer, @@ -783,8 +783,8 @@ def duplicated(self, keep='first'): @Appender(ibase._index_shared_docs['fillna']) def fillna(self, value=None, downcast=None): - # isnull is not implemented for MultiIndex - raise NotImplementedError('isnull is not defined for MultiIndex') + # isna is not implemented for MultiIndex + raise NotImplementedError('isna is not defined for MultiIndex') @Appender(_index_shared_docs['dropna']) def dropna(self, how='any'): @@ -920,7 +920,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False, else: # weird all NA case - 
formatted = [pprint_thing(na if isnull(x) else x, + formatted = [pprint_thing(na if isna(x) else x, escape_chars=('\t', '\r', '\n')) for x in algos.take_1d(lev._values, lab)] stringified_levels.append(formatted) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 68713743d72ed..2823951c0f348 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -13,7 +13,7 @@ is_timedelta64_dtype, is_timedelta64_ns_dtype, _ensure_int64) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCSeries from pandas.core.common import _maybe_box, _values_from_object @@ -51,7 +51,7 @@ def wrapper(self, other): # failed to parse as timedelta raise TypeError(msg.format(type(other))) result = func(other) - if isnull(other): + if isna(other): result.fill(nat_result) else: if not is_list_like(other): @@ -331,7 +331,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): if opstr in ['__div__', '__truediv__', '__floordiv__']: if _is_convertible_to_td(other): other = Timedelta(other) - if isnull(other): + if isna(other): raise NotImplementedError( "division by pd.NaT not implemented") @@ -430,7 +430,7 @@ def components(self): hasnans = self.hasnans if hasnans: def f(x): - if isnull(x): + if isna(x): return [np.nan] * len(columns) return x.components else: @@ -685,7 +685,7 @@ def get_loc(self, key, method=None, tolerance=None): if is_list_like(key): raise TypeError - if isnull(key): + if isna(key): key = NaT if tolerance is not None: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ae0aaf98fdf02..9f04df3322eaa 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -15,7 +15,7 @@ is_sparse, _is_unorderable_exception, _ensure_platform_int) -from pandas.core.dtypes.missing import isnull, _infer_fill_value +from pandas.core.dtypes.missing import isna, _infer_fill_value from pandas.core.index import Index, 
MultiIndex @@ -1426,7 +1426,7 @@ def _has_valid_type(self, key, axis): else: def error(): - if isnull(key): + if isna(key): raise TypeError("cannot use label indexing with a null " "key") raise KeyError("the label [%s] is not in the [%s]" % @@ -1938,7 +1938,7 @@ def check_bool_indexer(ax, key): result = key if isinstance(key, ABCSeries) and not key.index.equals(ax): result = result.reindex(ax) - mask = isnull(result._values) + mask = isna(result._values) if mask.any(): raise IndexingError('Unalignable boolean Series provided as ' 'indexer (index of the boolean Series and of ' diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f2a7ac76481d4..03b35271f150c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -42,8 +42,8 @@ astype_nansafe, find_common_type) from pandas.core.dtypes.missing import ( - isnull, array_equivalent, - _is_na_compat, + isna, array_equivalent, + _isna_compat, is_null_datelike_scalar) import pandas.core.dtypes.concat as _concat @@ -375,7 +375,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None, return self.copy() original_value = value - mask = isnull(self.values) + mask = isna(self.values) if limit is not None: if not is_integer(limit): raise ValueError('Limit must be an integer') @@ -569,7 +569,7 @@ def _try_cast_result(self, result, dtype=None): dtype = dtype.type if issubclass(dtype, (np.bool_, np.object_)): if issubclass(dtype, np.bool_): - if isnull(result).all(): + if isna(result).all(): return result.astype(np.bool_) else: result = result.astype(np.object_) @@ -611,7 +611,7 @@ def to_native_types(self, slicer=None, na_rep='nan', quoting=None, values = self.values if slicer is not None: values = values[:, slicer] - mask = isnull(values) + mask = isna(values) if not self.is_object and not quoting: values = values.astype(str) @@ -639,7 +639,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, inplace = validate_bool_kwarg(inplace, 'inplace') original_to_replace = 
to_replace - mask = isnull(self.values) + mask = isna(self.values) # try to replace, if we raise an error, convert to ObjectBlock and # retry try: @@ -826,7 +826,7 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, mask = mask.values # if we are passed a scalar None, convert it here - if not is_list_like(new) and isnull(new) and not self.is_object: + if not is_list_like(new) and isna(new) and not self.is_object: new = self.fill_value if self._can_hold_element(new): @@ -1353,7 +1353,7 @@ def _nanpercentile1D(values, mask, q, **kw): def _nanpercentile(values, q, axis, **kw): - mask = isnull(self.values) + mask = isna(self.values) if not is_scalar(mask) and mask.any(): if self.ndim == 1: return _nanpercentile1D(values, mask, q, **kw) @@ -1604,7 +1604,7 @@ def to_native_types(self, slicer=None, na_rep='', float_format=None, # output (important for appropriate 'quoting' behaviour), # so do not pass it through the FloatArrayFormatter if float_format is None and decimal == '.': - mask = isnull(values) + mask = isna(values) if not quoting: values = values.astype(str) @@ -1731,7 +1731,7 @@ def _try_coerce_args(self, values, other): base-type values, values mask, base-type other, other mask """ - values_mask = isnull(values) + values_mask = isna(values) values = values.view('i8') other_mask = False @@ -1741,20 +1741,20 @@ def _try_coerce_args(self, values, other): other = tslib.iNaT other_mask = True elif isinstance(other, Timedelta): - other_mask = isnull(other) + other_mask = isna(other) other = other.value elif isinstance(other, np.timedelta64): - other_mask = isnull(other) + other_mask = isna(other) other = Timedelta(other).value elif isinstance(other, timedelta): other = Timedelta(other).value elif isinstance(other, np.ndarray): - other_mask = isnull(other) + other_mask = isna(other) other = other.astype('i8', copy=False).view('i8') else: # scalar other = Timedelta(other) - other_mask = isnull(other) + other_mask = isna(other) other = other.value return 
values, values_mask, other, other_mask @@ -1762,7 +1762,7 @@ def _try_coerce_args(self, values, other): def _try_coerce_result(self, result): """ reverse of try_coerce_args / try_operate """ if isinstance(result, np.ndarray): - mask = isnull(result) + mask = isna(result) if result.dtype.kind in ['i', 'f', 'O']: result = result.astype('m8[ns]') result[mask] = tslib.iNaT @@ -1780,7 +1780,7 @@ def to_native_types(self, slicer=None, na_rep=None, quoting=None, values = self.values if slicer is not None: values = values[:, slicer] - mask = isnull(values) + mask = isna(values) rvalues = np.empty(values.shape, dtype=object) if na_rep is None: @@ -2048,7 +2048,7 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, # deal with replacing values with objects (strings) that match but # whose replacement is not a string (numeric, nan, object) - if isnull(value) or not isinstance(value, compat.string_types): + if isna(value) or not isinstance(value, compat.string_types): def re_replacer(s): try: @@ -2203,7 +2203,7 @@ def to_native_types(self, slicer=None, na_rep='', quoting=None, **kwargs): if slicer is not None: # Categorical is always one dimension values = values[slicer] - mask = isnull(values) + mask = isna(values) values = np.array(values, dtype='object') values[mask] = na_rep @@ -2247,7 +2247,7 @@ def _can_hold_element(self, element): element = np.array(element) return element.dtype == _NS_DTYPE or element.dtype == np.int64 return (is_integer(element) or isinstance(element, datetime) or - isnull(element)) + isna(element)) def _try_cast(self, element): try: @@ -2272,7 +2272,7 @@ def _try_coerce_args(self, values, other): base-type values, values mask, base-type other, other mask """ - values_mask = isnull(values) + values_mask = isna(values) values = values.view('i8') other_mask = False @@ -2286,14 +2286,14 @@ def _try_coerce_args(self, values, other): if getattr(other, 'tz') is not None: raise TypeError("cannot coerce a Timestamp with a tz on a " 
"naive Block") - other_mask = isnull(other) + other_mask = isna(other) other = other.asm8.view('i8') elif hasattr(other, 'dtype') and is_integer_dtype(other): other = other.view('i8') else: try: other = np.asarray(other) - other_mask = isnull(other) + other_mask = isna(other) other = other.astype('i8', copy=False).view('i8') except ValueError: @@ -2437,26 +2437,26 @@ def _try_coerce_args(self, values, other): ------- base-type values, values mask, base-type other, other mask """ - values_mask = _block_shape(isnull(values), ndim=self.ndim) + values_mask = _block_shape(isna(values), ndim=self.ndim) # asi8 is a view, needs copy values = _block_shape(values.asi8, ndim=self.ndim) other_mask = False if isinstance(other, ABCSeries): other = self._holder(other) - other_mask = isnull(other) + other_mask = isna(other) if isinstance(other, bool): raise TypeError elif (is_null_datelike_scalar(other) or - (is_scalar(other) and isnull(other))): + (is_scalar(other) and isna(other))): other = tslib.iNaT other_mask = True elif isinstance(other, self._holder): if other.tz != self.values.tz: raise ValueError("incompatible or non tz-aware value") other = other.asi8 - other_mask = isnull(other) + other_mask = isna(other) elif isinstance(other, (np.datetime64, datetime, date)): other = lib.Timestamp(other) tz = getattr(other, 'tz', None) @@ -2464,7 +2464,7 @@ def _try_coerce_args(self, values, other): # test we can have an equal time zone if tz is None or str(tz) != str(self.values.tz): raise ValueError("incompatible or non tz-aware value") - other_mask = isnull(other) + other_mask = isna(other) other = other.value return values, values_mask, other, other_mask @@ -3187,7 +3187,7 @@ def reduction(self, f, axis=0, consolidate=True, transposed=False, placement=np.arange(len(values)))], axes[0]) - def isnull(self, **kwargs): + def isna(self, **kwargs): return self.apply('apply', **kwargs) def where(self, **kwargs): @@ -3242,8 +3242,8 @@ def replace_list(self, src_list, dest_list, 
inplace=False, regex=False, values = self.as_matrix() def comp(s): - if isnull(s): - return isnull(values) + if isna(s): + return isna(values) return _maybe_compare(values, getattr(s, 'asm8', s), operator.eq) def _cast_scalar(block, scalar): @@ -3586,10 +3586,10 @@ def get(self, item, fastpath=True): """ if self.items.is_unique: - if not isnull(item): + if not isna(item): loc = self.items.get_loc(item) else: - indexer = np.arange(len(self.items))[isnull(self.items)] + indexer = np.arange(len(self.items))[isna(self.items)] # allow a single nan location indexer if not is_scalar(indexer): @@ -3601,7 +3601,7 @@ def get(self, item, fastpath=True): return self.iget(loc, fastpath=fastpath) else: - if isnull(item): + if isna(item): raise TypeError("cannot label index with a null key") indexer = self.items.get_indexer_for([item]) @@ -4778,7 +4778,7 @@ def _putmask_smart(v, m, n): # make sure that we have a nullable type # if we have nulls - if not _is_na_compat(v, nn[0]): + if not _isna_compat(v, nn[0]): raise ValueError nn_at = nn.astype(v.dtype) @@ -4884,7 +4884,7 @@ def get_empty_dtype_and_na(join_units): # Null blocks should not influence upcast class selection, unless there # are only null blocks, when same upcasting rules must be applied to # null upcast classes. 
- if unit.is_null: + if unit.is_na: null_upcast_classes[upcast_cls].append(dtype) else: upcast_classes[upcast_cls].append(dtype) @@ -5154,7 +5154,7 @@ def dtype(self): self.block.fill_value)[0]) @cache_readonly - def is_null(self): + def is_na(self): if self.block is None: return True @@ -5177,7 +5177,7 @@ def is_null(self): total_len = values_flat.shape[0] chunk_len = max(total_len // 40, 1000) for i in range(0, total_len, chunk_len): - if not isnull(values_flat[i:i + chunk_len]).all(): + if not isna(values_flat[i:i + chunk_len]).all(): return False return True @@ -5190,7 +5190,7 @@ def get_reindexed_values(self, empty_dtype, upcasted_na): else: fill_value = upcasted_na - if self.is_null: + if self.is_na: if getattr(self.block, 'is_object', False): # we want to avoid filling with np.nan if we are # using None; we already know that we are all diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 5aabc9d8730dd..93281e20a2a96 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -20,7 +20,7 @@ _ensure_float64) from pandas.core.dtypes.cast import infer_dtype_from_array -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna def mask_missing(arr, values_to_mask): @@ -36,7 +36,7 @@ def mask_missing(arr, values_to_mask): except Exception: values_to_mask = np.array(values_to_mask, dtype=object) - na_mask = isnull(values_to_mask) + na_mask = isna(values_to_mask) nonna = values_to_mask[~na_mask] mask = None @@ -63,9 +63,9 @@ def mask_missing(arr, values_to_mask): if na_mask.any(): if mask is None: - mask = isnull(arr) + mask = isna(arr) else: - mask |= isnull(arr) + mask |= isna(arr) return mask @@ -122,7 +122,7 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, """ # Treat the original, non-scipy methods first. 
- invalid = isnull(yvalues) + invalid = isna(yvalues) valid = ~invalid if not valid.any(): @@ -479,7 +479,7 @@ def pad_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) return values @@ -503,7 +503,7 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) _method(values, mask, limit=limit) @@ -528,7 +528,7 @@ def pad_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) if np.all(values.shape): @@ -557,7 +557,7 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) if mask is None: - mask = isnull(values) + mask = isna(values) mask = mask.view(np.uint8) if np.all(values.shape): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 1d64f87b15761..5bebb8eb65b23 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -16,7 +16,7 @@ is_datetime_or_timedelta_dtype, is_int_or_datetime_dtype, is_any_int_dtype) from pandas.core.dtypes.cast import _int64_max, maybe_upcast_putmask -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.config import get_option from pandas.core.common import _values_from_object @@ -195,7 +195,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, if isfinite: mask = _isfinite(values) else: - mask = isnull(values) + mask = isna(values) dtype = values.dtype dtype_ok = _na_ok_dtype(dtype) @@ -232,7 +232,7 @@ def _get_values(values, skipna, fill_value=None, fill_value_typ=None, 
def _isfinite(values): if is_datetime_or_timedelta_dtype(values): - return isnull(values) + return isna(values) if (is_complex_dtype(values) or is_float_dtype(values) or is_integer_dtype(values) or is_bool_dtype(values)): return ~np.isfinite(values) @@ -329,7 +329,7 @@ def nanmedian(values, axis=None, skipna=True): values, mask, dtype, dtype_max = _get_values(values, skipna) def get_median(x): - mask = notnull(x) + mask = notna(x) if not skipna and not mask.all(): return np.nan return algos.median(_values_from_object(x[mask])) @@ -395,7 +395,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): values = _values_from_object(values) dtype = values.dtype - mask = isnull(values) + mask = isna(values) if is_any_int_dtype(values): values = values.astype('f8') values[mask] = np.nan @@ -434,7 +434,7 @@ def nanvar(values, axis=None, skipna=True, ddof=1): def nansem(values, axis=None, skipna=True, ddof=1): var = nanvar(values, axis, skipna, ddof=ddof) - mask = isnull(values) + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype) @@ -503,7 +503,7 @@ def nanskew(values, axis=None, skipna=True): """ values = _values_from_object(values) - mask = isnull(values) + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -558,7 +558,7 @@ def nankurt(values, axis=None, skipna=True): """ values = _values_from_object(values) - mask = isnull(values) + mask = isna(values) if not is_float_dtype(values.dtype): values = values.astype('f8') count = _get_counts(mask, axis) @@ -615,7 +615,7 @@ def nankurt(values, axis=None, skipna=True): @disallow('M8', 'm8') def nanprod(values, axis=None, skipna=True): - mask = isnull(values) + mask = isna(values) if skipna and not is_any_int_dtype(values): values = values.copy() values[mask] = 1 @@ -696,7 +696,7 @@ def nancorr(a, b, method='pearson', min_periods=None): if min_periods is None: 
min_periods = 1 - valid = notnull(a) & notnull(b) + valid = notna(a) & notna(b) if not valid.all(): a = a[valid] b = b[valid] @@ -740,7 +740,7 @@ def nancov(a, b, min_periods=None): if min_periods is None: min_periods = 1 - valid = notnull(a) & notnull(b) + valid = notna(a) & notna(b) if not valid.all(): a = a[valid] b = b[valid] @@ -778,8 +778,8 @@ def _ensure_numeric(x): def make_nancomp(op): def f(x, y): - xmask = isnull(x) - ymask = isnull(y) + xmask = isna(x) + ymask = isna(y) mask = xmask | ymask with np.errstate(all='ignore'): diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 55473ec8d7cad..b72ca5f43970f 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -23,7 +23,7 @@ from pandas.errors import PerformanceWarning from pandas.core.common import _values_from_object, _maybe_match_name -from pandas.core.dtypes.missing import notnull, isnull +from pandas.core.dtypes.missing import notna, isna from pandas.core.dtypes.common import ( needs_i8_conversion, is_datetimelike_v_numeric, @@ -463,7 +463,7 @@ def _convert_to_array(self, values, name=None, other=None): # we are in the wrong path if (supplied_dtype is None and other is not None and (other.dtype in ('timedelta64[ns]', 'datetime64[ns]')) and - isnull(values).all()): + isna(values).all()): values = np.empty(values.shape, dtype='timedelta64[ns]') values[:] = iNaT @@ -494,7 +494,7 @@ def _convert_to_array(self, values, name=None, other=None): raise TypeError("incompatible type for a datetime/timedelta " "operation [{0}]".format(name)) elif inferred_type == 'floating': - if (isnull(values).all() and + if (isna(values).all() and name in ('__add__', '__radd__', '__sub__', '__rsub__')): values = np.empty(values.shape, dtype=other.dtype) values[:] = iNaT @@ -510,7 +510,7 @@ def _convert_to_array(self, values, name=None, other=None): def _convert_for_datetime(self, lvalues, rvalues): from pandas.core.tools.timedeltas import to_timedelta - mask = isnull(lvalues) | isnull(rvalues) + mask = isna(lvalues) 
| isna(rvalues) # datetimes require views if self.is_datetime_lhs or self.is_datetime_rhs: @@ -660,11 +660,11 @@ def na_op(x, y): if isinstance(y, (np.ndarray, ABCSeries, pd.Index)): dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) - mask = notnull(x) & notnull(y) + mask = notna(x) & notna(y) result[mask] = op(x[mask], _values_from_object(y[mask])) elif isinstance(x, np.ndarray): result = np.empty(len(x), dtype=x.dtype) - mask = notnull(x) + mask = notna(x) result[mask] = op(x[mask], y) else: raise TypeError("{typ} cannot perform the operation " @@ -774,7 +774,7 @@ def na_op(x, y): raise TypeError("invalid type comparison") # numpy does not like comparisons vs None - if is_scalar(y) and isnull(y): + if is_scalar(y) and isna(y): if name == '__ne__': return np.ones(len(x), dtype=bool) else: @@ -786,10 +786,10 @@ def na_op(x, y): (not is_scalar(y) and needs_i8_conversion(y))): if is_scalar(y): - mask = isnull(x) + mask = isna(x) y = libindex.convert_scalar(x, _values_from_object(y)) else: - mask = isnull(x) | isnull(y) + mask = isna(x) | isna(y) y = y.view('i8') x = x.view('i8') @@ -896,7 +896,7 @@ def na_op(x, y): try: # let null fall thru - if not isnull(y): + if not isna(y): y = bool(y) result = lib.scalar_binop(x, y, op) except: @@ -1180,7 +1180,7 @@ def na_op(x, y): dtype = np.find_common_type([x.dtype, y.dtype], []) result = np.empty(x.size, dtype=dtype) yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) + mask = notna(xrav) & notna(yrav) xrav = xrav[mask] # we may need to manually @@ -1195,7 +1195,7 @@ def na_op(x, y): result[mask] = op(xrav, yrav) elif hasattr(x, 'size'): result = np.empty(x.size, dtype=x.dtype) - mask = notnull(xrav) + mask = notna(xrav) xrav = xrav[mask] if np.prod(xrav.shape): with np.errstate(all='ignore'): @@ -1257,11 +1257,11 @@ def na_op(x, y): result = np.empty(x.size, dtype=bool) if isinstance(y, (np.ndarray, ABCSeries)): yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) + mask = 
notna(xrav) & notna(yrav) result[mask] = op(np.array(list(xrav[mask])), np.array(list(yrav[mask]))) else: - mask = notnull(xrav) + mask = notna(xrav) result[mask] = op(np.array(list(xrav[mask])), y) if op == operator.ne: # pragma: no cover @@ -1329,7 +1329,7 @@ def na_op(x, y): # TODO: might need to find_common_type here? result = np.empty(len(x), dtype=x.dtype) - mask = notnull(x) + mask = notna(x) result[mask] = op(x[mask], y) result, changed = maybe_upcast_putmask(result, ~mask, np.nan) @@ -1359,11 +1359,11 @@ def na_op(x, y): result = np.empty(x.size, dtype=bool) if isinstance(y, np.ndarray): yrav = y.ravel() - mask = notnull(xrav) & notnull(yrav) + mask = notna(xrav) & notna(yrav) result[mask] = op(np.array(list(xrav[mask])), np.array(list(yrav[mask]))) else: - mask = notnull(xrav) + mask = notna(xrav) result[mask] = op(np.array(list(xrav[mask])), y) if op == operator.ne: # pragma: no cover diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 69a8468552f54..8ee48563790c7 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -12,7 +12,7 @@ from pandas.core.dtypes.common import ( is_integer, is_list_like, is_string_like, is_scalar) -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna import pandas.core.computation.expressions as expressions import pandas.core.common as com @@ -688,7 +688,7 @@ def dropna(self, axis=0, how='any', inplace=False): axis = self._get_axis_number(axis) values = self.values - mask = notnull(values) + mask = notna(values) for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))): mask = mask.sum(ax) @@ -910,7 +910,7 @@ def to_frame(self, filter_observations=True): if filter_observations: # shaped like the return DataFrame - mask = notnull(self.values).all(axis=0) + mask = notna(self.values).all(axis=0) # size = mask.sum() selector = mask.ravel() else: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0581ec7484c49..f3b772f524dc2 100644 --- 
a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -174,7 +174,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', if margins: if dropna: - data = data[data.notnull().all(axis=1)] + data = data[data.notna().all(axis=1)] table = _add_margins(table, data, values, rows=index, cols=columns, aggfunc=aggfunc, margins_name=margins_name) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index dcb83d225699d..b7638471f2ad0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -12,7 +12,7 @@ is_list_like, is_bool_dtype, needs_i8_conversion) from pandas.core.dtypes.cast import maybe_promote -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna import pandas.core.dtypes.concat as _concat from pandas.core.series import Series @@ -547,7 +547,7 @@ def factorize(index): new_values = frame.values.ravel() if dropna: - mask = notnull(new_values) + mask = notna(new_values) new_values = new_values[mask] new_index = new_index[mask] return Series(new_values, index=new_index) @@ -835,7 +835,7 @@ def lreshape(data, groups, dropna=True, label=None): if dropna: mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool) for c in pivot_cols: - mask &= notnull(mdata[c]) + mask &= notna(mdata[c]) if not mask.all(): mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index d8398023a5083..1cb39faa2e869 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -2,7 +2,7 @@ Quantilization functions and related stuff """ -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.common import ( is_integer, is_scalar, @@ -241,7 +241,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, if include_lowest: ids[x == bins[0]] = 1 - na_mask = isnull(x) | (ids == len(bins)) | (ids == 0) + na_mask = isna(x) | 
(ids == len(bins)) | (ids == 0) has_nas = na_mask.any() if labels is not False: diff --git a/pandas/core/series.py b/pandas/core/series.py index 4d5b718ce0ae9..29c6531827e6b 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -36,7 +36,7 @@ maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable) -from pandas.core.dtypes.missing import isnull, notnull, remove_na_arraylike +from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike from pandas.core.common import (is_bool_indexer, _default_index, @@ -735,7 +735,7 @@ def setitem(key, value): pass elif is_timedelta64_dtype(self.dtype): # reassign a null value to iNaT - if isnull(value): + if isna(value): value = iNaT try: @@ -1216,7 +1216,7 @@ def count(self, level=None): from pandas.core.index import _get_na_value if level is None: - return notnull(_values_from_object(self)).sum() + return notna(_values_from_object(self)).sum() if isinstance(level, compat.string_types): level = self.index._get_level_number(level) @@ -1229,7 +1229,7 @@ def count(self, level=None): lab[mask] = cnt = len(lev) lev = lev.insert(cnt, _get_na_value(lev.dtype.type)) - obs = lab[notnull(self.values)] + obs = lab[notna(self.values)] out = np.bincount(obs, minlength=len(lev) or None) return self._constructor(out, index=lev, dtype='int64').__finalize__(self) @@ -1643,8 +1643,8 @@ def _binop(self, other, func, level=None, fill_value=None): other_vals = other.values if fill_value is not None: - this_mask = isnull(this_vals) - other_mask = isnull(other_vals) + this_mask = isna(this_vals) + other_mask = isna(other_vals) this_vals = this_vals.copy() other_vals = other_vals.copy() @@ -1713,7 +1713,7 @@ def combine_first(self, other): other = other.reindex(new_index, copy=False) # TODO: do we need name? 
name = _maybe_match_name(self, other) # noqa - rs_vals = com._where_compat(isnull(this), other._values, this._values) + rs_vals = com._where_compat(isna(this), other._values, this._values) return self._constructor(rs_vals, index=new_index).__finalize__(self) def update(self, other): @@ -1726,7 +1726,7 @@ def update(self, other): other : Series """ other = other.reindex_like(self) - mask = notnull(other) + mask = notna(other) self._data = self._data.putmask(mask=mask, new=other, inplace=True) self._maybe_update_cacher() @@ -1759,7 +1759,7 @@ def _try_kind_sort(arr): arr = self._values sortedIdx = np.empty(len(self), dtype=np.int32) - bad = isnull(arr) + bad = isna(arr) good = ~bad idx = _default_index(len(self)) @@ -1864,7 +1864,7 @@ def argsort(self, axis=0, kind='quicksort', order=None): numpy.ndarray.argsort """ values = self._values - mask = isnull(values) + mask = isna(values) if mask.any(): result = Series(-1, index=self.index, name=self.name, @@ -2193,7 +2193,7 @@ def map(self, arg, na_action=None): if na_action == 'ignore': def map_f(values, f): return lib.map_infer_mask(values, f, - isnull(values).view(np.uint8)) + isna(values).view(np.uint8)) else: map_f = lib.map_infer @@ -2802,7 +2802,7 @@ def first_valid_index(self): if len(self) == 0: return None - mask = isnull(self._values) + mask = isna(self._values) i = mask.argmin() if mask[i]: return None @@ -2816,7 +2816,7 @@ def last_valid_index(self): if len(self) == 0: return None - mask = isnull(self._values[::-1]) + mask = isna(self._values[::-1]) i = mask.argmin() if mask[i]: return None @@ -2988,7 +2988,7 @@ def _try_cast(arr, take_fast_path): # possibility of nan -> garbage if is_float_dtype(data.dtype) and is_integer_dtype(dtype): - if not isnull(data).any(): + if not isna(data).any(): subarr = _try_cast(data, True) elif copy: subarr = data.copy() diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 10b80cbc3483d..874da2e743fa7 100644 --- a/pandas/core/sorting.py +++ 
b/pandas/core/sorting.py @@ -7,7 +7,7 @@ _ensure_platform_int, _ensure_int64, is_categorical_dtype) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna import pandas.core.algorithms as algorithms from pandas._libs import lib, algos, hashtable from pandas._libs.hashtable import unique_label_indices @@ -237,7 +237,7 @@ def nargsort(items, kind='quicksort', ascending=True, na_position='last'): items = np.asanyarray(items) idx = np.arange(len(items)) - mask = isnull(items) + mask = isna(items) non_nans = items[~mask] non_nan_idx = idx[~mask] nan_idx = np.nonzero(mask)[0] diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 5c1cf8c773501..7eaa21efd8695 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -27,7 +27,7 @@ from pandas.core.dtypes.cast import ( maybe_convert_platform, maybe_promote, astype_nansafe, find_common_type) -from pandas.core.dtypes.missing import isnull, notnull, na_value_for_dtype +from pandas.core.dtypes.missing import isna, notna, na_value_for_dtype import pandas._libs.sparse as splib from pandas._libs.sparse import SparseIndex, BlockIndex, IntIndex @@ -579,12 +579,12 @@ def count(self): @property def _null_fill_value(self): - return isnull(self.fill_value) + return isna(self.fill_value) @property def _valid_sp_values(self): sp_vals = self.sp_values - mask = notnull(sp_vals) + mask = notna(sp_vals) return sp_vals[mask] @Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs) @@ -600,7 +600,7 @@ def fillna(self, value, downcast=None): fill_value=value) else: new_values = self.sp_values.copy() - new_values[isnull(new_values)] = value + new_values[isna(new_values)] = value return self._simple_new(new_values, self.sp_index, fill_value=self.fill_value) @@ -690,7 +690,7 @@ def value_counts(self, dropna=True): pass else: if self._null_fill_value: - mask = pd.isnull(keys) + mask = pd.isna(keys) else: mask = keys == self.fill_value @@ -770,8 +770,8 @@ def 
make_sparse(arr, kind='block', fill_value=None): if fill_value is None: fill_value = na_value_for_dtype(arr.dtype) - if isnull(fill_value): - mask = notnull(arr) + if isna(fill_value): + mask = notna(arr) else: # For str arrays in NumPy 1.12.0, operator!= below isn't # element-wise but just returns False if fill_value is not str, diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 461dd50c5da6e..87e4e54266008 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -10,7 +10,7 @@ from pandas import compat import numpy as np -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.cast import maybe_upcast, find_common_type from pandas.core.dtypes.common import _ensure_platform_int, is_scipy_sparse @@ -564,7 +564,7 @@ def _combine_match_index(self, other, func, level=None, fill_value=None): new_data[col] = func(series.values, other.values) # fill_value is a function of our operator - if isnull(other.fill_value) or isnull(self.default_fill_value): + if isna(other.fill_value) or isna(self.default_fill_value): fill_value = np.nan else: fill_value = func(np.float64(self.default_fill_value), @@ -649,7 +649,7 @@ def _reindex_columns(self, columns, method, copy, level, fill_value=None, if level is not None: raise TypeError('Reindex by level not supported for sparse') - if notnull(fill_value): + if notna(fill_value): raise NotImplementedError("'fill_value' argument is not supported") if limit: @@ -783,13 +783,15 @@ def cumsum(self, axis=0, *args, **kwargs): return self.apply(lambda x: x.cumsum(), axis=axis) - @Appender(generic._shared_docs['isnull']) - def isnull(self): - return self._apply_columns(lambda x: x.isnull()) + @Appender(generic._shared_docs['isna']) + def isna(self): + return self._apply_columns(lambda x: x.isna()) + isnull = isna - @Appender(generic._shared_docs['isnotnull']) - def isnotnull(self): - return self._apply_columns(lambda x: 
x.isnotnull()) + @Appender(generic._shared_docs['notna']) + def notna(self): + return self._apply_columns(lambda x: x.notna()) + notnull = notna def apply(self, func, axis=0, broadcast=False, reduce=False): """ diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 9dd061e26ba06..2c46624c025a7 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -8,7 +8,7 @@ import numpy as np import warnings -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import is_scalar from pandas.core.common import _values_from_object, _maybe_match_name @@ -173,7 +173,7 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block', else: length = len(index) - if data == fill_value or (isnull(data) and isnull(fill_value)): + if data == fill_value or (isna(data) and isna(fill_value)): if kind == 'block': sparse_index = BlockIndex(length, [], []) else: @@ -642,19 +642,21 @@ def cumsum(self, axis=0, *args, **kwargs): new_array, index=self.index, sparse_index=new_array.sp_index).__finalize__(self) - @Appender(generic._shared_docs['isnull']) - def isnull(self): - arr = SparseArray(isnull(self.values.sp_values), + @Appender(generic._shared_docs['isna']) + def isna(self): + arr = SparseArray(isna(self.values.sp_values), sparse_index=self.values.sp_index, - fill_value=isnull(self.fill_value)) + fill_value=isna(self.fill_value)) return self._constructor(arr, index=self.index).__finalize__(self) + isnull = isna - @Appender(generic._shared_docs['isnotnull']) - def isnotnull(self): - arr = SparseArray(notnull(self.values.sp_values), + @Appender(generic._shared_docs['notna']) + def notna(self): + arr = SparseArray(notna(self.values.sp_values), sparse_index=self.values.sp_index, - fill_value=notnull(self.fill_value)) + fill_value=notna(self.fill_value)) return self._constructor(arr, index=self.index).__finalize__(self) + notnull = notna def dropna(self, 
axis=0, inplace=False, **kwargs): """ @@ -666,7 +668,7 @@ def dropna(self, axis=0, inplace=False, **kwargs): if inplace: raise NotImplementedError("Cannot perform inplace dropna" " operations on a SparseSeries") - if isnull(self.fill_value): + if isna(self.fill_value): return dense_valid else: dense_valid = dense_valid[dense_valid != self.fill_value] @@ -678,7 +680,7 @@ def shift(self, periods, freq=None, axis=0): return self.copy() # no special handling of fill values yet - if not isnull(self.fill_value): + if not isna(self.fill_value): shifted = self.to_dense().shift(periods, freq=freq, axis=axis) return shifted.to_sparse(fill_value=self.fill_value, diff --git a/pandas/core/strings.py b/pandas/core/strings.py index cd7e313b13f1e..30465561a911c 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2,7 +2,7 @@ from pandas.compat import zip from pandas.core.dtypes.generic import ABCSeries, ABCIndex -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import ( is_bool_dtype, is_categorical_dtype, @@ -101,7 +101,7 @@ def str_cat(arr, others=None, sep=None, na_rep=None): arrays = _get_array_list(arr, others) n = _length_check(arrays) - masks = np.array([isnull(x) for x in arrays]) + masks = np.array([isna(x) for x in arrays]) cats = None if na_rep is None: @@ -129,12 +129,12 @@ def str_cat(arr, others=None, sep=None, na_rep=None): return result else: arr = np.asarray(arr, dtype=object) - mask = isnull(arr) + mask = isna(arr) if na_rep is None and mask.any(): if sep == '': na_rep = '' else: - return sep.join(arr[notnull(arr)]) + return sep.join(arr[notna(arr)]) return sep.join(np.where(mask, na_rep, arr)) @@ -165,7 +165,7 @@ def _map(f, arr, na_mask=False, na_value=np.nan, dtype=object): if not isinstance(arr, np.ndarray): arr = np.asarray(arr, dtype=object) if na_mask: - mask = isnull(arr) + mask = isna(arr) try: convert = not all(mask) result = lib.map_infer_mask(arr, 
f, mask.view(np.uint8), convert) @@ -1391,7 +1391,7 @@ def __getitem__(self, key): def __iter__(self): i = 0 g = self.get(i) - while g.notnull().any(): + while g.notna().any(): yield g i += 1 g = self.get(i) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9c02a6212c412..a1f323aff7c1a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -18,7 +18,7 @@ from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, ABCDataFrame) -from pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna from pandas.core import algorithms import pandas.compat as compat @@ -176,7 +176,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element - non_nan_elements = notnull(arr).nonzero()[0] + non_nan_elements = notna(arr).nonzero()[0] if len(non_nan_elements): return _guess_datetime_format(arr[non_nan_elements[0]], **kwargs) @@ -665,7 +665,7 @@ def calc_with_mask(carg, mask): # a float with actual np.nan try: carg = arg.astype(np.float64) - return calc_with_mask(carg, notnull(carg)) + return calc_with_mask(carg, notna(carg)) except: pass @@ -744,7 +744,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): def _guess_time_format_for_array(arr): # Try to guess the format based on the first non-NaN element - non_nan_elements = notnull(arr).nonzero()[0] + non_nan_elements = notna(arr).nonzero()[0] if len(non_nan_elements): element = arr[non_nan_elements[0]] for time_format in _time_formats: diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index e41ffae9d03c2..07e993d7ef509 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -12,7 +12,7 @@ ABCDataFrame) from pandas.core.dtypes.common import ( is_categorical_dtype, is_list_like) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import 
isna from pandas.core.dtypes.cast import infer_dtype_from_scalar @@ -215,7 +215,7 @@ def _hash_categorical(c, encoding, hash_key): # # TODO: GH 15362 - mask = c.isnull() + mask = c.isna() if len(hashed): result = hashed.take(c.codes) else: @@ -313,7 +313,7 @@ def _hash_scalar(val, encoding='utf8', hash_key=None): 1d uint64 numpy array of hash value, of length 1 """ - if isnull(val): + if isna(val): # this is to be consistent with the _hash_categorical implementation return np.array([np.iinfo(np.uint64).max], dtype='u8') diff --git a/pandas/core/window.py b/pandas/core/window.py index 57611794c375f..5866f1e8a76bd 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -828,7 +828,7 @@ def count(self): results = [] for b in blocks: - result = b.notnull().astype(int) + result = b.notna().astype(int) result = self._constructor(result, window=window, min_periods=0, center=self.center, closed=self.closed).sum() diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 23eb3bb05fd0a..2b322431bd301 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -10,7 +10,7 @@ from textwrap import dedent -from pandas.core.dtypes.missing import isnull, notnull +from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.common import ( is_categorical_dtype, is_float_dtype, @@ -1562,7 +1562,7 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', self.data_index = obj.index if (isinstance(self.data_index, (DatetimeIndex, PeriodIndex)) and date_format is not None): - self.data_index = Index([x.strftime(date_format) if notnull(x) else + self.data_index = Index([x.strftime(date_format) if notna(x) else '' for x in self.data_index]) self.nlevels = getattr(self.data_index, 'nlevels', 1) @@ -1816,7 +1816,7 @@ def _format(x): elif isinstance(vals, ABCSparseArray): vals = vals.values - is_float_type = lib.map_infer(vals, is_float) & notnull(vals) + is_float_type = lib.map_infer(vals, is_float) & notna(vals) 
leading_space = is_float_type.any() fmt_values = [] @@ -1862,10 +1862,10 @@ def _value_formatter(self, float_format=None, threshold=None): # because str(0.0) = '0.0' while '%g' % 0.0 = '0' if float_format: def base_formatter(v): - return (float_format % v) if notnull(v) else self.na_rep + return (float_format % v) if notna(v) else self.na_rep else: def base_formatter(v): - return str(v) if notnull(v) else self.na_rep + return str(v) if notna(v) else self.na_rep if self.decimal != '.': def decimal_formatter(v): @@ -1877,7 +1877,7 @@ def decimal_formatter(v): return decimal_formatter def formatter(value): - if notnull(value): + if notna(value): if abs(value) > threshold: return decimal_formatter(value) else: @@ -1907,7 +1907,7 @@ def format_values_with(float_format): # separate the wheat from the chaff values = self.values - mask = isnull(values) + mask = isna(values) if hasattr(values, 'to_dense'): # sparse numpy ndarray values = values.to_dense() values = np.array(values, dtype='object') diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index b08d3877f3b03..6db56ecb36074 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -771,7 +771,7 @@ def set_table_styles(self, table_styles): @staticmethod def _highlight_null(v, null_color): - return 'background-color: %s' % null_color if pd.isnull(v) else '' + return 'background-color: %s' % null_color if pd.isna(v) else '' def highlight_null(self, null_color='red'): """ diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 31907ad586817..a1d48719ba9c0 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -5,7 +5,7 @@ import pandas._libs.json as json from pandas._libs.tslib import iNaT from pandas.compat import StringIO, long, u -from pandas import compat, isnull +from pandas import compat, isna from pandas import Series, DataFrame, to_datetime, MultiIndex from pandas.io.common import (get_filepath_or_buffer, _get_handle, _stringify_path) @@ -535,7 +535,7 @@ 
def _try_convert_to_date(self, data): # ignore numbers that are out of range if issubclass(new_data.dtype.type, np.number): - in_range = (isnull(new_data.values) | (new_data > self.min_stamp) | + in_range = (isna(new_data.values) | (new_data > self.min_stamp) | (new_data.values == iNaT)) if not in_range.all(): return data, False diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 343bc7a74fde8..906b99d489dc6 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -21,7 +21,7 @@ is_float, is_dtype_equal, is_object_dtype, is_string_dtype, is_scalar, is_categorical_dtype) -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe from pandas.core.index import Index, MultiIndex, RangeIndex from pandas.core.series import Series @@ -1531,7 +1531,7 @@ def _infer_types(self, values, na_values, try_num_bool=True): if try_num_bool: try: result = lib.maybe_convert_numeric(values, na_values, False) - na_count = isnull(result).sum() + na_count = isna(result).sum() except Exception: result = values if values.dtype == np.object_: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4e343556c083b..82c80a13372d7 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -25,7 +25,7 @@ import numpy as np from pandas import (Series, DataFrame, Panel, Panel4D, Index, - MultiIndex, Int64Index, isnull, concat, + MultiIndex, Int64Index, isna, concat, SparseSeries, SparseDataFrame, PeriodIndex, DatetimeIndex, TimedeltaIndex) from pandas.core import config @@ -2136,7 +2136,7 @@ def convert(self, values, nan_rep, encoding): # if we have stored a NaN in the categories # then strip it; in theory we could have BOTH # -1s in the codes and nulls :< - mask = isnull(categories) + mask = isna(categories) if mask.any(): categories = categories[~mask] codes[codes != -1] -= mask.astype(int).cumsum().values @@ -3941,7 +3941,7 @@ def write_data(self, chunksize, dropna=False): # 
figure the mask: only do if we can successfully process this # column, otherwise ignore the mask - mask = isnull(a.data).all(axis=0) + mask = isna(a.data).all(axis=0) if isinstance(mask, np.ndarray): masks.append(mask.astype('u1', copy=False)) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 0dbef66616e43..9aa47e5c69850 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -12,7 +12,7 @@ import numpy as np import pandas._libs.lib as lib -from pandas.core.dtypes.missing import isnull +from pandas.core.dtypes.missing import isna from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.common import ( is_list_like, is_dict_like, @@ -632,7 +632,7 @@ def insert_data(self): # replace NaN with None if b._can_hold_na: - mask = isnull(d) + mask = isna(d) d[mask] = None for col_loc, col in zip(b.mgr_locs, d): @@ -845,7 +845,7 @@ def _harmonize_columns(self, parse_dates=None): except KeyError: pass # this column not in results - def _get_notnull_col_dtype(self, col): + def _get_notna_col_dtype(self, col): """ Infer datatype of the Series col. 
In case the dtype of col is 'object' and it contains NA values, this infers the datatype of the not-NA @@ -853,9 +853,9 @@ def _get_notnull_col_dtype(self, col): """ col_for_inference = col if col.dtype == 'object': - notnulldata = col[~isnull(col)] - if len(notnulldata): - col_for_inference = notnulldata + notnadata = col[~isna(col)] + if len(notnadata): + col_for_inference = notnadata return lib.infer_dtype(col_for_inference) @@ -865,7 +865,7 @@ def _sqlalchemy_type(self, col): if col.name in dtype: return self.dtype[col.name] - col_type = self._get_notnull_col_dtype(col) + col_type = self._get_notna_col_dtype(col) from sqlalchemy.types import (BigInteger, Integer, Float, Text, Boolean, @@ -1345,7 +1345,7 @@ def _sql_type_name(self, col): if col.name in dtype: return dtype[col.name] - col_type = self._get_notnull_col_dtype(col) + col_type = self._get_notna_col_dtype(col) if col_type == 'timedelta64': warnings.warn("the 'timedelta' type is not supported, and will be " "written as integer values (ns frequency) to the " diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 30991d8a24c63..253ed03c25db9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -24,7 +24,7 @@ from pandas.core.frame import DataFrame from pandas.core.series import Series import datetime -from pandas import compat, to_timedelta, to_datetime, isnull, DatetimeIndex +from pandas import compat, to_timedelta, to_datetime, isna, DatetimeIndex from pandas.compat import lrange, lmap, lzip, text_type, string_types, range, \ zip, BytesIO from pandas.util._decorators import Appender @@ -402,7 +402,7 @@ def parse_dates_safe(dates, delta=False, year=False, days=False): return DataFrame(d, index=index) - bad_loc = isnull(dates) + bad_loc = isna(dates) index = dates.index if bad_loc.any(): dates = Series(dates) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a623288efc1ae..65980e5f3c215 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -11,14 +11,14 @@ 
from pandas.util._decorators import cache_readonly from pandas.core.base import PandasObject -from pandas.core.dtypes.missing import notnull, remove_na_arraylike +from pandas.core.dtypes.missing import isna, notna, remove_na_arraylike from pandas.core.dtypes.common import ( is_list_like, is_integer, is_number, is_hashable, is_iterator) -from pandas.core.common import AbstractMethodError, isnull, _try_sort +from pandas.core.common import AbstractMethodError, _try_sort from pandas.core.generic import _shared_docs, _shared_doc_kwargs from pandas.core.index import Index, MultiIndex from pandas.core.series import Series @@ -554,7 +554,7 @@ def _get_xticks(self, convert_period=False): """ x = index._mpl_repr() elif is_datetype: - self.data = self.data[notnull(self.data.index)] + self.data = self.data[notna(self.data.index)] self.data = self.data.sort_index() x = self.data.index._mpl_repr() else: @@ -567,7 +567,7 @@ def _get_xticks(self, convert_period=False): @classmethod def _plot(cls, ax, x, y, style=None, is_errorbar=False, **kwds): - mask = isnull(y) + mask = isna(y) if mask.any(): y = np.ma.array(y) y = np.ma.masked_where(mask, y) @@ -1290,7 +1290,7 @@ def _args_adjust(self): # create common bin edge values = (self.data._convert(datetime=True)._get_numeric_data()) values = np.ravel(values) - values = values[~isnull(values)] + values = values[~isna(values)] hist, self.bins = np.histogram( values, bins=self.bins, @@ -1305,7 +1305,7 @@ def _plot(cls, ax, y, style=None, bins=None, bottom=0, column_num=0, stacking_id=None, **kwds): if column_num == 0: cls._initialize_stacker(ax, stacking_id, len(bins) - 1) - y = y[~isnull(y)] + y = y[~isna(y)] base = np.zeros(len(bins) - 1) bottom = bottom + \ diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 20ada033c0f58..db2211fb55135 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -5,7 +5,7 @@ import numpy as np from pandas.util._decorators import deprecate_kwarg -from 
pandas.core.dtypes.missing import notnull +from pandas.core.dtypes.missing import notna from pandas.compat import range, lrange, lmap, zip from pandas.io.formats.printing import pprint_thing @@ -62,7 +62,7 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, # no gaps between subplots fig.subplots_adjust(wspace=0, hspace=0) - mask = notnull(df) + mask = notna(df) marker = _get_marker_compat(marker) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index b1652cf6eb6db..29ff9c3a77971 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -64,8 +64,8 @@ class TestPDApi(Base): funcs = ['bdate_range', 'concat', 'crosstab', 'cut', 'date_range', 'interval_range', 'eval', 'factorize', 'get_dummies', - 'infer_freq', 'isnull', 'lreshape', - 'melt', 'notnull', 'offsets', + 'infer_freq', 'isna', 'lreshape', + 'melt', 'notna', 'offsets', 'merge', 'merge_ordered', 'merge_asof', 'period_range', 'pivot', 'pivot_table', 'qcut', @@ -88,6 +88,9 @@ class TestPDApi(Base): funcs_to = ['to_datetime', 'to_msgpack', 'to_numeric', 'to_pickle', 'to_timedelta'] + # top-level to deprecate in the future + deprecated_funcs_in_future = ['isnull', 'notnull'] + # these are already deprecated; awaiting removal deprecated_funcs = ['ewma', 'ewmcorr', 'ewmcov', 'ewmstd', 'ewmvar', 'ewmvol', 'expanding_apply', 'expanding_corr', @@ -113,6 +116,7 @@ def test_api(self): self.deprecated_classes_in_future + self.funcs + self.funcs_option + self.funcs_read + self.funcs_to + + self.deprecated_funcs_in_future + self.deprecated_funcs, self.ignored) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ec5fe45d7f610..d26ea047bb41f 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -18,7 +18,7 @@ from pandas._libs import tslib, lib from pandas import (Series, Index, DataFrame, Timedelta, DatetimeIndex, TimedeltaIndex, Timestamp, - Panel, Period, 
Categorical) + Panel, Period, Categorical, isna) from pandas.compat import u, PY2, PY3, StringIO, lrange from pandas.core.dtypes import inference from pandas.core.dtypes.common import ( @@ -36,7 +36,6 @@ is_scipy_sparse, _ensure_int32, _ensure_categorical) -from pandas.core.dtypes.missing import isnull from pandas.util import testing as tm @@ -1014,7 +1013,7 @@ def test_nan_to_nat_conversions(): s = df['B'].copy() s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) - assert (isnull(s[8])) + assert (isna(s[8])) # numpy < 1.7.0 is wrong from distutils.version import LooseVersion diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 90993890b7553..84cb7f9f7365d 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -13,161 +13,161 @@ DatetimeIndex, TimedeltaIndex, date_range) from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.missing import ( - array_equivalent, isnull, notnull, + array_equivalent, isna, notna, na_value_for_dtype) -def test_notnull(): - assert notnull(1.) - assert not notnull(None) - assert not notnull(np.NaN) +def test_notna(): + assert notna(1.) 
+ assert not notna(None) + assert not notna(np.NaN) - with cf.option_context("mode.use_inf_as_null", False): - assert notnull(np.inf) - assert notnull(-np.inf) + with cf.option_context("mode.use_inf_as_na", False): + assert notna(np.inf) + assert notna(-np.inf) arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) + result = notna(arr) assert result.all() - with cf.option_context("mode.use_inf_as_null", True): - assert not notnull(np.inf) - assert not notnull(-np.inf) + with cf.option_context("mode.use_inf_as_na", True): + assert not notna(np.inf) + assert not notna(-np.inf) arr = np.array([1.5, np.inf, 3.5, -np.inf]) - result = notnull(arr) + result = notna(arr) assert result.sum() == 2 - with cf.option_context("mode.use_inf_as_null", False): + with cf.option_context("mode.use_inf_as_na", False): for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries(), tm.makeTimeSeries(), tm.makePeriodSeries()]: - assert (isinstance(isnull(s), Series)) + assert (isinstance(isna(s), Series)) -class TestIsNull(object): +class TestIsNA(object): def test_0d_array(self): - assert isnull(np.array(np.nan)) - assert not isnull(np.array(0.0)) - assert not isnull(np.array(0)) + assert isna(np.array(np.nan)) + assert not isna(np.array(0.0)) + assert not isna(np.array(0)) # test object dtype - assert isnull(np.array(np.nan, dtype=object)) - assert not isnull(np.array(0.0, dtype=object)) - assert not isnull(np.array(0, dtype=object)) + assert isna(np.array(np.nan, dtype=object)) + assert not isna(np.array(0.0, dtype=object)) + assert not isna(np.array(0, dtype=object)) def test_empty_object(self): for shape in [(4, 0), (4,)]: arr = np.empty(shape=shape, dtype=object) - result = isnull(arr) + result = isna(arr) expected = np.ones(shape=shape, dtype=bool) tm.assert_numpy_array_equal(result, expected) - def test_isnull(self): - assert not isnull(1.) - assert isnull(None) - assert isnull(np.NaN) + def test_isna(self): + assert not isna(1.) 
+ assert isna(None) + assert isna(np.NaN) assert float('nan') - assert not isnull(np.inf) - assert not isnull(-np.inf) + assert not isna(np.inf) + assert not isna(-np.inf) # series for s in [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries(), tm.makeTimeSeries(), tm.makePeriodSeries()]: - assert isinstance(isnull(s), Series) + assert isinstance(isna(s), Series) # frame for df in [tm.makeTimeDataFrame(), tm.makePeriodFrame(), tm.makeMixedDataFrame()]: - result = isnull(df) - expected = df.apply(isnull) + result = isna(df) + expected = df.apply(isna) tm.assert_frame_equal(result, expected) # panel with catch_warnings(record=True): for p in [tm.makePanel(), tm.makePeriodPanel(), tm.add_nans(tm.makePanel())]: - result = isnull(p) - expected = p.apply(isnull) + result = isna(p) + expected = p.apply(isna) tm.assert_panel_equal(result, expected) # panel 4d with catch_warnings(record=True): for p in [tm.makePanel4D(), tm.add_nans_panel4d(tm.makePanel4D())]: - result = isnull(p) - expected = p.apply(isnull) + result = isna(p) + expected = p.apply(isna) tm.assert_panel4d_equal(result, expected) - def test_isnull_lists(self): - result = isnull([[False]]) + def test_isna_lists(self): + result = isna([[False]]) exp = np.array([[False]]) tm.assert_numpy_array_equal(result, exp) - result = isnull([[1], [2]]) + result = isna([[1], [2]]) exp = np.array([[False], [False]]) tm.assert_numpy_array_equal(result, exp) # list of strings / unicode - result = isnull(['foo', 'bar']) + result = isna(['foo', 'bar']) exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) - result = isnull([u('foo'), u('bar')]) + result = isna([u('foo'), u('bar')]) exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) - def test_isnull_nat(self): - result = isnull([NaT]) + def test_isna_nat(self): + result = isna([NaT]) exp = np.array([True]) tm.assert_numpy_array_equal(result, exp) - result = isnull(np.array([NaT], dtype=object)) + result = isna(np.array([NaT], 
dtype=object)) exp = np.array([True]) tm.assert_numpy_array_equal(result, exp) - def test_isnull_numpy_nat(self): + def test_isna_numpy_nat(self): arr = np.array([NaT, np.datetime64('NaT'), np.timedelta64('NaT'), np.datetime64('NaT', 's')]) - result = isnull(arr) + result = isna(arr) expected = np.array([True] * 4) tm.assert_numpy_array_equal(result, expected) - def test_isnull_datetime(self): - assert not isnull(datetime.now()) - assert notnull(datetime.now()) + def test_isna_datetime(self): + assert not isna(datetime.now()) + assert notna(datetime.now()) idx = date_range('1/1/1990', periods=20) exp = np.ones(len(idx), dtype=bool) - tm.assert_numpy_array_equal(notnull(idx), exp) + tm.assert_numpy_array_equal(notna(idx), exp) idx = np.asarray(idx) idx[0] = iNaT idx = DatetimeIndex(idx) - mask = isnull(idx) + mask = isna(idx) assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) tm.assert_numpy_array_equal(mask, exp) # GH 9129 pidx = idx.to_period(freq='M') - mask = isnull(pidx) + mask = isna(pidx) assert mask[0] exp = np.array([True] + [False] * (len(idx) - 1), dtype=bool) tm.assert_numpy_array_equal(mask, exp) - mask = isnull(pidx[1:]) + mask = isna(pidx[1:]) exp = np.zeros(len(mask), dtype=bool) tm.assert_numpy_array_equal(mask, exp) def test_datetime_other_units(self): idx = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-02']) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) - tm.assert_numpy_array_equal(isnull(idx.values), exp) - tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) for dtype in ['datetime64[D]', 'datetime64[h]', 'datetime64[m]', 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', @@ -175,24 +175,24 @@ def 
test_datetime_other_units(self): values = idx.values.astype(dtype) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(values), exp) - tm.assert_numpy_array_equal(notnull(values), ~exp) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(values) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(values, dtype=object) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_timedelta_other_units(self): idx = pd.TimedeltaIndex(['1 days', 'NaT', '2 days']) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) - tm.assert_numpy_array_equal(isnull(idx.values), exp) - tm.assert_numpy_array_equal(notnull(idx.values), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx.values), exp) + tm.assert_numpy_array_equal(notna(idx.values), ~exp) for dtype in ['timedelta64[D]', 'timedelta64[h]', 'timedelta64[m]', 'timedelta64[s]', 'timedelta64[ms]', 'timedelta64[us]', @@ -200,30 +200,30 @@ def test_timedelta_other_units(self): values = idx.values.astype(dtype) exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(values), exp) - tm.assert_numpy_array_equal(notnull(values), ~exp) + tm.assert_numpy_array_equal(isna(values), exp) + tm.assert_numpy_array_equal(notna(values), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(values) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(values, dtype=object) - 
tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_period(self): idx = pd.PeriodIndex(['2011-01', 'NaT', '2012-01'], freq='M') exp = np.array([False, True, False]) - tm.assert_numpy_array_equal(isnull(idx), exp) - tm.assert_numpy_array_equal(notnull(idx), ~exp) + tm.assert_numpy_array_equal(isna(idx), exp) + tm.assert_numpy_array_equal(notna(idx), ~exp) exp = pd.Series([False, True, False]) s = pd.Series(idx) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) s = pd.Series(idx, dtype=object) - tm.assert_series_equal(isnull(s), exp) - tm.assert_series_equal(notnull(s), ~exp) + tm.assert_series_equal(isna(s), exp) + tm.assert_series_equal(notna(s), ~exp) def test_array_equivalent(): diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index b09325bfa2ddc..4c07aea79277a 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -13,7 +13,7 @@ import numpy as np from pandas.compat import lrange, product -from pandas import (compat, isnull, notnull, DataFrame, Series, +from pandas import (compat, isna, notna, DataFrame, Series, MultiIndex, date_range, Timestamp) import pandas as pd import pandas.core.nanops as nanops @@ -81,11 +81,11 @@ def test_corr_nooverlap(self): 'C': [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan]}) rs = df.corr(meth) - assert isnull(rs.loc['A', 'B']) - assert isnull(rs.loc['B', 'A']) + assert isna(rs.loc['A', 'B']) + assert isna(rs.loc['B', 'A']) assert rs.loc['A', 'A'] == 1 assert rs.loc['B', 'B'] == 1 - assert isnull(rs.loc['C', 'C']) + assert isna(rs.loc['C', 'C']) def test_corr_constant(self): tm._skip_if_no_scipy() @@ -96,7 +96,7 @@ def test_corr_constant(self): df = DataFrame({'A': [1, 1, 1, np.nan, np.nan, np.nan], 'B': [np.nan, np.nan, 
np.nan, 1, 1, 1]}) rs = df.corr(meth) - assert isnull(rs.values).all() + assert isna(rs.values).all() def test_corr_int(self): # dtypes other than float64 #1761 @@ -136,7 +136,7 @@ def test_cov(self): tm.assert_frame_equal(expected, result) result = self.frame.cov(min_periods=len(self.frame) + 1) - assert isnull(result.values).all() + assert isna(result.values).all() # with NAs frame = self.frame.copy() @@ -389,7 +389,7 @@ def test_reduce_mixed_frame(self): tm.assert_series_equal(test, df.T.sum(axis=1)) def test_count(self): - f = lambda s: notnull(s).sum() + f = lambda s: notna(s).sum() self._check_stat_op('count', f, has_skipna=False, has_numeric_only=True, @@ -477,7 +477,7 @@ def test_product(self): def test_median(self): def wrapper(x): - if isnull(x).any(): + if isna(x).any(): return np.nan return np.median(x) @@ -974,7 +974,7 @@ def test_stats_mixed_type(self): def test_median_corner(self): def wrapper(x): - if isnull(x).any(): + if isna(x).any(): return np.nan return np.median(x) @@ -998,7 +998,7 @@ def test_cumsum_corner(self): def test_sum_bools(self): df = DataFrame(index=lrange(1), columns=lrange(10)) - bools = isnull(df) + bools = isna(df) assert bools.sum(axis=1)[0] == 10 # Index of max / min diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index a6f39cabb60ed..ab2e810d77634 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -9,7 +9,7 @@ import warnings import numpy as np -from pandas import (notnull, DataFrame, Series, MultiIndex, date_range, +from pandas import (notna, DataFrame, Series, MultiIndex, date_range, Timestamp, compat) import pandas as pd from pandas.core.dtypes.dtypes import CategoricalDtype @@ -278,7 +278,7 @@ def transform(row): return row def transform2(row): - if (notnull(row['C']) and row['C'].startswith('shin') and + if (notna(row['C']) and row['C'].startswith('shin') and row['A'] == 'foo'): row['D'] = 7 return row diff --git a/pandas/tests/frame/test_asof.py 
b/pandas/tests/frame/test_asof.py index d4e3d541937dc..fea6a5370109e 100644 --- a/pandas/tests/frame/test_asof.py +++ b/pandas/tests/frame/test_asof.py @@ -23,13 +23,13 @@ def test_basic(self): freq='25s') result = df.asof(dates) - assert result.notnull().all(1).all() + assert result.notna().all(1).all() lb = df.index[14] ub = df.index[30] dates = list(dates) result = df.asof(dates) - assert result.notnull().all(1).all() + assert result.notna().all(1).all() mask = (result.index >= lb) & (result.index < ub) rs = result[mask] diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 87d942101f5f1..e76869bf6712b 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -11,7 +11,7 @@ from pandas.compat import lrange, lzip, u from pandas import (compat, DataFrame, Series, Index, MultiIndex, - date_range, isnull) + date_range, isna) import pandas as pd from pandas.util.testing import assert_frame_equal @@ -852,11 +852,11 @@ def test_reindex_boolean(self): reindexed = frame.reindex(np.arange(10)) assert reindexed.values.dtype == np.object_ - assert isnull(reindexed[0][1]) + assert isna(reindexed[0][1]) reindexed = frame.reindex(columns=lrange(3)) assert reindexed.values.dtype == np.object_ - assert isnull(reindexed[1]).all() + assert isna(reindexed[1]).all() def test_reindex_objects(self): reindexed = self.mixed_frame.reindex(columns=['foo', 'A', 'B']) diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index c1a5b437be5d0..54dc04bc783b0 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -507,7 +507,7 @@ def test_stale_cached_series_bug_473(self): repr(Y) result = Y.sum() # noqa exp = Y['g'].sum() # noqa - assert pd.isnull(Y['g']['c']) + assert pd.isna(Y['g']['c']) def test_get_X_columns(self): # numeric and object columns @@ -540,6 +540,6 @@ def 
test_strange_column_corruption_issue(self): myid = 100 - first = len(df.loc[pd.isnull(df[myid]), [myid]]) - second = len(df.loc[pd.isnull(df[myid]), [myid]]) + first = len(df.loc[pd.isna(df[myid]), [myid]]) + second = len(df.loc[pd.isna(df[myid]), [myid]]) assert first == second == 0 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 97cf3ce8a7216..d942330ecd8a6 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -17,7 +17,7 @@ from pandas.compat import (lmap, long, zip, range, lrange, lzip, OrderedDict, is_platform_little_endian) from pandas import compat -from pandas import (DataFrame, Index, Series, isnull, +from pandas import (DataFrame, Index, Series, isna, MultiIndex, Timedelta, Timestamp, date_range) import pandas as pd @@ -224,7 +224,7 @@ def test_constructor_dict(self): assert len(frame) == len(self.ts2) assert 'col1' not in frame - assert isnull(frame['col3']).all() + assert isna(frame['col3']).all() # Corner cases assert len(DataFrame({})) == 0 @@ -279,12 +279,12 @@ def test_constructor_multi_index(self): tuples = [(2, 3), (3, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - assert pd.isnull(df).values.ravel().all() + assert pd.isna(df).values.ravel().all() tuples = [(3, 3), (2, 3), (3, 3)] mi = MultiIndex.from_tuples(tuples) df = DataFrame(index=mi, columns=mi) - assert pd.isnull(df).values.ravel().all() + assert pd.isna(df).values.ravel().all() def test_constructor_error_msgs(self): msg = "Empty data passed with indices specified." 
@@ -625,7 +625,7 @@ def test_constructor_maskedarray_nonfloat(self): assert len(frame.index) == 2 assert len(frame.columns) == 3 - assert isnull(frame).values.all() + assert isna(frame).values.all() # cast type frame = DataFrame(mat, columns=['A', 'B', 'C'], @@ -1496,7 +1496,7 @@ def check(df): df.iloc[:, i] # allow single nans to succeed - indexer = np.arange(len(df.columns))[isnull(df.columns)] + indexer = np.arange(len(df.columns))[isna(df.columns)] if len(indexer) == 1: tm.assert_series_equal(df.iloc[:, indexer[0]], @@ -1966,7 +1966,7 @@ def test_frame_datetime64_mixed_index_ctor_1681(self): # it works! d = DataFrame({'A': 'foo', 'B': ts}, index=dr) - assert d['B'].isnull().all() + assert d['B'].isna().all() def test_frame_timeseries_to_records(self): index = date_range('1/1/2000', periods=10) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 065580d56a683..5941b2ab7c2cb 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -382,7 +382,7 @@ def test_dtypes_gh8722(self): assert_series_equal(result, expected) # compat, GH 8722 - with option_context('use_inf_as_null', True): + with option_context('use_inf_as_na', True): df = DataFrame([[1]]) result = df.dtypes assert_series_equal(result, Series({0: np.dtype('int64')})) diff --git a/pandas/tests/frame/test_indexing.py b/pandas/tests/frame/test_indexing.py index f0503b60eeefa..0f461d4e372a1 100644 --- a/pandas/tests/frame/test_indexing.py +++ b/pandas/tests/frame/test_indexing.py @@ -15,7 +15,7 @@ import numpy as np import pandas.core.common as com -from pandas import (DataFrame, Index, Series, notnull, isnull, +from pandas import (DataFrame, Index, Series, notna, isna, MultiIndex, DatetimeIndex, Timestamp, date_range) import pandas as pd @@ -312,7 +312,7 @@ def test_getitem_boolean_casting(self): df = DataFrame(data=np.random.randn(100, 50)) df = df.where(df > 0) # create nans bools = df > 0 - mask = isnull(df) + mask = isna(df) expected = 
bools.astype(float).mask(mask) result = bools.mask(mask) assert_frame_equal(result, expected) @@ -395,7 +395,7 @@ def test_getitem_setitem_ix_negative_integers(self): df = DataFrame(np.random.randn(8, 4)) with catch_warnings(record=True): - assert isnull(df.ix[:, [-1]].values).all() + assert isna(df.ix[:, [-1]].values).all() # #1942 a = DataFrame(randn(20, 2), index=[chr(x + 65) for x in range(20)]) @@ -487,7 +487,7 @@ def test_setitem_always_copy(self): self.frame['E'] = s self.frame['E'][5:10] = nan - assert notnull(s[5:10]).all() + assert notna(s[5:10]).all() def test_setitem_boolean(self): df = self.frame.copy() @@ -705,7 +705,7 @@ def test_setitem_empty(self): 'c': ['111', '222', '333']}) result = df.copy() - result.loc[result.b.isnull(), 'a'] = result.a + result.loc[result.b.isna(), 'a'] = result.a assert_frame_equal(result, df) def test_setitem_empty_frame_with_boolean(self): @@ -795,7 +795,7 @@ def test_getitem_fancy_slice_integers_step(self): # this is OK result = df.iloc[:8:2] # noqa df.iloc[:8:2] = np.nan - assert isnull(df.iloc[:8:2]).values.all() + assert isna(df.iloc[:8:2]).values.all() def test_getitem_setitem_integer_slice_keyerrors(self): df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2)) @@ -1020,7 +1020,7 @@ def test_setitem_fancy_mixed_2d(self): assert (result.values == 5).all() self.mixed_frame.ix[5] = np.nan - assert isnull(self.mixed_frame.ix[5]).all() + assert isna(self.mixed_frame.ix[5]).all() self.mixed_frame.ix[5] = self.mixed_frame.ix[6] assert_series_equal(self.mixed_frame.ix[5], self.mixed_frame.ix[6], @@ -1492,15 +1492,15 @@ def test_setitem_single_column_mixed_datetime(self): # set an allowable datetime64 type df.loc['b', 'timestamp'] = iNaT - assert isnull(df.loc['b', 'timestamp']) + assert isna(df.loc['b', 'timestamp']) # allow this syntax df.loc['c', 'timestamp'] = nan - assert isnull(df.loc['c', 'timestamp']) + assert isna(df.loc['c', 'timestamp']) # allow this syntax df.loc['d', :] = nan - assert not 
isnull(df.loc['c', :]).all() + assert not isna(df.loc['c', :]).all() # as of GH 3216 this will now work! # try to set with a list like item @@ -1695,7 +1695,7 @@ def test_set_value_resize(self): res = self.frame.copy() res3 = res.set_value('foobar', 'baz', 5) assert is_float_dtype(res3['baz']) - assert isnull(res3['baz'].drop(['foobar'])).all() + assert isna(res3['baz'].drop(['foobar'])).all() pytest.raises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): @@ -1935,7 +1935,7 @@ def test_reindex_frame_add_nat(self): result = df.reindex(lrange(15)) assert np.issubdtype(result['B'].dtype, np.dtype('M8[ns]')) - mask = com.isnull(result)['B'] + mask = com.isna(result)['B'] assert mask[-5:].all() assert not mask[:-5].any() @@ -2589,7 +2589,7 @@ def test_where_bug(self): # GH7506 a = DataFrame({0: [1, 2], 1: [3, 4], 2: [5, 6]}) b = DataFrame({0: [np.nan, 8], 1: [9, np.nan], 2: [np.nan, np.nan]}) - do_not_replace = b.isnull() | (a > b) + do_not_replace = b.isna() | (a > b) expected = a.copy() expected[~do_not_replace] = b @@ -2599,7 +2599,7 @@ def test_where_bug(self): a = DataFrame({0: [4, 6], 1: [1, 0]}) b = DataFrame({0: [np.nan, 3], 1: [3, np.nan]}) - do_not_replace = b.isnull() | (a > b) + do_not_replace = b.isna() | (a > b) expected = a.copy() expected[~do_not_replace] = b @@ -2632,10 +2632,10 @@ def test_where_none(self): # GH 7656 df = DataFrame([{'A': 1, 'B': np.nan, 'C': 'Test'}, { 'A': np.nan, 'B': 'Test', 'C': np.nan}]) - expected = df.where(~isnull(df), None) + expected = df.where(~isna(df), None) with tm.assert_raises_regex(TypeError, 'boolean setting ' 'on mixed-type'): - df.where(~isnull(df), None, inplace=True) + df.where(~isna(df), None, inplace=True) def test_where_align(self): @@ -2649,10 +2649,10 @@ def create(): # series df = create() expected = df.fillna(df.mean()) - result = df.where(pd.notnull(df), df.mean(), axis='columns') + result = df.where(pd.notna(df), df.mean(), axis='columns') 
assert_frame_equal(result, expected) - df.where(pd.notnull(df), df.mean(), inplace=True, axis='columns') + df.where(pd.notna(df), df.mean(), inplace=True, axis='columns') assert_frame_equal(df, expected) df = create().fillna(0) @@ -2665,7 +2665,7 @@ def create(): # frame df = create() expected = df.fillna(1) - result = df.where(pd.notnull(df), DataFrame( + result = df.where(pd.notna(df), DataFrame( 1, index=df.index, columns=df.columns)) assert_frame_equal(result, expected) @@ -2946,7 +2946,7 @@ def test_setitem(self): df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2['B'] - assert_series_equal(notnull(result), Series( + assert_series_equal(notna(result), Series( [True, False, True], name='B')) assert_series_equal(df2.dtypes, df.dtypes) @@ -2998,7 +2998,7 @@ def test_setitem(self): df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2['B'] - assert_series_equal(notnull(result), Series( + assert_series_equal(notna(result), Series( [True, False, True], name='B')) assert_series_equal(df2.dtypes, Series([np.dtype('uint64'), np.dtype('O'), np.dtype('O')], diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 8ec6c6e6263d8..1d56478061dbc 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -137,12 +137,12 @@ def test_operators_none_as_na(self): filled = df.fillna(np.nan) result = op(df, 3) expected = op(filled, 3).astype(object) - expected[com.isnull(expected)] = None + expected[com.isna(expected)] = None assert_frame_equal(result, expected) result = op(df, df) expected = op(filled, filled).astype(object) - expected[com.isnull(expected)] = None + expected[com.isna(expected)] = None assert_frame_equal(result, expected) result = op(df, df.fillna(7)) @@ -1044,8 +1044,8 @@ def test_combine_generic(self): combined = df1.combine(df2, np.add) combined2 = df2.combine(df1, np.add) - assert combined['D'].isnull().all() - assert combined2['D'].isnull().all() + assert 
combined['D'].isna().all() + assert combined2['D'].isna().all() chunk = combined.loc[combined.index[:-5], ['A', 'B', 'C']] chunk2 = combined2.loc[combined2.index[:-5], ['A', 'B', 'C']] diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index fdb0119d8ae60..e2f362ebdc895 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -528,7 +528,7 @@ def test_unstack_nan_index(self): # GH7466 def verify(df): mk_list = lambda a: list(a) if isinstance(a, tuple) else [a] - rows, cols = df.notnull().values.nonzero() + rows, cols = df.notna().values.nonzero() for i, j in zip(rows, cols): left = sorted(df.iloc[i, j].split('.')) right = mk_list(df.index[i]) + mk_list(df.columns[j]) @@ -547,7 +547,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in range(2): udf = mi.unstack(level=lev) - assert udf.notnull().values.sum() == len(df) + assert udf.notna().values.sum() == len(df) verify(udf['jolie']) df = DataFrame({'1st': ['d'] * 3 + [nan] * 5 + ['a'] * 2 + @@ -565,7 +565,7 @@ def verify(df): mi = df.set_index(list(idx)) for lev in range(3): udf = mi.unstack(level=lev) - assert udf.notnull().values.sum() == 2 * len(df) + assert udf.notna().values.sum() == 2 * len(df) for col in ['4th', '5th']: verify(udf[col]) @@ -670,7 +670,7 @@ def verify(df): df.loc[1, '3rd'] = df.loc[4, '3rd'] = nan left = df.set_index(['1st', '2nd', '3rd']).unstack(['2nd', '3rd']) - assert left.notnull().values.sum() == 2 * len(df) + assert left.notna().values.sum() == 2 * len(df) for col in ['jim', 'joe']: for _, r in df.iterrows(): diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py index aaca8a60fe062..19fbf854256c6 100644 --- a/pandas/tests/frame/test_timeseries.py +++ b/pandas/tests/frame/test_timeseries.py @@ -281,7 +281,7 @@ def test_shift_duplicate_columns(self): shifted.append(df) # sanity check the base case - nulls = shifted[0].isnull().sum() + nulls = shifted[0].isna().sum() 
assert_series_equal(nulls, Series(range(1, 6), dtype='int64')) # check all answers are the same diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index f527c732fb76b..8b95455b53d22 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -6,7 +6,7 @@ import numpy as np from pandas.core.dtypes.common import _ensure_int64 -from pandas import Index, isnull +from pandas import Index, isna from pandas.util.testing import assert_almost_equal import pandas.util.testing as tm from pandas._libs import lib, groupby @@ -97,7 +97,7 @@ def _check(dtype): func(out, counts, obj[:, None], labels) def _ohlc(group): - if isnull(group).all(): + if isna(group).all(): return np.repeat(nan, 4) return [group[0], group.max(), group.min(), group[-1]] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 19124a33bdbcb..0dea1e8447b2b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2562,7 +2562,7 @@ def test_cython_grouper_series_bug_noncontig(self): inds = np.tile(lrange(10), 10) result = obj.groupby(inds).agg(Series.median) - assert result.isnull().all() + assert result.isna().all() def test_series_grouper_noncontig_index(self): index = Index(tm.rands_array(10, 100)) @@ -3540,7 +3540,7 @@ def test_max_nan_bug(self): r = gb[['File']].max() e = gb['File'].max().to_frame() tm.assert_frame_equal(r, e) - assert not r['File'].isnull().any() + assert not r['File'].isna().any() def test_nlargest(self): a = Series([1, 3, 5, 7, 2, 9, 0, 4, 6, 10]) diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 47e6e7839422a..28392537be3c6 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -1,6 +1,6 @@ import numpy as np import pandas as pd -from pandas import DataFrame, MultiIndex, Index, Series, isnull +from pandas import DataFrame, MultiIndex, Index, Series, 
isna from pandas.compat import lrange from pandas.util.testing import assert_frame_equal, assert_series_equal @@ -41,9 +41,9 @@ def test_first_last_nth(self): grouped['B'].nth(0) self.df.loc[self.df['A'] == 'foo', 'B'] = np.nan - assert isnull(grouped['B'].first()['foo']) - assert isnull(grouped['B'].last()['foo']) - assert isnull(grouped['B'].nth(0)['foo']) + assert isna(grouped['B'].first()['foo']) + assert isna(grouped['B'].last()['foo']) + assert isna(grouped['B'].nth(0)['foo']) # v0.14.0 whatsnew df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index 70b6b1e439691..df0a93d783375 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -599,7 +599,7 @@ def test_first_last_max_min_on_time_data(self): 'td': [nan, td(days=1), td(days=2), td(days=3), nan]}) df_test.dt = pd.to_datetime(df_test.dt) df_test['group'] = 'A' - df_ref = df_test[df_test.dt.notnull()] + df_ref = df_test[df_test.dt.notna()] grouped_test = df_test.groupby('group') grouped_ref = df_ref.groupby('group') diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1513a1c690014..1fdc08d68eb26 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -10,7 +10,7 @@ from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, - notnull, isnull) + notna, isna) from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion from pandas._libs.tslib import iNaT @@ -514,7 +514,7 @@ def test_numpy_repeat(self): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) @@ -884,7 +884,7 @@ def test_fillna(self): pass elif 
isinstance(index, MultiIndex): idx = index.copy() - msg = "isnull is not defined for MultiIndex" + msg = "isna is not defined for MultiIndex" with tm.assert_raises_regex(NotImplementedError, msg): idx.fillna(idx[0]) else: @@ -924,23 +924,23 @@ def test_nulls(self): for name, index in self.indices.items(): if len(index) == 0: tm.assert_numpy_array_equal( - index.isnull(), np.array([], dtype=bool)) + index.isna(), np.array([], dtype=bool)) elif isinstance(index, MultiIndex): idx = index.copy() - msg = "isnull is not defined for MultiIndex" + msg = "isna is not defined for MultiIndex" with tm.assert_raises_regex(NotImplementedError, msg): - idx.isnull() + idx.isna() else: if not index.hasnans: tm.assert_numpy_array_equal( - index.isnull(), np.zeros(len(index), dtype=bool)) + index.isna(), np.zeros(len(index), dtype=bool)) tm.assert_numpy_array_equal( - index.notnull(), np.ones(len(index), dtype=bool)) + index.notna(), np.ones(len(index), dtype=bool)) else: - result = isnull(index) - tm.assert_numpy_array_equal(index.isnull(), result) - tm.assert_numpy_array_equal(index.notnull(), ~result) + result = isna(index) + tm.assert_numpy_array_equal(index.isna(), result) + tm.assert_numpy_array_equal(index.notna(), ~result) def test_empty(self): # GH 15270 diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index 4ef5cc5499f4d..9416b08f9654a 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -5,7 +5,7 @@ import pandas as pd import pandas.util.testing as tm import pandas.compat as compat -from pandas import notnull, Index, DatetimeIndex, datetime, date_range +from pandas import notna, Index, DatetimeIndex, datetime, date_range class TestDatetimeIndex(object): @@ -16,29 +16,29 @@ def test_where_other(self): i = pd.date_range('20130101', periods=3, tz='US/Eastern') for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) + result = 
i.where(notna(i), other=np.nan) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2) + result = i.where(notna(i2), i2) tm.assert_index_equal(result, i2) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2), i2.values) + result = i.where(notna(i2), i2.values) tm.assert_index_equal(result, i2) def test_where_tz(self): i = pd.date_range('20130101', periods=3, tz='US/Eastern') - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist()) - result = i.where(notnull(i2)) + result = i.where(notna(i2)) expected = i2 tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index f33cdf8800791..86e65feec04f3 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -116,13 +116,13 @@ def test_minmax(self): for op in ['min', 'max']: # Return NaT obj = DatetimeIndex([]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = DatetimeIndex([pd.NaT, pd.NaT, pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index a47db755b44af..7ff9c2b23cbfb 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -20,7 +20,7 @@ from pandas.core.dtypes.common import is_datetime64_ns_dtype from pandas.util import testing as tm from pandas.util.testing import assert_series_equal, _skip_if_has_locale -from pandas import 
(isnull, to_datetime, Timestamp, Series, DataFrame, +from pandas import (isna, to_datetime, Timestamp, Series, DataFrame, Index, DatetimeIndex, NaT, date_range, bdate_range, compat) @@ -683,7 +683,7 @@ def test_to_datetime_types(self): assert result is NaT result = to_datetime(['', '']) - assert isnull(result).all() + assert isna(result).all() # ints result = Timestamp(0) @@ -751,7 +751,7 @@ def test_string_na_nat_conversion(self): expected = np.empty(4, dtype='M8[ns]') for i, val in enumerate(strings): - if isnull(val): + if isna(val): expected[i] = tslib.iNaT else: expected[i] = parse_date(val) @@ -787,7 +787,7 @@ def test_string_na_nat_conversion(self): expected = Series(np.empty(5, dtype='M8[ns]'), index=idx) for i in range(5): x = series[i] - if isnull(x): + if isna(x): expected[i] = tslib.iNaT else: expected[i] = to_datetime(x) @@ -977,13 +977,13 @@ class TestDaysInMonth(object): # tests for issue #10154 def test_day_not_in_month_coerce(self): - assert isnull(to_datetime('2015-02-29', errors='coerce')) - assert isnull(to_datetime('2015-02-29', format="%Y-%m-%d", - errors='coerce')) - assert isnull(to_datetime('2015-02-32', format="%Y-%m-%d", - errors='coerce')) - assert isnull(to_datetime('2015-04-31', format="%Y-%m-%d", - errors='coerce')) + assert isna(to_datetime('2015-02-29', errors='coerce')) + assert isna(to_datetime('2015-02-29', format="%Y-%m-%d", + errors='coerce')) + assert isna(to_datetime('2015-02-32', format="%Y-%m-%d", + errors='coerce')) + assert isna(to_datetime('2015-04-31', format="%Y-%m-%d", + errors='coerce')) def test_day_not_in_month_raise(self): pytest.raises(ValueError, to_datetime, '2015-02-29', diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 291ca317f8fae..e24e2ad936e2c 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -6,7 +6,7 @@ import pandas as pd from pandas.util import testing as tm -from pandas import 
(PeriodIndex, period_range, notnull, DatetimeIndex, NaT, +from pandas import (PeriodIndex, period_range, notna, DatetimeIndex, NaT, Index, Period, Int64Index, Series, DataFrame, date_range, offsets, compat) @@ -92,13 +92,13 @@ def test_get_loc(self): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') - result = i.where(notnull(i2)) + result = i.where(notna(i2)) expected = i2 tm.assert_index_equal(result, expected) @@ -116,20 +116,20 @@ def test_where_other(self): i = self.create_index() for arr in [np.nan, pd.NaT]: - result = i.where(notnull(i), other=np.nan) + result = i.where(notna(i), other=np.nan) expected = i tm.assert_index_equal(result, expected) i2 = i.copy() i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') - result = i.where(notnull(i2), i2) + result = i.where(notna(i2), i2) tm.assert_index_equal(result, i2) i2 = i.copy() i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(), freq='D') - result = i.where(notnull(i2), i2.values) + result = i.where(notna(i2), i2.values) tm.assert_index_equal(result, i2) def test_get_indexer(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 18dbe6624008a..c49d9ee07d7ef 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -16,7 +16,7 @@ from pandas import (period_range, date_range, Series, DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, - PeriodIndex, isnull) + PeriodIndex, isna) from pandas.core.index import _get_combined_index from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -504,7 +504,7 @@ def test_is_(self): def test_asof(self): d = self.dateIndex[0] assert self.dateIndex.asof(d) == d - assert isnull(self.dateIndex.asof(d - timedelta(1))) + assert 
isna(self.dateIndex.asof(d - timedelta(1))) d = self.dateIndex[-1] assert self.dateIndex.asof(d + timedelta(1)) == d diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 14f344acbefb2..b2f7f9a7b80a7 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -10,7 +10,7 @@ import numpy as np -from pandas import Categorical, IntervalIndex, compat, notnull +from pandas import Categorical, IntervalIndex, compat, notna from pandas.util.testing import assert_almost_equal import pandas.core.config as cf import pandas as pd @@ -236,13 +236,13 @@ def f(x): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(), categories=i.categories) - result = i.where(notnull(i2)) + result = i.where(notna(i2)) expected = i2 tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 33745017fe3d6..fe86a2121761a 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from pandas import (Interval, IntervalIndex, Index, isnull, +from pandas import (Interval, IntervalIndex, Index, isna, interval_range, Timestamp, Timedelta, compat) from pandas._libs.interval import IntervalTree @@ -152,16 +152,16 @@ def test_properties(self): def test_with_nans(self): index = self.index assert not index.hasnans - tm.assert_numpy_array_equal(index.isnull(), + tm.assert_numpy_array_equal(index.isna(), np.array([False, False])) - tm.assert_numpy_array_equal(index.notnull(), + tm.assert_numpy_array_equal(index.notna(), np.array([True, True])) index = self.index_with_nan assert index.hasnans - tm.assert_numpy_array_equal(index.notnull(), + tm.assert_numpy_array_equal(index.notna(), np.array([True, False, 
True])) - tm.assert_numpy_array_equal(index.isnull(), + tm.assert_numpy_array_equal(index.isna(), np.array([False, True, False])) def test_copy(self): @@ -228,7 +228,7 @@ def test_astype(self): def test_where(self): expected = self.index - result = self.index.where(self.index.notnull()) + result = self.index.where(self.index.notna()) tm.assert_index_equal(result, expected) idx = IntervalIndex.from_breaks([1, 2]) @@ -311,7 +311,7 @@ def test_get_item(self): closed='right') assert i[0] == Interval(0.0, 1.0) assert i[1] == Interval(1.0, 2.0) - assert isnull(i[2]) + assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.,), (1.,), closed='right') @@ -620,7 +620,7 @@ def test_missing_values(self): with pytest.raises(ValueError): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2])) - tm.assert_numpy_array_equal(isnull(idx), + tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self): @@ -631,15 +631,15 @@ def test_sort_values(self): # nan idx = self.index_with_nan - mask = idx.isnull() + mask = idx.isna() tm.assert_numpy_array_equal(mask, np.array([False, True, False])) result = idx.sort_values() - mask = result.isnull() + mask = result.isna() tm.assert_numpy_array_equal(mask, np.array([False, False, True])) result = idx.sort_values(ascending=False) - mask = result.isnull() + mask = result.isna() tm.assert_numpy_array_equal(mask, np.array([True, False, False])) def test_datetime(self): diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 719cd2f7e01a4..da1b309f5a621 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2366,12 +2366,12 @@ def test_slice_keep_name(self): names=['x', 'y']) assert x[1:].names == x.names - def test_isnull_behavior(self): + def test_isna_behavior(self): # should not segfault GH5123 # NOTE: if MI representation changes, may make sense to allow - # isnull(MI) + # isna(MI) with 
pytest.raises(NotImplementedError): - pd.isnull(self.index) + pd.isna(self.index) def test_level_setting_resets_attributes(self): ind = MultiIndex.from_arrays([ @@ -2889,13 +2889,13 @@ def test_nan_stays_float(self): labels=[[0], [0]], names=[0, 1]) idxm = idx0.join(idx1, how='outer') - assert pd.isnull(idx0.get_level_values(1)).all() + assert pd.isna(idx0.get_level_values(1)).all() # the following failed in 0.14.1 - assert pd.isnull(idxm.get_level_values(1)[:-1]).all() + assert pd.isna(idxm.get_level_values(1)[:-1]).all() df0 = pd.DataFrame([[1, 2]], index=idx0) df1 = pd.DataFrame([[3, 4]], index=idx1) dfm = df0 - df1 - assert pd.isnull(df0.index.get_level_values(1)).all() + assert pd.isna(df0.index.get_level_values(1)).all() # the following failed in 0.14.1 - assert pd.isnull(dfm.index.get_level_values(1)[:-1]).all() + assert pd.isna(dfm.index.get_level_values(1)[:-1]).all() diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 62ac337d02727..1a0a38c173284 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -7,7 +7,7 @@ import numpy as np -from pandas import (date_range, notnull, Series, Index, Float64Index, +from pandas import (date_range, notna, Series, Index, Float64Index, Int64Index, UInt64Index, RangeIndex) import pandas.util.testing as tm @@ -228,11 +228,11 @@ def test_constructor(self): # nan handling result = Float64Index([np.nan, np.nan]) - assert pd.isnull(result.values).all() + assert pd.isna(result.values).all() result = Float64Index(np.array([np.nan])) - assert pd.isnull(result.values).all() + assert pd.isna(result.values).all() result = Index(np.array([np.nan])) - assert pd.isnull(result.values).all() + assert pd.isna(result.values).all() def test_constructor_invalid(self): @@ -717,7 +717,7 @@ def test_coerce_list(self): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i 
tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 0d88e88030604..566354da4870d 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -10,7 +10,7 @@ import numpy as np -from pandas import (notnull, Series, Index, Float64Index, +from pandas import (notna, Series, Index, Float64Index, Int64Index, RangeIndex) import pandas.util.testing as tm @@ -929,7 +929,7 @@ def test_len_specialised(self): def test_where(self): i = self.create_index() - result = i.where(notnull(i)) + result = i.where(notna(i)) expected = i tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index 9a9912d4f0ab1..f4f669ee1d087 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -71,13 +71,13 @@ def test_minmax(self): for op in ['min', 'max']: # Return NaT obj = TimedeltaIndex([]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = TimedeltaIndex([pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) obj = TimedeltaIndex([pd.NaT, pd.NaT, pd.NaT]) - assert pd.isnull(getattr(obj, op)()) + assert pd.isna(getattr(obj, op)()) def test_numpy_minmax(self): dr = pd.date_range(start='2016-01-15', end='2016-01-20') diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index a991b7bbe140a..1a4d1b1d7abaa 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -6,7 +6,7 @@ import pandas as pd import pandas.util.testing as tm from pandas.util.testing import assert_series_equal -from pandas import (Series, Timedelta, to_timedelta, isnull, +from pandas import (Series, Timedelta, to_timedelta, isna, TimedeltaIndex) from pandas._libs.tslib import iNaT @@ -31,7 +31,7 @@ def conv(v): assert 
result.astype('int64') == iNaT result = to_timedelta(['', '']) - assert isnull(result).all() + assert isna(result).all() # pass thru result = to_timedelta(np.array([np.timedelta64(1, 's')])) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 27a889e58e55e..25e572ee09a6b 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -321,7 +321,7 @@ def test_setting_with_copy_bug(self): df = pd.DataFrame({'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', np.nan, 'd']}) - mask = pd.isnull(df.c) + mask = pd.isna(df.c) def f(): df[['c']][mask] = df[['b']][mask] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 769cf8ec395dd..1ba9f3101e7b6 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -7,7 +7,7 @@ import pandas as pd from pandas.compat import lrange, lmap -from pandas import Series, DataFrame, date_range, concat, isnull +from pandas import Series, DataFrame, date_range, concat, isna from pandas.util import testing as tm from pandas.tests.indexing.common import Base @@ -191,7 +191,7 @@ def test_iloc_getitem_dups(self): # cross-sectional indexing result = df.iloc[0, 0] - assert isnull(result) + assert isna(result) result = df.iloc[0, :] expected = Series([np.nan, 1, 3, 3], index=['A', 'B', 'A', 'B'], diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 9fa677eb624ae..e9c53ff3bf245 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -335,7 +335,7 @@ def test_multi_assign(self): df.iloc[1, 0] = np.nan df2 = df.copy() - mask = ~df2.FC.isnull() + mask = ~df2.FC.isna() cols = ['col1', 'col2'] dft = df2 * 2 diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 4d1f9936af983..34ed8782b346c 100644 --- 
a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -704,7 +704,7 @@ def test_missing_trailing_delimiters(self): 1,3,3, 1,4,5""" result = self.read_csv(StringIO(data)) - assert result['D'].isnull()[1:].all() + assert result['D'].isna()[1:].all() def test_skipinitialspace(self): s = ('"09-Apr-2012", "01:10:18.300", 2456026.548822908, 12849, ' @@ -718,7 +718,7 @@ def test_skipinitialspace(self): # it's 33 columns result = self.read_csv(sfile, names=lrange(33), na_values=['-9999.0'], header=None, skipinitialspace=True) - assert pd.isnull(result.iloc[0, 29]) + assert pd.isna(result.iloc[0, 29]) def test_utf16_bom_skiprows(self): # #2298 diff --git a/pandas/tests/io/parser/converters.py b/pandas/tests/io/parser/converters.py index 8fde709e39cae..1176b1e84e29b 100644 --- a/pandas/tests/io/parser/converters.py +++ b/pandas/tests/io/parser/converters.py @@ -133,7 +133,7 @@ def convert_score(x): result = self.read_csv(fh, converters={'score': convert_score, 'days': convert_days}, na_values=['', None]) - assert pd.isnull(result['days'][1]) + assert pd.isna(result['days'][1]) fh = StringIO(data) result2 = self.read_csv(fh, converters={'score': convert_score, diff --git a/pandas/tests/io/parser/na_values.py b/pandas/tests/io/parser/na_values.py index c6d1cc79b82d7..7fbf174e19eee 100644 --- a/pandas/tests/io/parser/na_values.py +++ b/pandas/tests/io/parser/na_values.py @@ -249,7 +249,7 @@ def test_na_trailing_columns(self): result = self.read_csv(StringIO(data)) assert result['Date'][1] == '2012-05-12' - assert result['UnitPrice'].isnull().all() + assert result['UnitPrice'].isna().all() def test_na_values_scalar(self): # see gh-12224 diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index 4507db108b684..e1ae1b577ea29 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -461,7 +461,7 @@ def test_parse_dates_empty_string(self): data = "Date, test\n2012-01-01, 1\n,2" 
result = self.read_csv(StringIO(data), parse_dates=["Date"], na_filter=False) - assert result['Date'].isnull()[1] + assert result['Date'].isna()[1] def test_parse_dates_noconvert_thousands(self): # see gh-14066 diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 4ef265dcd5113..7e99bf36e0dd4 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -380,7 +380,7 @@ def test_thousands_macau_stats(self): attrs={'class': 'style1'}) df = dfs[all_non_nan_table_index] - assert not any(s.isnull().any() for _, s in df.iteritems()) + assert not any(s.isna().any() for _, s in df.iteritems()) @pytest.mark.slow def test_thousands_macau_index_col(self): @@ -389,7 +389,7 @@ def test_thousands_macau_index_col(self): dfs = self.read_html(macau_data, index_col=0, header=0) df = dfs[all_non_nan_table_index] - assert not any(s.isnull().any() for _, s in df.iteritems()) + assert not any(s.isna().any() for _, s in df.iteritems()) def test_empty_tables(self): """ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c0d200560b477..fc17b5f85b68c 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -14,7 +14,7 @@ from pandas import (Series, DataFrame, Panel, Panel4D, MultiIndex, Int64Index, RangeIndex, Categorical, bdate_range, date_range, timedelta_range, Index, DatetimeIndex, - isnull) + isna) from pandas.compat import is_platform_windows, PY3, PY35, BytesIO, text_type from pandas.io.formats.printing import pprint_thing @@ -3948,7 +3948,7 @@ def test_string_select(self): store.append('df2', df2, data_columns=['x']) result = store.select('df2', 'x!=none') - expected = df2[isnull(df2.x)] + expected = df2[isna(df2.x)] assert_frame_equal(result, expected) # int ==/!= diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index deeb8cba2b228..a7c42391effe6 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -33,7 +33,7 @@ from 
pandas.core.dtypes.common import ( is_object_dtype, is_datetime64_dtype, is_datetime64tz_dtype) -from pandas import DataFrame, Series, Index, MultiIndex, isnull, concat +from pandas import DataFrame, Series, Index, MultiIndex, isna, concat from pandas import date_range, to_datetime, to_timedelta, Timestamp import pandas.compat as compat from pandas.compat import range, lrange, string_types, PY36 @@ -1530,7 +1530,7 @@ def test_dtype(self): assert isinstance(sqltypea, sqlalchemy.TEXT) assert isinstance(sqltypeb, sqlalchemy.TEXT) - def test_notnull_dtype(self): + def test_notna_dtype(self): cols = {'Bool': Series([True, None]), 'Date': Series([datetime(2012, 5, 1), None]), 'Int': Series([1, None], dtype='object'), @@ -1538,7 +1538,7 @@ def test_notnull_dtype(self): } df = DataFrame(cols) - tbl = 'notnull_dtype_test' + tbl = 'notna_dtype_test' df.to_sql(tbl, self.conn) returned_df = sql.read_sql_table(tbl, self.conn) # noqa meta = sqlalchemy.schema.MetaData(bind=self.conn) @@ -2005,7 +2005,7 @@ def test_dtype(self): assert self._get_sqlite_column_type( 'single_dtype_test', 'B') == 'STRING' - def test_notnull_dtype(self): + def test_notna_dtype(self): if self.flavor == 'mysql': pytest.skip('Not applicable to MySQL legacy') @@ -2016,7 +2016,7 @@ def test_notnull_dtype(self): } df = DataFrame(cols) - tbl = 'notnull_dtype_test' + tbl = 'notna_dtype_test' df.to_sql(tbl, self.conn) assert self._get_sqlite_column_type(tbl, 'Bool') == 'INTEGER' @@ -2069,7 +2069,7 @@ def format_query(sql, *args): """ processed_args = [] for arg in args: - if isinstance(arg, float) and isnull(arg): + if isinstance(arg, float) and isna(arg): arg = None formatter = _formatters[type(arg)] diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 7486c32f57fdb..46fea86c45925 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -8,7 +8,7 @@ from pandas.compat import StringIO, iteritems import pandas as pd from pandas import 
(DataFrame, concat, - read_csv, isnull, Series, date_range, + read_csv, isna, Series, date_range, Index, Panel, MultiIndex, Timestamp, DatetimeIndex) from pandas.util import testing as tm @@ -789,8 +789,8 @@ def test_append_different_columns(self): b = df[5:].loc[:, ['strings', 'ints', 'floats']] appended = a.append(b) - assert isnull(appended['strings'][0:4]).all() - assert isnull(appended['bools'][5:]).all() + assert isna(appended['strings'][0:4]).all() + assert isna(appended['bools'][5:]).all() def test_append_many(self): chunks = [self.frame[:5], self.frame[5:10], @@ -804,7 +804,7 @@ def test_append_many(self): result = chunks[0].append(chunks[1:]) tm.assert_frame_equal(result.loc[:, self.frame.columns], self.frame) assert (result['foo'][15:] == 'bar').all() - assert result['foo'][:15].isnull().all() + assert result['foo'][:15].isna().all() def test_append_preserve_index_name(self): # #980 diff --git a/pandas/tests/reshape/test_join.py b/pandas/tests/reshape/test_join.py index e4894307918c6..75c01fabea8f6 100644 --- a/pandas/tests/reshape/test_join.py +++ b/pandas/tests/reshape/test_join.py @@ -252,7 +252,7 @@ def test_join_with_len0(self): merged = self.target.join(self.source.reindex([]), on='C') for col in self.source: assert col in merged - assert merged[col].isnull().all() + assert merged[col].isna().all() merged2 = self.target.join(self.source.reindex([]), on='C', how='inner') @@ -266,7 +266,7 @@ def test_join_on_inner(self): joined = df.join(df2, on='key', how='inner') expected = df.join(df2, on='key') - expected = expected[expected['value'].notnull()] + expected = expected[expected['value'].notna()] tm.assert_series_equal(joined['key'], expected['key'], check_dtype=False) tm.assert_series_equal(joined['value'], expected['value'], @@ -734,7 +734,7 @@ def _check_join(left, right, result, join_col, how='left', # some smoke tests for c in join_col: - assert(result[c].notnull().all()) + assert(result[c].notna().all()) left_grouped = left.groupby(join_col) 
right_grouped = right.groupby(join_col) @@ -797,7 +797,7 @@ def _assert_all_na(join_chunk, source_columns, join_col): for c in source_columns: if c in join_col: continue - assert(join_chunk[c].isnull().all()) + assert(join_chunk[c].isna().all()) def _join_by_hand(a, b, how='left'): diff --git a/pandas/tests/reshape/test_merge.py b/pandas/tests/reshape/test_merge.py index 919675188576e..e2b75fe52f6fa 100644 --- a/pandas/tests/reshape/test_merge.py +++ b/pandas/tests/reshape/test_merge.py @@ -229,8 +229,8 @@ def test_handle_join_key_pass_array(self): merged2 = merge(right, left, left_on=key, right_on='key', how='outer') assert_series_equal(merged['key'], merged2['key']) - assert merged['key'].notnull().all() - assert merged2['key'].notnull().all() + assert merged['key'].notna().all() + assert merged2['key'].notna().all() left = DataFrame({'value': lrange(5)}, columns=['value']) right = DataFrame({'rvalue': lrange(6)}) @@ -926,8 +926,8 @@ def run_asserts(left, right): res = left.join(right, on=icols, how='left', sort=sort) assert len(left) < len(res) + 1 - assert not res['4th'].isnull().any() - assert not res['5th'].isnull().any() + assert not res['4th'].isna().any() + assert not res['5th'].isna().any() tm.assert_series_equal( res['4th'], - res['5th'], check_names=False) diff --git a/pandas/tests/reshape/test_merge_ordered.py b/pandas/tests/reshape/test_merge_ordered.py index 9469e98f336fd..9b1806ee52c1d 100644 --- a/pandas/tests/reshape/test_merge_ordered.py +++ b/pandas/tests/reshape/test_merge_ordered.py @@ -57,7 +57,7 @@ def test_multigroup(self): assert_frame_equal(result, result2.loc[:, result.columns]) result = merge_ordered(left, self.right, on='key', left_by='group') - assert result['group'].notnull().all() + assert result['group'].notna().all() def test_merge_type(self): class NotADataFrame(DataFrame): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9881ab72f3ef5..f0fe0166528ea 100644 --- 
a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -267,7 +267,7 @@ def test_pivot_index_with_nan(self): df.loc[1, 'b'] = df.loc[4, 'b'] = nan pv = df.pivot('a', 'b', 'c') - assert pv.notnull().values.sum() == len(df) + assert pv.notna().values.sum() == len(df) for _, row in df.iterrows(): assert pv.loc[row['a'], row['b']] == row['c'] diff --git a/pandas/tests/reshape/test_tile.py b/pandas/tests/reshape/test_tile.py index 2523f8ab9f776..91000747b41bb 100644 --- a/pandas/tests/reshape/test_tile.py +++ b/pandas/tests/reshape/test_tile.py @@ -4,7 +4,7 @@ import numpy as np from pandas.compat import zip -from pandas import (Series, Index, isnull, +from pandas import (Series, Index, isna, to_datetime, DatetimeIndex, Timestamp, Interval, IntervalIndex, Categorical, cut, qcut, date_range) @@ -140,12 +140,12 @@ def test_na_handling(self): result_arr = np.asarray(result) - ex_arr = np.where(isnull(arr), np.nan, result_arr) + ex_arr = np.where(isna(arr), np.nan, result_arr) tm.assert_almost_equal(result_arr, ex_arr) result = cut(arr, 4, labels=False) - ex_result = np.where(isnull(arr), np.nan, result) + ex_result = np.where(isna(arr), np.nan, result) tm.assert_almost_equal(result, ex_result) def test_inf_handling(self): @@ -200,7 +200,7 @@ def test_cut_out_of_bounds(self): result = cut(arr, [-1, 0, 1]) - mask = isnull(result) + mask = isna(result) ex_mask = (arr < -1) | (arr > 1) tm.assert_numpy_array_equal(mask, ex_mask) @@ -244,7 +244,7 @@ def test_qcut_nas(self): arr[:20] = np.nan result = qcut(arr, 4) - assert isnull(result[:20]).all() + assert isna(result[:20]).all() def test_qcut_index(self): result = qcut([0, 2], 2) @@ -502,9 +502,9 @@ def f(): result = cut(date_range('20130102', periods=5), bins=date_range('20130101', periods=2)) - mask = result.categories.isnull() + mask = result.categories.isna() tm.assert_numpy_array_equal(mask, np.array([False])) - mask = result.isnull() + mask = result.isna() tm.assert_numpy_array_equal( mask, 
np.array([False, True, True, True, True])) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 0695fe2243947..5f247cae1099b 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -6,7 +6,7 @@ import numpy as np from pandas import (NaT, Index, Timestamp, Timedelta, Period, DatetimeIndex, PeriodIndex, - TimedeltaIndex, Series, isnull) + TimedeltaIndex, Series, isna) from pandas.util import testing as tm from pandas._libs.tslib import iNaT @@ -95,7 +95,7 @@ def test_identity(klass): result = klass('NaT') assert result is NaT - assert isnull(klass('nat')) + assert isna(klass('nat')) @pytest.mark.parametrize('klass', [Timestamp, Timedelta, Period]) @@ -108,7 +108,7 @@ def test_equality(klass): klass('NAT').value == iNaT klass(None).value == iNaT klass(np.nan).value == iNaT - assert isnull(klass('nat')) + assert isna(klass('nat')) @pytest.mark.parametrize('klass', [Timestamp, Timedelta]) diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index ecc44204924d3..bc9a0388df9d9 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -638,8 +638,8 @@ def test_components(self): s[1] = np.nan result = s.dt.components - assert not result.iloc[0].isnull().all() - assert result.iloc[1].isnull().all() + assert not result.iloc[0].isna().all() + assert result.iloc[1].isna().all() def test_isoformat(self): td = Timedelta(days=6, minutes=50, seconds=3, diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 749af1c56a7f0..280a5b8fcae0c 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -10,7 +10,7 @@ import numpy as np import pandas as pd -from pandas import (Series, Categorical, DataFrame, isnull, notnull, +from pandas import (Series, Categorical, DataFrame, isna, notna, bdate_range, date_range, _np_version_under1p10) from pandas.core.index import 
MultiIndex from pandas.core.indexes.datetimes import Timestamp @@ -130,7 +130,7 @@ def test_sum_inf(self): arr = np.random.randn(100, 100).astype('f4') arr[:, 2] = np.inf - with cf.option_context("mode.use_inf_as_null", True): + with cf.option_context("mode.use_inf_as_na", True): assert_almost_equal(s.sum(), s2.sum()) res = nanops.nansum(arr, axis=1) @@ -269,10 +269,10 @@ def test_var_std(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.var(ddof=1) - assert isnull(result) + assert isna(result) result = s.std(ddof=1) - assert isnull(result) + assert isna(result) def test_sem(self): alt = lambda x: np.std(x, ddof=1) / np.sqrt(len(x)) @@ -286,7 +286,7 @@ def test_sem(self): # 1 - element series with ddof=1 s = self.ts.iloc[[0]] result = s.sem(ddof=1) - assert isnull(result) + assert isna(result) def test_skew(self): tm._skip_if_no_scipy() @@ -365,7 +365,7 @@ def test_argsort(self): assert s.dtype == 'datetime64[ns]' shifted = s.shift(-1) assert shifted.dtype == 'datetime64[ns]' - assert isnull(shifted[4]) + assert isna(shifted[4]) result = s.argsort() expected = Series(lrange(5), dtype='int64') @@ -524,8 +524,8 @@ def testit(): pytest.raises(TypeError, f, ds) # skipna or no - assert notnull(f(self.series)) - assert isnull(f(self.series, skipna=False)) + assert notna(f(self.series)) + assert isna(f(self.series, skipna=False)) # check the result is correct nona = self.series.dropna() @@ -743,10 +743,10 @@ def test_ops_consistency_on_empty(self): assert result == 0 result = Series(dtype=float).mean() - assert isnull(result) + assert isna(result) result = Series(dtype=float).median() - assert isnull(result) + assert isna(result) # timedelta64[ns] result = Series(dtype='m8[ns]').sum() @@ -769,11 +769,11 @@ def test_corr(self): # partial overlap tm.assert_almost_equal(self.ts[:15].corr(self.ts[5:]), 1) - assert isnull(self.ts[:15].corr(self.ts[5:], min_periods=12)) + assert isna(self.ts[:15].corr(self.ts[5:], min_periods=12)) ts1 = 
self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - assert isnull(ts1.corr(ts2, min_periods=12)) + assert isna(ts1.corr(ts2, min_periods=12)) # No overlap assert np.isnan(self.ts[::2].corr(self.ts[1::2])) @@ -781,7 +781,7 @@ def test_corr(self): # all NA cp = self.ts[:10].copy() cp[:] = np.nan - assert isnull(cp.corr(cp)) + assert isna(cp.corr(cp)) A = tm.makeTimeSeries() B = tm.makeTimeSeries() @@ -838,14 +838,14 @@ def test_cov(self): # all NA cp = self.ts[:10].copy() cp[:] = np.nan - assert isnull(cp.cov(cp)) + assert isna(cp.cov(cp)) # min_periods - assert isnull(self.ts[:15].cov(self.ts[5:], min_periods=12)) + assert isna(self.ts[:15].cov(self.ts[5:], min_periods=12)) ts1 = self.ts[:15].reindex(self.ts.index) ts2 = self.ts[5:].reindex(self.ts.index) - assert isnull(ts1.cov(ts2, min_periods=12)) + assert isna(ts1.cov(ts2, min_periods=12)) def test_count(self): assert self.ts.count() == len(self.ts) @@ -995,10 +995,10 @@ def test_clip_types_and_nulls(self): thresh = s[2] l = s.clip_lower(thresh) u = s.clip_upper(thresh) - assert l[notnull(l)].min() == thresh - assert u[notnull(u)].max() == thresh - assert list(isnull(s)) == list(isnull(l)) - assert list(isnull(s)) == list(isnull(u)) + assert l[notna(l)].min() == thresh + assert u[notna(u)].max() == thresh + assert list(isna(s)) == list(isna(l)) + assert list(isna(s)) == list(isna(u)) def test_clip_against_series(self): # GH #6966 @@ -1181,14 +1181,14 @@ def test_timedelta64_analytics(self): def test_idxmin(self): # test idxmin - # _check_stat_op approach can not be used here because of isnull check. + # _check_stat_op approach can not be used here because of isna check. 
# add some NaNs self.series[5:15] = np.NaN # skipna or no assert self.series[self.series.idxmin()] == self.series.min() - assert isnull(self.series.idxmin(skipna=False)) + assert isna(self.series.idxmin(skipna=False)) # no NaNs nona = self.series.dropna() @@ -1198,7 +1198,7 @@ def test_idxmin(self): # all NaNs allna = self.series * nan - assert isnull(allna.idxmin()) + assert isna(allna.idxmin()) # datetime64[ns] from pandas import date_range @@ -1223,14 +1223,14 @@ def test_numpy_argmin(self): def test_idxmax(self): # test idxmax - # _check_stat_op approach can not be used here because of isnull check. + # _check_stat_op approach can not be used here because of isna check. # add some NaNs self.series[5:15] = np.NaN # skipna or no assert self.series[self.series.idxmax()] == self.series.max() - assert isnull(self.series.idxmax(skipna=False)) + assert isna(self.series.idxmax(skipna=False)) # no NaNs nona = self.series.dropna() @@ -1240,7 +1240,7 @@ def test_idxmax(self): # all NaNs allna = self.series * nan - assert isnull(allna.idxmax()) + assert isna(allna.idxmax()) from pandas import date_range s = Series(date_range('20130102', periods=6)) @@ -1286,7 +1286,7 @@ def test_ptp(self): # GH11163 s = Series([3, 5, np.nan, -3, 10]) assert s.ptp() == 13 - assert pd.isnull(s.ptp(skipna=False)) + assert pd.isna(s.ptp(skipna=False)) mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2, 3]]) s = pd.Series([1, np.nan, 7, 3, 5, np.nan], index=mi) diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py index 2c5f0d7772cc2..e3be5427588b3 100644 --- a/pandas/tests/series/test_apply.py +++ b/pandas/tests/series/test_apply.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from pandas import (Index, Series, DataFrame, isnull) +from pandas import (Index, Series, DataFrame, isna) from pandas.compat import lrange from pandas import compat from pandas.util.testing import assert_series_equal, assert_frame_equal @@ -393,8 +393,8 @@ def test_map_int(self): 
merged = left.map(right) assert merged.dtype == np.float_ - assert isnull(merged['d']) - assert not isnull(merged['c']) + assert isna(merged['d']) + assert not isna(merged['c']) def test_map_type_inference(self): s = Series(lrange(3)) diff --git a/pandas/tests/series/test_asof.py b/pandas/tests/series/test_asof.py index 1f62d618b20e1..3104d85601434 100644 --- a/pandas/tests/series/test_asof.py +++ b/pandas/tests/series/test_asof.py @@ -3,8 +3,8 @@ import pytest import numpy as np -from pandas import (offsets, Series, notnull, - isnull, date_range, Timestamp) +from pandas import (offsets, Series, notna, + isna, date_range, Timestamp) import pandas.util.testing as tm @@ -23,12 +23,12 @@ def test_basic(self): dates = date_range('1/1/1990', periods=N * 3, freq='25s') result = ts.asof(dates) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] @@ -98,12 +98,12 @@ def test_periodindex(self): dates = date_range('1/1/1990', periods=N * 3, freq='37min') result = ts.asof(dates) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] result = ts.asof(list(dates)) - assert notnull(result).all() + assert notna(result).all() lb = ts.index[14] ub = ts.index[30] @@ -130,7 +130,7 @@ def test_periodindex(self): # no as of value d = ts.index[0].to_timestamp() - offsets.BDay() - assert isnull(ts.asof(d)) + assert isna(ts.asof(d)) def test_errors(self): @@ -170,7 +170,7 @@ def test_all_nans(self): # testing scalar input date = date_range('1/1/1990', periods=N * 3, freq='25s')[0] result = Series(np.nan, index=rng).asof(date) - assert isnull(result) + assert isna(result) # test name is propagated result = Series(np.nan, index=[1, 2, 3, 4], name='test').asof([4, 5]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 
d591aa4f567a9..e3029ad46beb6 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64tz_dtype) -from pandas import (Index, Series, isnull, date_range, +from pandas import (Index, Series, isna, date_range, NaT, period_range, MultiIndex, IntervalIndex) from pandas.core.indexes.datetimes import Timestamp, DatetimeIndex @@ -349,22 +349,22 @@ def test_constructor_datetimes_with_nulls(self): def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) - assert isnull(s).all() + assert isna(s).all() # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous s = Series(iNaT, index=lrange(5)) - assert not isnull(s).all() + assert not isna(s).all() s = Series(nan, dtype='M8[ns]', index=lrange(5)) - assert isnull(s).all() + assert isna(s).all() s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') - assert isnull(s[1]) + assert isna(s[1]) assert s.dtype == 'M8[ns]' s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') - assert isnull(s[1]) + assert isna(s[1]) assert s.dtype == 'M8[ns]' # GH3416 @@ -823,10 +823,10 @@ def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') val = series[3] - assert isnull(val) + assert isna(val) series[2] = val - assert isnull(series[2]) + assert isna(series[2]) def test_NaT_cast(self): # GH10747 diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 6d8a54b538237..c901fe50c070a 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -11,7 +11,7 @@ import pandas._libs.index as _index from pandas.core.dtypes.common import is_integer, is_scalar -from pandas import (Index, Series, DataFrame, isnull, +from pandas import (Index, Series, DataFrame, isna, date_range, NaT, MultiIndex, Timestamp, DatetimeIndex, Timedelta) from 
pandas.core.indexing import IndexingError @@ -254,7 +254,7 @@ def test_getitem_boolean(self): def test_getitem_boolean_empty(self): s = Series([], dtype=np.int64) s.index.name = 'index_name' - s = s[s.isnull()] + s = s[s.isna()] assert s.index.name == 'index_name' assert s.dtype == np.int64 @@ -1183,11 +1183,11 @@ def f(): s = Series(range(10)).astype(float) s[8] = None result = s[8] - assert isnull(result) + assert isna(result) s = Series(range(10)).astype(float) s[s > 8] = None - result = s[isnull(s)] + result = s[isna(s)] expected = Series(np.nan, index=[9]) assert_series_equal(result, expected) @@ -1981,7 +1981,7 @@ def test_reindex_series_add_nat(self): result = series.reindex(lrange(15)) assert np.issubdtype(result.dtype, np.dtype('M8[ns]')) - mask = result.isnull() + mask = result.isna() assert mask[-5:].all() assert not mask[:-5].any() @@ -2107,7 +2107,7 @@ def test_reindex_bool_pad(self): ts = self.ts[5:] bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index) filled_bool = bool_ts.reindex(self.ts.index, method='pad') - assert isnull(filled_bool[:5]).all() + assert isna(filled_bool[:5]).all() def test_reindex_like(self): other = self.ts[::2] diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 8e73c17684a16..2608e038e2049 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -11,7 +11,7 @@ import numpy as np import pandas as pd -from pandas import (Series, DataFrame, isnull, date_range, +from pandas import (Series, DataFrame, isna, date_range, MultiIndex, Index, Timestamp, NaT, IntervalIndex) from pandas.compat import range from pandas._libs.tslib import iNaT @@ -151,7 +151,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-02 10:00')]) tm.assert_series_equal(expected, result) # check s is not changed - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz)) 
expected = Series([Timestamp('2011-01-01 10:00'), @@ -159,14 +159,14 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00'), Timestamp('2011-01-02 10:00', tz=tz)]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00'), 'AAA', Timestamp('2011-01-03 10:00'), 'AAA'], dtype=object) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -175,7 +175,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00'), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -184,14 +184,14 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00'), Timestamp('2011-01-04 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) # DatetimeBlockTZ idx = pd.DatetimeIndex(['2011-01-01 10:00', pd.NaT, '2011-01-03 10:00', pd.NaT], tz=tz) s = pd.Series(idx) assert s.dtype == 'datetime64[ns, {0}]'.format(tz) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), @@ -199,7 +199,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-02 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', 
tz=tz)) idx = pd.DatetimeIndex(['2011-01-01 10:00', '2011-01-02 10:00', @@ -207,7 +207,7 @@ def test_datetime64_tz_fillna(self): tz=tz) expected = Series(idx) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna(pd.Timestamp('2011-01-02 10:00', tz=tz).to_pydatetime()) @@ -216,14 +216,14 @@ def test_datetime64_tz_fillna(self): tz=tz) expected = Series(idx) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna('AAA') expected = Series([Timestamp('2011-01-01 10:00', tz=tz), 'AAA', Timestamp('2011-01-03 10:00', tz=tz), 'AAA'], dtype=object) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00')}) @@ -232,7 +232,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = s.fillna({1: pd.Timestamp('2011-01-02 10:00', tz=tz), 3: pd.Timestamp('2011-01-04 10:00', tz=tz)}) @@ -241,7 +241,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2011-01-04 10:00', tz=tz)]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) # filling with a naive/other zone, coerce to object result = s.fillna(Timestamp('20130101')) @@ -250,7 +250,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2013-01-01')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) result = 
s.fillna(Timestamp('20130101', tz='US/Pacific')) expected = Series([Timestamp('2011-01-01 10:00', tz=tz), @@ -258,7 +258,7 @@ def test_datetime64_tz_fillna(self): Timestamp('2011-01-03 10:00', tz=tz), Timestamp('2013-01-01', tz='US/Pacific')]) tm.assert_series_equal(expected, result) - tm.assert_series_equal(pd.isnull(s), null_loc) + tm.assert_series_equal(pd.isna(s), null_loc) # with timezone # GH 15855 @@ -358,10 +358,10 @@ def test_fillna_nat(self): assert_frame_equal(filled, expected) assert_frame_equal(filled2, expected) - def test_isnull_for_inf(self): + def test_isna_for_inf(self): s = Series(['a', np.inf, np.nan, 1.0]) - with pd.option_context('mode.use_inf_as_null', True): - r = s.isnull() + with pd.option_context('mode.use_inf_as_na', True): + r = s.isna() dr = s.dropna() e = Series([False, True, True, False]) de = Series(['a', 1.0], index=[0, 3]) @@ -484,28 +484,28 @@ def test_timedelta64_nan(self): # nan ops on timedeltas td1 = td.copy() td1[0] = np.nan - assert isnull(td1[0]) + assert isna(td1[0]) assert td1[0].value == iNaT td1[0] = td[0] - assert not isnull(td1[0]) + assert not isna(td1[0]) td1[1] = iNaT - assert isnull(td1[1]) + assert isna(td1[1]) assert td1[1].value == iNaT td1[1] = td[1] - assert not isnull(td1[1]) + assert not isna(td1[1]) td1[2] = NaT - assert isnull(td1[2]) + assert isna(td1[2]) assert td1[2].value == iNaT td1[2] = td[2] - assert not isnull(td1[2]) + assert not isna(td1[2]) # boolean setting # this doesn't work, not sure numpy even supports it # result = td[(td>np.timedelta64(timedelta(days=3))) & # td