From b03f7e52e859c5d20141a47aa4d6880a321af84d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 29 Jul 2017 23:58:03 +0200 Subject: [PATCH] DOC: further clean-up null/na changes (#17113) --- doc/source/basics.rst | 4 +-- doc/source/io.rst | 5 ---- doc/source/missing_data.rst | 2 +- doc/source/whatsnew/v0.10.0.txt | 46 +++++++++++++++++++++++++++------ doc/source/whatsnew/v0.4.x.txt | 2 +- pandas/core/config_init.py | 4 +-- 6 files changed, 44 insertions(+), 19 deletions(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c8138d795b836..fe20a7eb2b786 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -511,7 +511,7 @@ optional ``level`` parameter which applies only if the object has a :header: "Function", "Description" :widths: 20, 80 - ``count``, Number of non-na observations + ``count``, Number of non-NA observations ``sum``, Sum of values ``mean``, Mean of values ``mad``, Mean absolute deviation @@ -541,7 +541,7 @@ will exclude NAs on Series input by default: np.mean(df['one'].values) ``Series`` also has a method :meth:`~Series.nunique` which will return the -number of unique non-na values: +number of unique non-NA values: .. ipython:: python diff --git a/doc/source/io.rst b/doc/source/io.rst index 149c86aead135..bf68a0cae1d27 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -137,7 +137,6 @@ usecols : array-like or callable, default ``None`` Using this parameter results in much faster parsing time and lower memory usage. as_recarray : boolean, default ``False`` - .. deprecated:: 0.18.2 Please call ``pd.read_csv(...).to_records()`` instead. @@ -193,7 +192,6 @@ skiprows : list-like or integer, default ``None`` skipfooter : int, default ``0`` Number of lines at bottom of file to skip (unsupported with engine='c'). skip_footer : int, default ``0`` - .. 
deprecated:: 0.19.0 Use the ``skipfooter`` parameter instead, as they are identical @@ -208,13 +206,11 @@ low_memory : boolean, default ``True`` use the ``chunksize`` or ``iterator`` parameter to return the data in chunks. (Only valid with C parser) buffer_lines : int, default None - .. deprecated:: 0.19.0 Argument removed because its value is not respected by the parser compact_ints : boolean, default False - .. deprecated:: 0.19.0 Argument moved to ``pd.to_numeric`` @@ -223,7 +219,6 @@ compact_ints : boolean, default False parser will attempt to cast it as the smallest integer ``dtype`` possible, either signed or unsigned depending on the specification from the ``use_unsigned`` parameter. use_unsigned : boolean, default False - .. deprecated:: 0.18.2 Argument moved to ``pd.to_numeric`` diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 5c10df25051a2..d54288baa389b 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -36,7 +36,7 @@ When / why does data become missing? ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Some might quibble over our usage of *missing*. By "missing" we simply mean -**NA** or "not present for whatever reason". Many data sets simply arrive with +**NA** ("not available") or "not present for whatever reason". Many data sets simply arrive with missing data, either because it exists and was not collected or it never existed. For example, in a collection of financial time series, some of the time series might start on different dates. Thus, values prior to the start date diff --git a/doc/source/whatsnew/v0.10.0.txt b/doc/source/whatsnew/v0.10.0.txt index cf5369466308c..f0db1d82252c1 100644 --- a/doc/source/whatsnew/v0.10.0.txt +++ b/doc/source/whatsnew/v0.10.0.txt @@ -128,15 +128,45 @@ labeled the aggregated group with the end of the interval: the next day). ``notnull``. That they ever were was a relic of early pandas. This behavior can be re-enabled globally by the ``mode.use_inf_as_null`` option: -.. 
ipython:: python +.. code-block:: ipython - s = pd.Series([1.5, np.inf, 3.4, -np.inf]) - pd.isnull(s) - s.fillna(0) - pd.set_option('use_inf_as_null', True) - pd.isnull(s) - s.fillna(0) - pd.reset_option('use_inf_as_null') + In [6]: s = pd.Series([1.5, np.inf, 3.4, -np.inf]) + + In [7]: pd.isnull(s) + Out[7]: + 0 False + 1 False + 2 False + 3 False + Length: 4, dtype: bool + + In [8]: s.fillna(0) + Out[8]: + 0 1.500000 + 1 inf + 2 3.400000 + 3 -inf + Length: 4, dtype: float64 + + In [9]: pd.set_option('use_inf_as_null', True) + + In [10]: pd.isnull(s) + Out[10]: + 0 False + 1 True + 2 False + 3 True + Length: 4, dtype: bool + + In [11]: s.fillna(0) + Out[11]: + 0 1.5 + 1 0.0 + 2 3.4 + 3 0.0 + Length: 4, dtype: float64 + + In [12]: pd.reset_option('use_inf_as_null') - Methods with the ``inplace`` option now all return ``None`` instead of the calling object. E.g. code written like ``df = df.fillna(0, inplace=True)`` diff --git a/doc/source/whatsnew/v0.4.x.txt b/doc/source/whatsnew/v0.4.x.txt index 237ea84425051..ed9352059a6dc 100644 --- a/doc/source/whatsnew/v0.4.x.txt +++ b/doc/source/whatsnew/v0.4.x.txt @@ -9,7 +9,7 @@ New Features - Added Python 3 support using 2to3 (:issue:`200`) - :ref:`Added ` ``name`` attribute to ``Series``, now prints as part of ``Series.__repr__`` -- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to +- :ref:`Added ` instance methods ``isnull`` and ``notnull`` to Series (:issue:`209`, :issue:`203`) - :ref:`Added ` ``Series.align`` method for aligning two series with choice of join method (ENH56_) diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 76e30a6fb9d52..3e753aacf7c71 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -398,8 +398,8 @@ def table_schema_cb(key): use_inf_as_na_doc = """ : boolean - True means treat None, NaN, INF, -INF as na (old way), - False means None and NaN are null, but INF, -INF are not na + True means treat None, NaN, INF, -INF as NA (old way), + False 
means None and NaN are NA, but INF, -INF are not NA (new way). """