
Commit

Merge branch 'master' into doc-multiindex-get_slice_bound
proost authored Dec 23, 2019
2 parents de298f5 + 3577b5a commit 4cce86d
Showing 84 changed files with 1,060 additions and 967 deletions.
3 changes: 2 additions & 1 deletion ci/azure/windows.yml
@@ -31,7 +31,8 @@ jobs:
   - bash: |
       source activate pandas-dev
       conda list
-      ci\\incremental\\build.cmd
+      python setup.py build_ext -q -i
+      python -m pip install --no-build-isolation -e .
     displayName: 'Build'
   - bash: |
       source activate pandas-dev
8 changes: 6 additions & 2 deletions ci/code_checks.sh
@@ -52,7 +52,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     black --version

     MSG='Checking black formatting' ; echo $MSG
-    black . --check 
+    black . --check
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     # `setup.cfg` contains the list of error codes that are being ignored in flake8
@@ -104,7 +104,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
     isort --version-number

     # Imports - Check formatting using isort see setup.cfg for settings
-    MSG='Check import format using isort ' ; echo $MSG
+    MSG='Check import format using isort' ; echo $MSG
     ISORT_CMD="isort --recursive --check-only pandas asv_bench"
     if [[ "$GITHUB_ACTIONS" == "true" ]]; then
         eval $ISORT_CMD | awk '{print "##[error]" $0}'; RET=$(($RET + ${PIPESTATUS[0]}))
@@ -203,6 +203,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     invgrep -R --include=*.{py,pyx} '\.__class__' pandas
     RET=$(($RET + $?)) ; echo $MSG "DONE"

+    MSG='Check for use of xrange instead of range' ; echo $MSG
+    invgrep -R --include=*.{py,pyx} 'xrange' pandas
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
     MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
     INVGREP_APPEND=" <- trailing whitespaces found"
     invgrep -RI --exclude=\*.{svg,c,cpp,html,js} --exclude-dir=env "\s$" *
9 changes: 0 additions & 9 deletions ci/incremental/build.cmd

This file was deleted.

4 changes: 2 additions & 2 deletions ci/run_tests.sh
@@ -38,6 +38,6 @@ sh -c "$PYTEST_CMD"

 if [[ "$COVERAGE" && $? == 0 && "$TRAVIS_BRANCH" == "master" ]]; then
     echo "uploading coverage"
-    echo "bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME"
-    bash <(curl -s https://codecov.io/bash) -Z -c -F $TYPE -f $COVERAGE_FNAME
+    echo "bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME"
+    bash <(curl -s https://codecov.io/bash) -Z -c -f $COVERAGE_FNAME
 fi
Binary file removed doc/source/_static/favicon.ico
Binary file not shown.
8 changes: 6 additions & 2 deletions doc/source/conf.py
@@ -204,7 +204,11 @@
 # Theme options are theme-specific and customize the look and feel of a theme
 # further. For a list of options available for each theme, see the
 # documentation.
-# html_theme_options = {}
+html_theme_options = {
+    "external_links": [],
+    "github_url": "https://github.com/pandas-dev/pandas",
+    "twitter_url": "https://twitter.com/pandas_dev",
+}

 # Add any paths that contain custom themes here, relative to this directory.
 # html_theme_path = ["themes"]
@@ -228,7 +232,7 @@
 # The name of an image file (within the static path) to use as favicon of the
 # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-html_favicon = os.path.join(html_static_path[0], "favicon.ico")
+html_favicon = "../../web/pandas/static/img/favicon.ico"

 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
5 changes: 2 additions & 3 deletions doc/source/user_guide/io.rst
@@ -4763,10 +4763,10 @@ Parquet supports partitioning of data based on the values of one or more columns
 .. ipython:: python

     df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
-    df.to_parquet(fname='test', engine='pyarrow',
+    df.to_parquet(path='test', engine='pyarrow',
                   partition_cols=['a'], compression=None)

-The `fname` specifies the parent directory to which data will be saved.
+The `path` specifies the parent directory to which data will be saved.
 The `partition_cols` are the column names by which the dataset will be partitioned.
 Columns are partitioned in the order they are given. The partition splits are
 determined by the unique values in the partition columns.
@@ -4828,7 +4828,6 @@ See also some :ref:`cookbook examples <cookbook.sql>` for some advanced strategies
 The key functions are:

 .. autosummary::
-    :toctree: ../reference/api/

     read_sql_table
     read_sql_query
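For context, the renamed keyword in the partitioning snippet above behaves exactly as before; a minimal round trip, assuming pandas 1.0+ with pyarrow installed and a writable working directory:

    import pandas as pd

    df = pd.DataFrame({'a': [0, 0, 1, 1], 'b': [0, 1, 0, 1]})
    # 'test' is the parent directory; one subdirectory per unique value
    # of 'a' is created (test/a=0/..., test/a=1/...).
    df.to_parquet(path='test', engine='pyarrow',
                  partition_cols=['a'], compression=None)

    # Reading the directory back reassembles the partition column.
    print(pd.read_parquet('test', engine='pyarrow'))
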
9 changes: 8 additions & 1 deletion doc/source/user_guide/text.rst
@@ -74,6 +74,7 @@ These are places where the behavior of ``StringDtype`` objects differ from
 1. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
    that return **numeric** output will always return a nullable integer dtype,
    rather than either int or float dtype, depending on the presence of NA values.
+   Methods returning **boolean** output will return a nullable boolean dtype.

 .. ipython:: python
@@ -89,7 +90,13 @@ 1. For ``StringDtype``, :ref:`string accessor methods<api.series.str>`
     s.astype(object).str.count("a")
     s.astype(object).dropna().str.count("a")

-When NA values are present, the output dtype is float64.
+When NA values are present, the output dtype is float64. Similarly for
+methods returning boolean values.
+
+.. ipython:: python
+
+    s.str.isdigit()
+    s.str.match("a")

 2. Some string methods, like :meth:`Series.str.decode` are not available
    on ``StringArray`` because ``StringArray`` only holds strings, not
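A short sketch of the nullable-boolean behaviour documented above, assuming pandas 1.0+ where ``StringDtype`` is available:

    import pandas as pd

    # Series backed by the new StringDtype; the None becomes pd.NA.
    s = pd.Series(["a", None, "b1"], dtype="string")

    # Boolean-returning string accessors now yield the nullable "boolean"
    # dtype, so the missing entry stays <NA> instead of forcing object dtype.
    print(s.str.isdigit())   # dtype: boolean
    print(s.str.match("a"))  # dtype: boolean
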
5 changes: 3 additions & 2 deletions doc/source/whatsnew/v1.0.0.rst
@@ -502,7 +502,8 @@ Deprecations
 - :func:`pandas.json_normalize` is now exposed in the top-level namespace.
   Usage of ``json_normalize`` as ``pandas.io.json.json_normalize`` is now deprecated and
   it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`).
--
+- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)


 .. _whatsnew_1000.prior_deprecations:

@@ -711,7 +712,7 @@ Datetimelike
 - Bug in :func:`pandas.to_datetime` when called with ``None`` raising ``TypeError`` instead of returning ``NaT`` (:issue:`30011`)
 - Bug in :func:`pandas.to_datetime` failing for `deques` when using ``cache=True`` (the default) (:issue:`29403`)
 - Bug in :meth:`Series.item` with ``datetime64`` or ``timedelta64`` dtype, :meth:`DatetimeIndex.item`, and :meth:`TimedeltaIndex.item` returning an integer instead of a :class:`Timestamp` or :class:`Timedelta` (:issue:`30175`)
--
+- Bug in :class:`DatetimeIndex` addition when adding a non-optimized :class:`DateOffset` incorrectly dropping timezone information (:issue:`30336`)

 Timedelta
 ^^^^^^^^^
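One of the entries above deprecates the ``fname`` keyword in favour of ``path``; a sketch of what callers see during the transition, assuming pandas 1.0 with pyarrow installed:

    import warnings
    import pandas as pd

    df = pd.DataFrame({"a": [1, 2]})

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        df.to_parquet(fname="out.parquet")  # old name, remapped to path=
    print(caught[0].category)               # <class 'FutureWarning'>

    df.to_parquet(path="out.parquet")       # new name, no warning
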
21 changes: 20 additions & 1 deletion pandas/_libs/parsers.pyx
@@ -1367,7 +1367,26 @@ def _ensure_encoded(list lst):
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
-_NA_VALUES = _ensure_encoded(list(icom._NA_VALUES))
+STR_NA_VALUES = {
+    "-1.#IND",
+    "1.#QNAN",
+    "1.#IND",
+    "-1.#QNAN",
+    "#N/A N/A",
+    "#N/A",
+    "N/A",
+    "n/a",
+    "NA",
+    "#NA",
+    "NULL",
+    "null",
+    "NaN",
+    "-NaN",
+    "nan",
+    "-nan",
+    "",
+}
+_NA_VALUES = _ensure_encoded(list(STR_NA_VALUES))


 def _maybe_upcast(arr):
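The set being inlined above is what ``read_csv`` treats as missing when ``na_values`` is not supplied; a quick sketch:

    import io
    import pandas as pd

    # "NA", "null" and the empty string are all in the default NA set,
    # so they parse to NaN with no extra configuration.
    buf = io.StringIO("x,y\nNA,1\nnull,2\n,3\nfoo,4\n")
    print(pd.read_csv(buf)["x"])  # NaN, NaN, NaN, foo

    # keep_default_na=False disables the defaults shown in the diff.
    buf = io.StringIO("x,y\nNA,1\n")
    print(pd.read_csv(buf, keep_default_na=False)["x"])  # literal "NA"
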
2 changes: 1 addition & 1 deletion pandas/_libs/testing.pyx
@@ -159,7 +159,7 @@ cpdef assert_almost_equal(a, b,

     raise_assert_detail(obj, f"{obj} length are different", na, nb, r)

-    for i in xrange(len(a)):
+    for i in range(len(a)):
         try:
             assert_almost_equal(a[i], b[i],
                                 check_less_precise=check_less_precise)
25 changes: 18 additions & 7 deletions pandas/_libs/tslibs/timestamps.pyx
@@ -336,11 +336,22 @@ class Timestamp(_Timestamp):
         """
         return cls(datetime.combine(date, time))

-    def __new__(cls, object ts_input=_no_input,
-                object freq=None, tz=None, unit=None,
-                year=None, month=None, day=None,
-                hour=None, minute=None, second=None, microsecond=None,
-                nanosecond=None, tzinfo=None):
+    def __new__(
+        cls,
+        object ts_input=_no_input,
+        object freq=None,
+        tz=None,
+        unit=None,
+        year=None,
+        month=None,
+        day=None,
+        hour=None,
+        minute=None,
+        second=None,
+        microsecond=None,
+        nanosecond=None,
+        tzinfo=None
+    ):
         # The parameter list folds together legacy parameter names (the first
         # four) and positional and keyword parameter names from pydatetime.
         #
@@ -401,8 +412,8 @@ class Timestamp(_Timestamp):
             freq = None

         if getattr(ts_input, 'tzinfo', None) is not None and tz is not None:
-            raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with the"
-                             " tz parameter. Use tz_convert instead.")
+            raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with "
+                             "the tz parameter. Use tz_convert instead.")

         ts = convert_to_tsobject(ts_input, tz, unit, 0, 0, nanosecond or 0)

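The reworded error message above guards the case where a tz-aware value is combined with the ``tz`` argument; a sketch of that behaviour:

    import pandas as pd

    ts = pd.Timestamp("2019-12-23 12:00", tz="UTC")

    try:
        pd.Timestamp(ts, tz="US/Eastern")  # tz-aware input plus tz=
    except ValueError as exc:
        print(exc)  # "Cannot pass a datetime or Timestamp with tzinfo ..."

    # The suggested alternative: convert after construction.
    print(ts.tz_convert("US/Eastern"))
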
7 changes: 4 additions & 3 deletions pandas/core/arrays/datetimes.py
@@ -794,16 +794,17 @@ def _add_offset(self, offset):
                 values = self.tz_localize(None)
             else:
                 values = self
-            result = offset.apply_index(values)
-            if self.tz is not None:
-                result = result.tz_localize(self.tz)
+            result = offset.apply_index(values).tz_localize(self.tz)

         except NotImplementedError:
             warnings.warn(
                 "Non-vectorized DateOffset being applied to Series or DatetimeIndex",
                 PerformanceWarning,
             )
             result = self.astype("O") + offset
+            if len(self) == 0:
+                # _from_sequence won't be able to infer self.tz
+                return type(self)._from_sequence(result).tz_localize(self.tz)

         return type(self)._from_sequence(result, freq="infer")

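A sketch of the case the new ``len(self) == 0`` branch fixes (GH 30336), assuming ``CustomBusinessDay`` takes the non-vectorized path as it did at the time:

    import warnings
    import pandas as pd

    idx = pd.DatetimeIndex([], tz="US/Pacific")

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")  # silence the PerformanceWarning
        result = idx + pd.offsets.CustomBusinessDay()

    # Before this fix the empty result came back tz-naive.
    print(result.tz)  # US/Pacific
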
49 changes: 30 additions & 19 deletions pandas/core/frame.py
@@ -37,7 +37,12 @@

 from pandas._libs import algos as libalgos, lib
 from pandas.compat.numpy import function as nv
-from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
+from pandas.util._decorators import (
+    Appender,
+    Substitution,
+    deprecate_kwarg,
+    rewrite_axis_style_signature,
+)
 from pandas.util._validators import (
     validate_axis_style_args,
     validate_bool_kwarg,
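``deprecate_kwarg``, newly added to this import list, is what implements the ``fname`` -> ``path`` renames later in this file: it remaps the old keyword to the new one and emits a ``FutureWarning``. A rough, hypothetical sketch of the idea, not pandas' actual implementation:

    import functools
    import warnings

    def deprecate_kwarg(old_arg_name, new_arg_name):
        def decorate(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                if old_arg_name in kwargs:
                    # Warn, then forward the value under the new name.
                    warnings.warn(
                        f"the {old_arg_name!r} keyword is deprecated, "
                        f"use {new_arg_name!r} instead",
                        FutureWarning,
                        stacklevel=2,
                    )
                    kwargs[new_arg_name] = kwargs.pop(old_arg_name)
                return func(*args, **kwargs)
            return wrapper
        return decorate
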
@@ -1740,7 +1745,7 @@ def to_records(self, index=True, column_dtypes=None, index_dtypes=None):
         rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
                   dtype=[('I', 'S2'), ('A', '<i8'), ('B', '<f8')])

-        >>> index_dtypes = "<S{}".format(df.index.str.len().max())
+        >>> index_dtypes = f"<S{df.index.str.len().max()}"
         >>> df.to_records(index_dtypes=index_dtypes)
         rec.array([(b'a', 1, 0.5 ), (b'b', 2, 0.75)],
                   dtype=[('I', 'S1'), ('A', '<i8'), ('B', '<f8')])
@@ -1829,9 +1834,10 @@ def _from_arrays(cls, arrays, columns, index, dtype=None):
         mgr = arrays_to_mgr(arrays, columns, index, columns, dtype=dtype)
         return cls(mgr)

+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_stata(
         self,
-        fname,
+        path,
         convert_dates=None,
         write_index=True,
         byteorder=None,
@@ -1849,11 +1855,16 @@
         Parameters
         ----------
-        fname : str, buffer or path object
+        path : str, buffer or path object
             String, path object (pathlib.Path or py._path.local.LocalPath) or
             object implementing a binary write() function. If using a buffer
             then the buffer will not be automatically closed after the file
             data has been written.
+
+            .. versionchanged:: 1.0.0
+
+            Previously this was "fname"
+
         convert_dates : dict
             Dictionary mapping columns containing datetime types to stata
             internal format to use when writing the dates. Options are 'tc',
@@ -1927,7 +1938,7 @@ def to_stata(
             kwargs["convert_strl"] = convert_strl

         writer = statawriter(
-            fname,
+            path,
             self,
             convert_dates=convert_dates,
             byteorder=byteorder,
@@ -1939,22 +1950,24 @@
         )
         writer.write_file()

-    def to_feather(self, fname):
+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
+    def to_feather(self, path):
         """
         Write out the binary feather-format for DataFrames.

         Parameters
         ----------
-        fname : str
+        path : str
             String file path.
         """
         from pandas.io.feather_format import to_feather

-        to_feather(self, fname)
+        to_feather(self, path)

+    @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_parquet(
         self,
-        fname,
+        path,
         engine="auto",
         compression="snappy",
         index=None,
@@ -1973,11 +1986,13 @@
         Parameters
         ----------
-        fname : str
+        path : str
             File path or Root Directory path. Will be used as Root Directory
             path while writing a partitioned dataset.

-            .. versionchanged:: 0.24.0
+            .. versionchanged:: 1.0.0
+
+            Previously this was "fname"

         engine : {'auto', 'pyarrow', 'fastparquet'}, default 'auto'
             Parquet library to use. If 'auto', then the option
@@ -2034,7 +2049,7 @@ def to_parquet(

         to_parquet(
             self,
-            fname,
+            path,
             engine,
             compression=compression,
             index=index,
@@ -2340,13 +2355,9 @@ def _sizeof_fmt(num, size_qualifier):
             # returns size in human readable format
             for x in ["bytes", "KB", "MB", "GB", "TB"]:
                 if num < 1024.0:
-                    return "{num:3.1f}{size_q} {x}".format(
-                        num=num, size_q=size_qualifier, x=x
-                    )
+                    return f"{num:3.1f}{size_qualifier} {x}"
                 num /= 1024.0
-            return "{num:3.1f}{size_q} {pb}".format(
-                num=num, size_q=size_qualifier, pb="PB"
-            )
+            return f"{num:3.1f}{size_qualifier} PB"

         if verbose:
             _verbose_repr()
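A standalone version of the helper being converted to f-strings above, to make the loop concrete (a sketch; in pandas it is a closure inside ``DataFrame.info``):

    def _sizeof_fmt(num, size_qualifier=""):
        # returns size in human readable format
        for x in ["bytes", "KB", "MB", "GB", "TB"]:
            if num < 1024.0:
                return f"{num:3.1f}{size_qualifier} {x}"
            num /= 1024.0
        return f"{num:3.1f}{size_qualifier} PB"

    print(_sizeof_fmt(123456789))  # 117.7 MB
    print(_sizeof_fmt(2048, "+"))  # 2.0+ KB
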
@@ -2359,7 +2370,7 @@ def _sizeof_fmt(num, size_qualifier):
             _verbose_repr()

         counts = self._data.get_dtype_counts()
-        dtypes = ["{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())]
+        dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
         lines.append(f"dtypes: {', '.join(dtypes)}")

         if memory_usage is None: