From 888d1fae80a975147e3f99f9254bf1dbca3affd3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 23 Aug 2019 01:06:03 -0700 Subject: [PATCH 01/95] DOC: update GroupBy.head()/tail() documentation (#27844) --- pandas/core/groupby/groupby.py | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3e8d079e47326b..3eeecd9c149e1b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2370,8 +2370,9 @@ def head(self, n=5): """ Return first n rows of each group. - Essentially equivalent to ``.apply(lambda x: x.head(n))``, - except ignores as_index flag. + Similar to ``.apply(lambda x: x.head(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). Returns ------- @@ -2382,10 +2383,6 @@ def head(self, n=5): >>> df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], ... columns=['A', 'B']) - >>> df.groupby('A', as_index=False).head(1) - A B - 0 1 2 - 2 5 6 >>> df.groupby('A').head(1) A B 0 1 2 @@ -2401,8 +2398,9 @@ def tail(self, n=5): """ Return last n rows of each group. - Essentially equivalent to ``.apply(lambda x: x.tail(n))``, - except ignores as_index flag. + Similar to ``.apply(lambda x: x.tail(n))``, but it returns a subset of rows + from the original DataFrame with original index and order preserved + (``as_index`` flag is ignored). Returns ------- @@ -2417,10 +2415,6 @@ def tail(self, n=5): A B 1 a 2 3 b 2 - >>> df.groupby('A').head(1) - A B - 0 a 1 - 2 b 1 """ self._reset_group_selection() mask = self._cumcount_array(ascending=False) < n From d5ba4c14c62c1a23f53773c4e3ecb3bd9a792a91 Mon Sep 17 00:00:00 2001 From: Wuraola Oyewusi Date: Fri, 23 Aug 2019 10:01:28 +0100 Subject: [PATCH 02/95] DOC: Remove alias for numpy.random.randn from the docs (#28082) --- doc/source/conf.py | 1 - doc/source/whatsnew/v0.10.0.rst | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 3ebc5d8b6333b2..a4b7d97c2cf5e2 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -315,7 +315,6 @@ import numpy as np import pandas as pd - randn = np.random.randn np.random.seed(123456) np.set_printoptions(precision=4, suppress=True) pd.options.display.max_rows = 15 diff --git a/doc/source/whatsnew/v0.10.0.rst b/doc/source/whatsnew/v0.10.0.rst index 59ea6b97762327..2e0442364b2f32 100644 --- a/doc/source/whatsnew/v0.10.0.rst +++ b/doc/source/whatsnew/v0.10.0.rst @@ -498,7 +498,7 @@ Here is a taste of what to expect. .. code-block:: ipython - In [58]: p4d = Panel4D(randn(2, 2, 5, 4), + In [58]: p4d = Panel4D(np.random.randn(2, 2, 5, 4), ....: labels=['Label1','Label2'], ....: items=['Item1', 'Item2'], ....: major_axis=date_range('1/1/2000', periods=5), From c7ceff98395b13aded759a6ac8d1fbe49fc9113c Mon Sep 17 00:00:00 2001 From: "Martina G. 
Vilas" Date: Fri, 23 Aug 2019 11:03:00 +0200 Subject: [PATCH 03/95] DOC: Fix docstrings lack of punctuation (#28031) --- pandas/core/arrays/base.py | 4 ++-- pandas/core/arrays/datetimes.py | 4 ++-- pandas/core/arrays/period.py | 2 +- pandas/core/indexes/datetimes.py | 12 ++++++------ pandas/core/indexes/multi.py | 6 +++--- pandas/core/indexes/timedeltas.py | 22 +++++++++++----------- pandas/core/indexing.py | 2 +- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 5c121172d0e4fc..0778b6726d1041 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -514,7 +514,7 @@ def fillna(self, value=None, method=None, limit=None): def dropna(self): """ - Return ExtensionArray without NA values + Return ExtensionArray without NA values. Returns ------- @@ -957,7 +957,7 @@ def _concat_same_type( cls, to_concat: Sequence[ABCExtensionArray] ) -> ABCExtensionArray: """ - Concatenate multiple array + Concatenate multiple array. Parameters ---------- diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 093334a815938e..70df708d36b3bf 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1158,7 +1158,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): def to_pydatetime(self): """ Return Datetime Array/Index as object ndarray of datetime.datetime - objects + objects. Returns ------- @@ -1283,7 +1283,7 @@ def to_perioddelta(self, freq): """ Calculate TimedeltaArray of difference between index values and index converted to PeriodArray at specified - freq. Used for vectorized offsets + freq. Used for vectorized offsets. Parameters ---------- diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 20ce11c70c3443..f2d74794eadf53 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -426,7 +426,7 @@ def __array__(self, dtype=None): @property def is_leap_year(self): """ - Logical indicating if the date belongs to a leap year + Logical indicating if the date belongs to a leap year. """ return isleapyear_arr(np.asarray(self.year)) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 51daad3b426493..272066d476ce34 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -661,7 +661,7 @@ def _get_time_micros(self): def to_series(self, keep_tz=None, index=None, name=None): """ Create a Series with both index and values equal to the index keys - useful with map for returning an indexer based on an index + useful with map for returning an indexer based on an index. Parameters ---------- @@ -687,10 +687,10 @@ def to_series(self, keep_tz=None, index=None, name=None): behaviour and silence the warning. index : Index, optional - index of resulting Series. If None, defaults to original index - name : string, optional - name of resulting Series. If None, defaults to name of original - index + Index of resulting Series. If None, defaults to original index. + name : str, optional + Name of resulting Series. If None, defaults to name of original + index. Returns ------- @@ -735,7 +735,7 @@ def to_series(self, keep_tz=None, index=None, name=None): def snap(self, freq="S"): """ - Snap time stamps to nearest occurring frequency + Snap time stamps to nearest occurring frequency. 
Returns
        -------
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index b614952ba1e043..761862b9f30e98 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -1250,7 +1250,7 @@ def _set_names(self, names, level=None, validate=True):
             self.levels[l].rename(name, inplace=True)
 
     names = property(
-        fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex\n"""
+        fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n"""
     )
 
     @Appender(_index_shared_docs["_get_grouper_for_level"])
@@ -1762,7 +1762,7 @@ def is_all_dates(self):
 
     def is_lexsorted(self):
         """
-        Return True if the codes are lexicographically sorted
+        Return True if the codes are lexicographically sorted.
 
         Returns
         -------
@@ -2246,7 +2246,7 @@ def swaplevel(self, i=-2, j=-1):
 
     def reorder_levels(self, order):
         """
-        Rearrange levels using input order. May not drop or duplicate levels
+        Rearrange levels using input order. May not drop or duplicate levels.
 
         Parameters
         ----------
diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py
index d06afa3daa792f..8cf14e2ca777e4 100644
--- a/pandas/core/indexes/timedeltas.py
+++ b/pandas/core/indexes/timedeltas.py
@@ -68,20 +68,20 @@ class TimedeltaIndex(
 ):
     """
     Immutable ndarray of timedelta64 data, represented internally as int64, and
-    which can be boxed to timedelta objects
+    which can be boxed to timedelta objects.
 
     Parameters
     ----------
     data : array-like (1-dimensional), optional
-        Optional timedelta-like data to construct index with
+        Optional timedelta-like data to construct index with.
     unit : unit of the arg (D,h,m,s,ms,us,ns) denote the unit, optional
-        which is an integer/float number
-    freq : string or pandas offset object, optional
+        Which is an integer/float number.
+    freq : str or pandas offset object, optional
         One of pandas date offset strings or corresponding objects. The string
         'infer' can be passed in order to set the frequency of the index as the
-        inferred frequency upon creation
+        inferred frequency upon creation.
     copy : bool
-        Make a copy of input ndarray
+        Make a copy of input ndarray.
     start : starting value, timedelta-like, optional
         If data is None, start is used as the start point in generating regular
         timedelta data.
@@ -90,24 +90,24 @@
     periods : int, optional, > 0
         Number of periods to generate, if generating index. Takes precedence
-        over end argument
+        over end argument.
 
         .. deprecated:: 0.24.0
 
     end : end time, timedelta-like, optional
         If periods is none, generated index will extend to first conforming
-        time on or just past end argument
+        time on or just past end argument.
 
         .. deprecated:: 0.24.0
 
-    closed : string or None, default None
+    closed : str or None, default None
         Make the interval closed with respect to the given frequency to
-        the 'left', 'right', or both sides (None)
+        the 'left', 'right', or both sides (None).
 
         .. deprecated:: 0.24.0
 
     name : object
-        Name to be stored in the index
+        Name to be stored in the index.
 
     Attributes
     ----------
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 7bb5e2fa3018d1..b8ca3419af4d7e 100755
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -49,7 +49,7 @@ def get_indexers_list():
 # the public IndexSlicerMaker
 class _IndexSlice:
     """
-    Create an object to more easily perform multi-index slicing
+    Create an object to more easily perform multi-index slicing.
See Also -------- From 9dc4d718e093ccbb15e024da6d3bad80f4e99ba6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 23 Aug 2019 08:36:19 -0500 Subject: [PATCH 04/95] DOC: Start 0.25.2 (#28111) * DOC: Start 0.25.2 --- doc/source/whatsnew/index.rst | 1 + doc/source/whatsnew/v0.25.2.rst | 110 ++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 doc/source/whatsnew/v0.25.2.rst diff --git a/doc/source/whatsnew/index.rst b/doc/source/whatsnew/index.rst index aeab2cf5809e79..fe80cc8bb959a5 100644 --- a/doc/source/whatsnew/index.rst +++ b/doc/source/whatsnew/index.rst @@ -24,6 +24,7 @@ Version 0.25 .. toctree:: :maxdepth: 2 + v0.25.2 v0.25.1 v0.25.0 diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst new file mode 100644 index 00000000000000..76473405374e84 --- /dev/null +++ b/doc/source/whatsnew/v0.25.2.rst @@ -0,0 +1,110 @@ +.. _whatsnew_0252: + +What's new in 0.25.2 (October XX, 2019) +--------------------------------------- + +These are the changes in pandas 0.25.2. See :ref:`release` for a full changelog +including other versions of pandas. + +.. _whatsnew_0252.bug_fixes: + +Bug fixes +~~~~~~~~~ + +Categorical +^^^^^^^^^^^ + +- + +Datetimelike +^^^^^^^^^^^^ + +- +- +- + +Timezones +^^^^^^^^^ + +- + +Numeric +^^^^^^^ + +- +- +- +- + +Conversion +^^^^^^^^^^ + +- + +Interval +^^^^^^^^ + +- + +Indexing +^^^^^^^^ + +- +- +- +- + +Missing +^^^^^^^ + +- + +I/O +^^^ + +- +- +- + +Plotting +^^^^^^^^ + +- +- +- + +Groupby/resample/rolling +^^^^^^^^^^^^^^^^^^^^^^^^ + +- +- +- +- +- + +Reshaping +^^^^^^^^^ + +- +- +- +- +- + +Sparse +^^^^^^ + +- + +Other +^^^^^ + +- +- + +.. _whatsnew_0.252.contributors: + +Contributors +~~~~~~~~~~~~ + +.. contributors:: v0.25.1..HEAD From 347ad8564ec7dbf679f61e88f6914ab20d7ae3da Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 23 Aug 2019 11:11:49 -0700 Subject: [PATCH 05/95] TST: fix compression tests when run without virtualenv/condaenv (#28051) --- pandas/tests/io/test_compression.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 16ca1109f266cc..d68b6a1effaa0a 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -1,6 +1,7 @@ import contextlib import os import subprocess +import sys import textwrap import warnings @@ -139,7 +140,7 @@ def test_with_missing_lzma(): import pandas """ ) - subprocess.check_output(["python", "-c", code]) + subprocess.check_output([sys.executable, "-c", code]) def test_with_missing_lzma_runtime(): @@ -156,4 +157,4 @@ def test_with_missing_lzma_runtime(): df.to_csv('foo.csv', compression='xz') """ ) - subprocess.check_output(["python", "-c", code]) + subprocess.check_output([sys.executable, "-c", code]) From e2483c022d58d0871cf2d961b9636bbf7d81917c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 23 Aug 2019 23:36:58 +0100 Subject: [PATCH 06/95] TYPING: more type hints for io.formats.printing (#27765) --- pandas/io/formats/printing.py | 40 ++++++++++++++--------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 4ec9094ce4abe4..ead51693da7919 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -3,12 +3,14 @@ """ import sys -from typing import Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union from 
pandas._config import get_option from pandas.core.dtypes.inference import is_sequence +EscapeChars = Union[Dict[str, str], Iterable[str]] + def adjoin(space: int, *lists: List[str], **kwargs) -> str: """ @@ -148,19 +150,16 @@ def _pprint_dict( def pprint_thing( - thing, + thing: Any, _nest_lvl: int = 0, - escape_chars: Optional[Union[Dict[str, str], Iterable[str]]] = None, + escape_chars: Optional[EscapeChars] = None, default_escapes: bool = False, quote_strings: bool = False, max_seq_items: Optional[int] = None, ) -> str: """ This function is the sanctioned way of converting objects - to a unicode representation. - - properly handles nested sequences containing unicode strings - (unicode(object) does not) + to a string representation and properly handles nested sequences. Parameters ---------- @@ -178,21 +177,13 @@ def pprint_thing( Returns ------- - result - unicode str + str """ - def as_escaped_unicode(thing, escape_chars=escape_chars): - # Unicode is fine, else we try to decode using utf-8 and 'replace' - # if that's not it either, we have no way of knowing and the user - # should deal with it himself. - - try: - result = str(thing) # we should try this first - except UnicodeDecodeError: - # either utf-8 or we replace errors - result = str(thing).decode("utf-8", "replace") - + def as_escaped_string( + thing: Any, escape_chars: Optional[EscapeChars] = escape_chars + ) -> str: translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} if isinstance(escape_chars, dict): if default_escapes: @@ -202,10 +193,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() + + result = str(thing) for c in escape_chars: result = result.replace(c, translate[c]) - - return str(result) + return result if hasattr(thing, "__next__"): return str(thing) @@ -224,11 +216,11 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): max_seq_items=max_seq_items, ) elif isinstance(thing, str) and quote_strings: - result = "'{thing}'".format(thing=as_escaped_unicode(thing)) + result = "'{thing}'".format(thing=as_escaped_string(thing)) else: - result = as_escaped_unicode(thing) + result = as_escaped_string(thing) - return str(result) # always unicode + return result def pprint_thing_encoded( From d75ee703efc0d201af2f05bd166b0f58ec5977b5 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 24 Aug 2019 00:38:17 +0200 Subject: [PATCH 07/95] Remove Encoding of values in char** For Labels (#27618) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/_libs/src/ujson/lib/ultrajson.h | 7 - pandas/_libs/src/ujson/lib/ultrajsonenc.c | 6 + pandas/_libs/src/ujson/python/objToJSON.c | 234 +++++++++++++++------- pandas/tests/io/json/test_pandas.py | 134 ++++++++----- 5 files changed, 250 insertions(+), 132 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4decc99087a9e4..8e25857e5ad693 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -159,6 +159,7 @@ I/O ^^^ - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) +- Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) - Plotting diff --git a/pandas/_libs/src/ujson/lib/ultrajson.h b/pandas/_libs/src/ujson/lib/ultrajson.h index 0470fef450dde3..ee6e7081bf00e2 100644 --- a/pandas/_libs/src/ujson/lib/ultrajson.h +++ 
b/pandas/_libs/src/ujson/lib/ultrajson.h @@ -307,11 +307,4 @@ EXPORTFUNCTION JSOBJ JSON_DecodeObject(JSONObjectDecoder *dec, const char *buffer, size_t cbBuffer); EXPORTFUNCTION void encode(JSOBJ, JSONObjectEncoder *, const char *, size_t); -#define Buffer_Reserve(__enc, __len) \ - if ((size_t)((__enc)->end - (__enc)->offset) < (size_t)(__len)) { \ - Buffer_Realloc((__enc), (__len)); \ - } - -void Buffer_Realloc(JSONObjectEncoder *enc, size_t cbNeeded); - #endif // PANDAS__LIBS_SRC_UJSON_LIB_ULTRAJSON_H_ diff --git a/pandas/_libs/src/ujson/lib/ultrajsonenc.c b/pandas/_libs/src/ujson/lib/ultrajsonenc.c index 2d6c823a45515e..d5b379bee585b4 100644 --- a/pandas/_libs/src/ujson/lib/ultrajsonenc.c +++ b/pandas/_libs/src/ujson/lib/ultrajsonenc.c @@ -714,6 +714,12 @@ int Buffer_EscapeStringValidated(JSOBJ obj, JSONObjectEncoder *enc, } } +#define Buffer_Reserve(__enc, __len) \ + if ( (size_t) ((__enc)->end - (__enc)->offset) < (size_t) (__len)) \ + { \ + Buffer_Realloc((__enc), (__len));\ + } \ + #define Buffer_AppendCharUnchecked(__enc, __chr) *((__enc)->offset++) = __chr; FASTCALL_ATTR INLINE_PREFIX void FASTCALL_MSVC strreverse(char *begin, diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 926440218b5d93..de336fb3aa1dcb 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -48,13 +48,13 @@ Numeric decoder derived from from TCL library #include <../../../tslibs/src/datetime/np_datetime_strings.h> #include "datetime.h" -#define NPY_JSON_BUFSIZE 32768 - static PyTypeObject *type_decimal; static PyTypeObject *cls_dataframe; static PyTypeObject *cls_series; static PyTypeObject *cls_index; static PyTypeObject *cls_nat; +PyObject *cls_timestamp; +PyObject *cls_timedelta; npy_int64 get_nat(void) { return NPY_MIN_INT64; } @@ -166,6 +166,8 @@ void *initObjToJSON(void) cls_index = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Index"); cls_series = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series"); + cls_timestamp = PyObject_GetAttrString(mod_pandas, "Timestamp"); + cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); Py_DECREF(mod_pandas); } @@ -787,30 +789,23 @@ JSOBJ NpyArr_iterGetValue(JSOBJ obj, JSONTypeContext *tc) { return GET_TC(tc)->itemValue; } -static void NpyArr_getLabel(JSOBJ obj, JSONTypeContext *tc, size_t *outLen, - npy_intp idx, char **labels) { - JSONObjectEncoder *enc = (JSONObjectEncoder *)tc->encoder; - PRINTMARK(); - *outLen = strlen(labels[idx]); - Buffer_Reserve(enc, *outLen); - memcpy(enc->offset, labels[idx], sizeof(char) * (*outLen)); - enc->offset += *outLen; - *outLen = 0; -} - char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { NpyArrContext *npyarr = GET_TC(tc)->npyarr; npy_intp idx; PRINTMARK(); + char *cStr; if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { idx = npyarr->index[npyarr->stridedim] - 1; - NpyArr_getLabel(obj, tc, outLen, idx, npyarr->columnLabels); + cStr = npyarr->columnLabels[idx]; } else { idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; - NpyArr_getLabel(obj, tc, outLen, idx, npyarr->rowLabels); + cStr = npyarr->rowLabels[idx]; } - return NULL; + + *outLen = strlen(cStr); + + return cStr; } //============================================================================= @@ -852,19 +847,22 @@ char *PdBlock_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; NpyArrContext *npyarr = blkCtxt->npyCtxts[0]; npy_intp idx; + char *cStr; 
PRINTMARK(); if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { idx = blkCtxt->colIdx - 1; - NpyArr_getLabel(obj, tc, outLen, idx, npyarr->columnLabels); + cStr = npyarr->columnLabels[idx]; } else { idx = GET_TC(tc)->iterNext != PdBlock_iterNext ? npyarr->index[npyarr->stridedim - npyarr->inc] - 1 : npyarr->index[npyarr->stridedim]; - NpyArr_getLabel(obj, tc, outLen, idx, npyarr->rowLabels); + cStr = npyarr->rowLabels[idx]; } - return NULL; + + *outLen = strlen(cStr); + return cStr; } char *PdBlock_iterGetName_Transpose(JSOBJ obj, JSONTypeContext *tc, @@ -872,16 +870,19 @@ char *PdBlock_iterGetName_Transpose(JSOBJ obj, JSONTypeContext *tc, PdBlockContext *blkCtxt = GET_TC(tc)->pdblock; NpyArrContext *npyarr = blkCtxt->npyCtxts[blkCtxt->colIdx]; npy_intp idx; + char *cStr; PRINTMARK(); if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { idx = npyarr->index[npyarr->stridedim] - 1; - NpyArr_getLabel(obj, tc, outLen, idx, npyarr->columnLabels); + cStr = npyarr->columnLabels[idx]; } else { idx = blkCtxt->colIdx; - NpyArr_getLabel(obj, tc, outLen, idx, npyarr->rowLabels); + cStr = npyarr->rowLabels[idx]; } - return NULL; + + *outLen = strlen(cStr); + return cStr; } int PdBlock_iterNext(JSOBJ obj, JSONTypeContext *tc) { @@ -1578,16 +1579,30 @@ void NpyArr_freeLabels(char **labels, npy_intp len) { } } -char **NpyArr_encodeLabels(PyArrayObject *labels, JSONObjectEncoder *enc, +/* + * Function: NpyArr_encodeLabels + * ----------------------------- + * + * Builds an array of "encoded" labels. + * + * labels: PyArrayObject pointer for labels to be "encoded" + * num : number of labels + * + * "encode" is quoted above because we aren't really doing encoding + * For historical reasons this function would actually encode the entire + * array into a separate buffer with a separate call to JSON_Encode + * and would leave it to complex pointer manipulation from there to + * unpack values as needed. To make things simpler and more idiomatic + * this has instead just stringified any input save for datetime values, + * which may need to be represented in various formats. + */ +char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, npy_intp num) { // NOTE this function steals a reference to labels. 
- PyObjectEncoder *pyenc = (PyObjectEncoder *)enc; PyObject *item = NULL; - npy_intp i, stride, len, need_quotes; + npy_intp i, stride, len; char **ret; - char *dataptr, *cLabel, *origend, *origst, *origoffset; - char labelBuffer[NPY_JSON_BUFSIZE]; - PyArray_GetItemFunc *getitem; + char *dataptr, *cLabel; int type_num; PRINTMARK(); @@ -1614,68 +1629,136 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, JSONObjectEncoder *enc, ret[i] = NULL; } - origst = enc->start; - origend = enc->end; - origoffset = enc->offset; - stride = PyArray_STRIDE(labels, 0); dataptr = PyArray_DATA(labels); - getitem = (PyArray_GetItemFunc *)PyArray_DESCR(labels)->f->getitem; type_num = PyArray_TYPE(labels); for (i = 0; i < num; i++) { - if (PyTypeNum_ISDATETIME(type_num) || PyTypeNum_ISNUMBER(type_num)) - { - item = (PyObject *)labels; - pyenc->npyType = type_num; - pyenc->npyValue = dataptr; - } else { - item = getitem(dataptr, labels); - if (!item) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - } - - cLabel = JSON_EncodeObject(item, enc, labelBuffer, NPY_JSON_BUFSIZE); - - if (item != (PyObject *)labels) { - Py_DECREF(item); - } - - if (PyErr_Occurred() || enc->errorMsg) { + item = PyArray_GETITEM(labels, dataptr); + if (!item) { + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + // TODO: for any matches on type_num (date and timedeltas) should use a + // vectorized solution to convert to epoch or iso formats + if (enc->datetimeIso && (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { + PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); + if (td == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL); + Py_DECREF(td); + if (iso == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(iso); + Py_DECREF(iso); + len = strlen(cLabel); + } + else if (PyTypeNum_ISDATETIME(type_num) || + PyDateTime_Check(item) || PyDate_Check(item)) { + PyObject *ts = PyObject_CallFunction(cls_timestamp, "(O)", item); + if (ts == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + if (enc->datetimeIso) { + PyObject *iso = PyObject_CallMethod(ts, "isoformat", NULL); + Py_DECREF(ts); + if (iso == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(iso); + Py_DECREF(iso); + len = strlen(cLabel); + } else { + npy_int64 value; + // TODO: refactor to not duplicate what goes on in beginTypeContext + if (PyObject_HasAttrString(ts, "value")) { + PRINTMARK(); + value = get_long_attr(ts, "value"); + } else { + PRINTMARK(); + value = + total_seconds(ts) * 1000000000LL; // nanoseconds per second + } + Py_DECREF(ts); + + switch (enc->datetimeUnit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + value /= 1000LL; + break; + case NPY_FR_ms: + value /= 1000000LL; + break; + case NPY_FR_s: + value /= 1000000000LL; + break; + default: + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + char buf[21] = {0}; // 21 chars for 2**63 as string + cLabel = buf; + sprintf(buf, "%" NPY_INT64_FMT, value); + len = strlen(cLabel); + } + } else { // Fallack to string representation + PyObject *str = PyObject_Str(item); + if (str == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(str); + Py_DECREF(str); + len = strlen(cLabel); + } + + Py_DECREF(item); + // Add 1 to include NULL 
terminator + ret[i] = PyObject_Malloc(len + 1); + memcpy(ret[i], cLabel, len + 1); + + if (PyErr_Occurred()) { NpyArr_freeLabels(ret, num); ret = 0; break; } - need_quotes = ((*cLabel) != '"'); - len = enc->offset - cLabel + 1 + 2 * need_quotes; - ret[i] = PyObject_Malloc(sizeof(char) * len); - if (!ret[i]) { PyErr_NoMemory(); ret = 0; break; } - if (need_quotes) { - ret[i][0] = '"'; - memcpy(ret[i] + 1, cLabel, sizeof(char) * (len - 4)); - ret[i][len - 3] = '"'; - } else { - memcpy(ret[i], cLabel, sizeof(char) * (len - 2)); - } - ret[i][len - 2] = ':'; - ret[i][len - 1] = '\0'; dataptr += stride; } - enc->start = origst; - enc->end = origend; - enc->offset = origoffset; - Py_DECREF(labels); return ret; } @@ -1972,7 +2055,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { } pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - (JSONObjectEncoder *)enc, + enc, pc->columnLabelsLen); if (!pc->columnLabels) { goto INVALID; @@ -2075,7 +2158,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { } pc->columnLabelsLen = PyObject_Size(tmpObj); pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - (JSONObjectEncoder *)enc, + enc, pc->columnLabelsLen); Py_DECREF(tmpObj); if (!pc->columnLabels) { @@ -2098,7 +2181,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { pc->rowLabelsLen = PyObject_Size(tmpObj); pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, - (JSONObjectEncoder *)enc, pc->rowLabelsLen); + enc, pc->rowLabelsLen); Py_DECREF(tmpObj); tmpObj = (enc->outputFormat == INDEX ? PyObject_GetAttrString(obj, "columns") @@ -2117,7 +2200,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { } pc->columnLabelsLen = PyObject_Size(tmpObj); pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - (JSONObjectEncoder *)enc, + enc, pc->columnLabelsLen); Py_DECREF(tmpObj); if (!pc->columnLabels) { @@ -2429,7 +2512,6 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { PRINTMARK(); ret = JSON_EncodeObject(oinput, encoder, buffer, sizeof(buffer)); PRINTMARK(); - if (PyErr_Occurred()) { PRINTMARK(); return NULL; diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 9c687f036aa684..9842a706f43d78 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1012,60 +1012,70 @@ def test_convert_dates_infer(self): result = read_json(dumps(data))[["id", infer_word]] assert_frame_equal(result, expected) - def test_date_format_frame(self): + @pytest.mark.parametrize( + "date,date_unit", + [ + ("20130101 20:43:42.123", None), + ("20130101 20:43:42", "s"), + ("20130101 20:43:42.123", "ms"), + ("20130101 20:43:42.123456", "us"), + ("20130101 20:43:42.123456789", "ns"), + ], + ) + def test_date_format_frame(self, date, date_unit): df = self.tsframe.copy() - def test_w_date(date, date_unit=None): - df["date"] = Timestamp(date) - df.iloc[1, df.columns.get_loc("date")] = pd.NaT - df.iloc[5, df.columns.get_loc("date")] = pd.NaT - if date_unit: - json = df.to_json(date_format="iso", date_unit=date_unit) - else: - json = df.to_json(date_format="iso") - result = read_json(json) - expected = df.copy() - expected.index = expected.index.tz_localize("UTC") - expected["date"] = expected["date"].dt.tz_localize("UTC") - assert_frame_equal(result, expected) - - test_w_date("20130101 20:43:42.123") - test_w_date("20130101 20:43:42", date_unit="s") - test_w_date("20130101 20:43:42.123", 
date_unit="ms")
-        test_w_date("20130101 20:43:42.123456", date_unit="us")
-        test_w_date("20130101 20:43:42.123456789", date_unit="ns")
+        df["date"] = Timestamp(date)
+        df.iloc[1, df.columns.get_loc("date")] = pd.NaT
+        df.iloc[5, df.columns.get_loc("date")] = pd.NaT
+        if date_unit:
+            json = df.to_json(date_format="iso", date_unit=date_unit)
+        else:
+            json = df.to_json(date_format="iso")
+        result = read_json(json)
+        expected = df.copy()
+        expected.index = expected.index.tz_localize("UTC")
+        expected["date"] = expected["date"].dt.tz_localize("UTC")
+        assert_frame_equal(result, expected)
 
+    def test_date_format_frame_raises(self):
+        df = self.tsframe.copy()
         msg = "Invalid value 'foo' for option 'date_unit'"
         with pytest.raises(ValueError, match=msg):
             df.to_json(date_format="iso", date_unit="foo")
 
-    def test_date_format_series(self):
-        def test_w_date(date, date_unit=None):
-            ts = Series(Timestamp(date), index=self.ts.index)
-            ts.iloc[1] = pd.NaT
-            ts.iloc[5] = pd.NaT
-            if date_unit:
-                json = ts.to_json(date_format="iso", date_unit=date_unit)
-            else:
-                json = ts.to_json(date_format="iso")
-            result = read_json(json, typ="series")
-            expected = ts.copy()
-            expected.index = expected.index.tz_localize("UTC")
-            expected = expected.dt.tz_localize("UTC")
-            assert_series_equal(result, expected)
-
-        test_w_date("20130101 20:43:42.123")
-        test_w_date("20130101 20:43:42", date_unit="s")
-        test_w_date("20130101 20:43:42.123", date_unit="ms")
-        test_w_date("20130101 20:43:42.123456", date_unit="us")
-        test_w_date("20130101 20:43:42.123456789", date_unit="ns")
+    @pytest.mark.parametrize(
+        "date,date_unit",
+        [
+            ("20130101 20:43:42.123", None),
+            ("20130101 20:43:42", "s"),
+            ("20130101 20:43:42.123", "ms"),
+            ("20130101 20:43:42.123456", "us"),
+            ("20130101 20:43:42.123456789", "ns"),
+        ],
+    )
+    def test_date_format_series(self, date, date_unit):
+        ts = Series(Timestamp(date), index=self.ts.index)
+        ts.iloc[1] = pd.NaT
+        ts.iloc[5] = pd.NaT
+        if date_unit:
+            json = ts.to_json(date_format="iso", date_unit=date_unit)
+        else:
+            json = ts.to_json(date_format="iso")
+        result = read_json(json, typ="series")
+        expected = ts.copy()
+        expected.index = expected.index.tz_localize("UTC")
+        expected = expected.dt.tz_localize("UTC")
+        assert_series_equal(result, expected)
 
+    def test_date_format_series_raises(self):
         ts = Series(Timestamp("20130101 20:43:42.123"), index=self.ts.index)
         msg = "Invalid value 'foo' for option 'date_unit'"
         with pytest.raises(ValueError, match=msg):
             ts.to_json(date_format="iso", date_unit="foo")
 
-    def test_date_unit(self):
+    @pytest.mark.parametrize("unit", ["s", "ms", "us", "ns"])
+    def test_date_unit(self, unit):
         df = self.tsframe.copy()
         df["date"] = Timestamp("20130101 20:43:42")
         dl = df.columns.get_loc("date")
@@ -1073,16 +1083,15 @@ def test_date_unit(self):
         df.iloc[2, dl] = Timestamp("21460101 20:43:42")
         df.iloc[4, dl] = pd.NaT
 
-        for unit in ("s", "ms", "us", "ns"):
-            json = df.to_json(date_format="epoch", date_unit=unit)
+        json = df.to_json(date_format="epoch", date_unit=unit)
 
-            # force date unit
-            result = read_json(json, date_unit=unit)
-            assert_frame_equal(result, df)
+        # force date unit
+        result = read_json(json, date_unit=unit)
+        assert_frame_equal(result, df)
 
-            # detect date unit
-            result = read_json(json, date_unit=None)
-            assert_frame_equal(result, df)
+        # detect date unit
+        result = read_json(json, date_unit=None)
+        assert_frame_equal(result, df)
 
     def test_weird_nested_json(self):
         # this used to core dump the parser
@@ -1611,3 +1620,30 @@ def 
test_read_timezone_information(self): ) expected = Series([88], index=DatetimeIndex(["2019-01-01 11:00:00"], tz="UTC")) assert_series_equal(result, expected) + + @pytest.mark.parametrize( + "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")] + ) + def test_timedelta_as_label(self, date_format, key): + df = pd.DataFrame([[1]], columns=[pd.Timedelta("1D")]) + expected = '{{"{key}":{{"0":1}}}}'.format(key=key) + result = df.to_json(date_format=date_format) + + assert result == expected + + @pytest.mark.parametrize( + "orient,expected", + [ + ("index", "{\"('a', 'b')\":{\"('c', 'd')\":1}}"), + ("columns", "{\"('c', 'd')\":{\"('a', 'b')\":1}}"), + # TODO: the below have separate encoding procedures + # They produce JSON but not in a consistent manner + pytest.param("split", "", marks=pytest.mark.skip), + pytest.param("table", "", marks=pytest.mark.skip), + ], + ) + def test_tuple_labels(self, orient, expected): + # GH 20500 + df = pd.DataFrame([[1]], index=[("a", "b")], columns=[("c", "d")]) + result = df.to_json(orient=orient) + assert result == expected From 5c0da7dd4034427745038381e8e2b77ac8c59d08 Mon Sep 17 00:00:00 2001 From: steveayers124 <46000954+steveayers124@users.noreply.github.com> Date: Sat, 24 Aug 2019 04:32:54 -0500 Subject: [PATCH 08/95] DOC: Fix GL01 and GL02 errors in the docstrings (#27988) --- pandas/conftest.py | 37 +++++++++++++++++++++++++------------ pandas/io/html.py | 24 ++++++++++++++++-------- 2 files changed, 41 insertions(+), 20 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index 2cf7bf6a6df41c..b032e14d8f7e1d 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -123,18 +123,22 @@ def ip(): @pytest.fixture(params=[True, False, None]) def observed(request): - """ pass in the observed keyword to groupby for [True, False] + """ + Pass in the observed keyword to groupby for [True, False] This indicates whether categoricals should return values for values which are not in the grouper [False / None], or only values which appear in the grouper [True]. [None] is supported for future compatibility if we decide to change the default (and would need to warn if this - parameter is not passed)""" + parameter is not passed). + """ return request.param @pytest.fixture(params=[True, False, None]) def ordered_fixture(request): - """Boolean 'ordered' parameter for Categorical.""" + """ + Boolean 'ordered' parameter for Categorical. + """ return request.param @@ -234,7 +238,8 @@ def cython_table_items(request): def _get_cython_table_params(ndframe, func_names_and_expected): - """combine frame, functions from SelectionMixin._cython_table + """ + Combine frame, functions from SelectionMixin._cython_table keys and expected result. Parameters @@ -242,7 +247,7 @@ def _get_cython_table_params(ndframe, func_names_and_expected): ndframe : DataFrame or Series func_names_and_expected : Sequence of two items The first item is a name of a NDFrame method ('sum', 'prod') etc. - The second item is the expected return value + The second item is the expected return value. Returns ------- @@ -341,7 +346,8 @@ def strict_data_files(pytestconfig): @pytest.fixture def datapath(strict_data_files): - """Get the path to a data file. + """ + Get the path to a data file. Parameters ---------- @@ -375,7 +381,9 @@ def deco(*args): @pytest.fixture def iris(datapath): - """The iris dataset as a DataFrame.""" + """ + The iris dataset as a DataFrame. 
+ """ return pd.read_csv(datapath("data", "iris.csv")) @@ -504,7 +512,8 @@ def tz_aware_fixture(request): @pytest.fixture(params=STRING_DTYPES) def string_dtype(request): - """Parametrized fixture for string dtypes. + """ + Parametrized fixture for string dtypes. * str * 'str' @@ -515,7 +524,8 @@ def string_dtype(request): @pytest.fixture(params=BYTES_DTYPES) def bytes_dtype(request): - """Parametrized fixture for bytes dtypes. + """ + Parametrized fixture for bytes dtypes. * bytes * 'bytes' @@ -525,7 +535,8 @@ def bytes_dtype(request): @pytest.fixture(params=OBJECT_DTYPES) def object_dtype(request): - """Parametrized fixture for object dtypes. + """ + Parametrized fixture for object dtypes. * object * 'object' @@ -535,7 +546,8 @@ def object_dtype(request): @pytest.fixture(params=DATETIME64_DTYPES) def datetime64_dtype(request): - """Parametrized fixture for datetime64 dtypes. + """ + Parametrized fixture for datetime64 dtypes. * 'datetime64[ns]' * 'M8[ns]' @@ -545,7 +557,8 @@ def datetime64_dtype(request): @pytest.fixture(params=TIMEDELTA64_DTYPES) def timedelta64_dtype(request): - """Parametrized fixture for timedelta64 dtypes. + """ + Parametrized fixture for timedelta64 dtypes. * 'timedelta64[ns]' * 'm8[ns]' diff --git a/pandas/io/html.py b/pandas/io/html.py index 9d2647f226f009..490c574463b9bd 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -1,4 +1,5 @@ -""":mod:`pandas.io.html` is a module containing functionality for dealing with +""" +:mod:`pandas.io.html` is a module containing functionality for dealing with HTML IO. """ @@ -58,7 +59,8 @@ def _importers(): def _remove_whitespace(s, regex=_RE_WHITESPACE): - """Replace extra whitespace inside of a string with a single space. + """ + Replace extra whitespace inside of a string with a single space. Parameters ---------- @@ -77,7 +79,8 @@ def _remove_whitespace(s, regex=_RE_WHITESPACE): def _get_skiprows(skiprows): - """Get an iterator given an integer, slice or container. + """ + Get an iterator given an integer, slice or container. Parameters ---------- @@ -107,7 +110,8 @@ def _get_skiprows(skiprows): def _read(obj): - """Try to read from a url, file or string. + """ + Try to read from a url, file or string. Parameters ---------- @@ -136,7 +140,8 @@ def _read(obj): class _HtmlFrameParser: - """Base class for parsers that parse HTML into DataFrames. + """ + Base class for parsers that parse HTML into DataFrames. Parameters ---------- @@ -515,7 +520,8 @@ def _handle_hidden_tables(self, tbl_list, attr_name): class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): - """HTML to DataFrame parser that uses BeautifulSoup under the hood. + """ + HTML to DataFrame parser that uses BeautifulSoup under the hood. See Also -------- @@ -622,7 +628,8 @@ def _build_xpath_expr(attrs): class _LxmlFrameParser(_HtmlFrameParser): - """HTML to DataFrame parser that uses lxml under the hood. + """ + HTML to DataFrame parser that uses lxml under the hood. Warning ------- @@ -937,7 +944,8 @@ def read_html( keep_default_na=True, displayed_only=True, ): - r"""Read HTML tables into a ``list`` of ``DataFrame`` objects. + r""" + Read HTML tables into a ``list`` of ``DataFrame`` objects. 
Parameters ---------- From 518d8aea8f1a7053b541fc6491a50fca30e6fb08 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 25 Aug 2019 08:54:40 -0700 Subject: [PATCH 09/95] Change trys to checks (#28121) --- pandas/core/internals/blocks.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index e24e6e088b92aa..f0ee56f403325a 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2830,9 +2830,9 @@ def _replace_single( regex = regex_re or to_rep_re # try to get the pattern attribute (compiled re) or it's a string - try: + if is_re(to_replace): pattern = to_replace.pattern - except AttributeError: + else: pattern = to_replace # if the pattern is not empty and to_replace is either a string or a @@ -2853,18 +2853,18 @@ def _replace_single( if isna(value) or not isinstance(value, str): def re_replacer(s): - try: + if is_re(rx) and isinstance(s, str): return value if rx.search(s) is not None else s - except TypeError: + else: return s else: # value is guaranteed to be a string here, s can be either a string # or null if it's null it gets returned def re_replacer(s): - try: + if is_re(rx) and isinstance(s, str): return rx.sub(value, s) - except TypeError: + else: return s f = np.vectorize(re_replacer, otypes=[self.dtype]) From 2165a6a64d4064af2bf79d7e6889bda2b6adb86f Mon Sep 17 00:00:00 2001 From: Bryant Moscon Date: Sun, 25 Aug 2019 11:56:15 -0400 Subject: [PATCH 10/95] Remove outdated docstring that no longer applies (#28137) --- pandas/compat/pickle_compat.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index bca33513b00698..87240a9f986c33 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -196,10 +196,6 @@ def load_newobj_ex(self): def load(fh, encoding=None, is_verbose=False): """load a pickle, with a provided encoding - if compat is True: - fake the old class hierarchy - if it works, then return the new type objects - Parameters ---------- fh : a filelike object From 09ab18f6dca48d4dde677ce9ed86444f8a937e32 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Sun, 25 Aug 2019 16:57:58 +0100 Subject: [PATCH 11/95] TYPING: _pytest.mark.structures.MarkDecorator -> Callable (#28134) --- pandas/util/_test_decorators.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 3de4e5d66d5774..627757aaa37412 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -25,9 +25,8 @@ def test_foo(): """ from distutils.version import LooseVersion import locale -from typing import Optional +from typing import Callable, Optional -from _pytest.mark.structures import MarkDecorator import pytest from pandas.compat import is_platform_32bit, is_platform_windows @@ -103,7 +102,7 @@ def _skip_if_no_scipy(): ) -def skip_if_installed(package: str,) -> MarkDecorator: +def skip_if_installed(package: str,) -> Callable: """ Skip a test if a package is installed. @@ -117,7 +116,7 @@ def skip_if_installed(package: str,) -> MarkDecorator: ) -def skip_if_no(package: str, min_version: Optional[str] = None) -> MarkDecorator: +def skip_if_no(package: str, min_version: Optional[str] = None) -> Callable: """ Generic function to help skip tests when required packages are not present on the testing system. 
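
In practice these helpers are consumed as pytest decorators; the returned
``pytest.mark.skipif`` mark is itself callable when applied to a test
function, which is what the broader ``Callable`` annotation above reflects.
A minimal sketch of that usage (the module alias, package name and version
below are illustrative, not taken from the patch):

    import pandas.util._test_decorators as td

    # The test is collected but skipped when scipy is missing or older
    # than the requested version.
    @td.skip_if_no("scipy", min_version="0.19.0")
    def test_requires_scipy():
        import scipy  # importable here, otherwise the test was skipped

        assert scipy.__version__ is not None
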
From 97f9bbf6d4b8af8691fabb7014b7e5aa006e1cf2 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sun, 25 Aug 2019 09:04:59 -0700 Subject: [PATCH 12/95] Contributing Guide for Type Hints (#27050) --- doc/source/development/contributing.rst | 130 ++++++++++++++++++++++++ 1 file changed, 130 insertions(+) diff --git a/doc/source/development/contributing.rst b/doc/source/development/contributing.rst index b38f7767ae0733..be6555b2ab9368 100644 --- a/doc/source/development/contributing.rst +++ b/doc/source/development/contributing.rst @@ -699,6 +699,136 @@ You'll also need to See :ref:`contributing.warnings` for more. +.. _contributing.type_hints: + +Type Hints +---------- + +*pandas* strongly encourages the use of :pep:`484` style type hints. New development should contain type hints and pull requests to annotate existing code are accepted as well! + +Style Guidelines +~~~~~~~~~~~~~~~~ + +Types imports should follow the ``from typing import ...`` convention. So rather than + +.. code-block:: python + + import typing + + primes = [] # type: typing.List[int] + +You should write + +.. code-block:: python + + from typing import List, Optional, Union + + primes = [] # type: List[int] + +``Optional`` should be used where applicable, so instead of + +.. code-block:: python + + maybe_primes = [] # type: List[Union[int, None]] + +You should write + +.. code-block:: python + + maybe_primes = [] # type: List[Optional[int]] + +In some cases in the code base classes may define class variables that shadow builtins. This causes an issue as described in `Mypy 1775 `_. The defensive solution here is to create an unambiguous alias of the builtin and use that without your annotation. For example, if you come across a definition like + +.. code-block:: python + + class SomeClass1: + str = None + +The appropriate way to annotate this would be as follows + +.. code-block:: python + + str_type = str + + class SomeClass2: + str = None # type: str_type + +In some cases you may be tempted to use ``cast`` from the typing module when you know better than the analyzer. This occurs particularly when using custom inference functions. For example + +.. code-block:: python + + from typing import cast + + from pandas.core.dtypes.common import is_number + + def cannot_infer_bad(obj: Union[str, int, float]): + + if is_number(obj): + ... + else: # Reasonably only str objects would reach this but... + obj = cast(str, obj) # Mypy complains without this! + return obj.upper() + +The limitation here is that while a human can reasonably understand that ``is_number`` would catch the ``int`` and ``float`` types mypy cannot make that same inference just yet (see `mypy #5206 `_. While the above works, the use of ``cast`` is **strongly discouraged**. Where applicable a refactor of the code to appease static analysis is preferable + +.. code-block:: python + + def cannot_infer_good(obj: Union[str, int, float]): + + if isinstance(obj, str): + return obj.upper() + else: + ... + +With custom types and inference this is not always possible so exceptions are made, but every effort should be exhausted to avoid ``cast`` before going down such paths. + +Syntax Requirements +~~~~~~~~~~~~~~~~~~~ + +Because *pandas* still supports Python 3.5, :pep:`526` does not apply and variables **must** be annotated with type comments. Specifically, this is a valid annotation within pandas: + +.. code-block:: python + + primes = [] # type: List[int] + +Whereas this is **NOT** allowed: + +.. code-block:: python + + primes: List[int] = [] # not supported in Python 3.5! 
+ +Note that function signatures can always be annotated per :pep:`3107`: + +.. code-block:: python + + def sum_of_primes(primes: List[int] = []) -> int: + ... + + +Pandas-specific Types +~~~~~~~~~~~~~~~~~~~~~ + +Commonly used types specific to *pandas* will appear in `pandas._typing `_ and you should use these where applicable. This module is private for now but ultimately this should be exposed to third party libraries who want to implement type checking against pandas. + +For example, quite a few functions in *pandas* accept a ``dtype`` argument. This can be expressed as a string like ``"object"``, a ``numpy.dtype`` like ``np.int64`` or even a pandas ``ExtensionDtype`` like ``pd.CategoricalDtype``. Rather than burden the user with having to constantly annotate all of those options, this can simply be imported and reused from the pandas._typing module + +.. code-block:: python + + from pandas._typing import Dtype + + def as_type(dtype: Dtype) -> ...: + ... + +This module will ultimately house types for repeatedly used concepts like "path-like", "array-like", "numeric", etc... and can also hold aliases for commonly appearing parameters like `axis`. Development of this module is active so be sure to refer to the source for the most up to date list of available types. + +Validating Type Hints +~~~~~~~~~~~~~~~~~~~~~ + +*pandas* uses `mypy `_ to statically analyze the code base and type hints. After making any change you can ensure your type hints are correct by running + +.. code-block:: shell + + mypy pandas .. _contributing.ci: From 5d9fd7e3b226b68e695d87121f584202aa6d4abc Mon Sep 17 00:00:00 2001 From: John Ward Date: Sun, 25 Aug 2019 15:11:00 -0500 Subject: [PATCH 13/95] DOC: Fixes to docstrings formatting (#28096) --- pandas/core/generic.py | 2 +- pandas/io/clipboards.py | 9 ++-- pandas/io/excel/_base.py | 4 +- pandas/io/pytables.py | 93 ++++++++++++++++++++++----------------- pandas/tseries/offsets.py | 10 +++-- 5 files changed, 68 insertions(+), 50 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index ba1c516b9b444e..90779baea32cbf 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1875,7 +1875,7 @@ def __iter__(self): # can we get a better explanation of this? def keys(self): """ - Get the 'info axis' (see Indexing for more) + Get the 'info axis' (see Indexing for more). This is index for Series, columns for DataFrame. diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index d38221d7842739..76c01535a26e79 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -9,8 +9,7 @@ def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover r""" - Read text from clipboard and pass to read_csv. See read_csv for the - full argument list + Read text from clipboard and pass to read_csv. Parameters ---------- @@ -18,9 +17,13 @@ def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover A string or regex delimiter. The default of '\s+' denotes one or more whitespace characters. + **kwargs + See read_csv for the full argument list. + Returns ------- - parsed : DataFrame + DataFrame + A parsed DataFrame object. """ encoding = kwargs.pop("encoding", "utf-8") diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 154656fbb250b5..997edf49d9e8fc 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -837,10 +837,10 @@ def parse( **kwds ): """ - Parse specified sheet(s) into a DataFrame + Parse specified sheet(s) into a DataFrame. Equivalent to read_excel(ExcelFile, ...) 
See the read_excel - docstring for more info on accepted parameters + docstring for more info on accepted parameters. Returns ------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 6af5dd6f1bf372..576c45a2f8097e 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -431,8 +431,9 @@ def _is_metadata_of(group, parent_group): class HDFStore: """ - Dict-like IO interface for storing pandas objects in PyTables - either Fixed or Table format. + Dict-like IO interface for storing pandas objects in PyTables. + + Either Fixed or Table format. Parameters ---------- @@ -564,13 +565,12 @@ def __exit__(self, exc_type, exc_value, traceback): def keys(self): """ - Return a (potentially unordered) list of the keys corresponding to the - objects stored in the HDFStore. These are ABSOLUTE path-names (e.g. - have the leading '/' + Return a list of keys corresponding to objects stored in HDFStore. Returns ------- list + List of ABSOLUTE path-names (e.g. have the leading '/'). """ return [n._v_pathname for n in self.groups()] @@ -703,7 +703,7 @@ def flush(self, fsync=False): def get(self, key): """ - Retrieve pandas object stored in file + Retrieve pandas object stored in file. Parameters ---------- @@ -711,7 +711,8 @@ def get(self, key): Returns ------- - obj : same type as object stored in file + object + Same type as object stored in file. """ group = self.get_node(key) if group is None: @@ -731,25 +732,31 @@ def select( **kwargs ): """ - Retrieve pandas object stored in file, optionally based on where - criteria + Retrieve pandas object stored in file, optionally based on where criteria. Parameters ---------- key : object - where : list of Term (or convertible) objects, optional - start : integer (defaults to None), row number to start selection - stop : integer (defaults to None), row number to stop selection - columns : a list of columns that if not None, will limit the return - columns - iterator : boolean, return an iterator, default False - chunksize : nrows to include in iteration, return an iterator - auto_close : boolean, should automatically close the store when - finished, default is False + Object being retrieved from file. + where : list, default None + List of Term (or convertible) objects, optional. + start : int, default None + Row number to start selection. + stop : int, default None + Row number to stop selection. + columns : list, default None + A list of columns that if not None, will limit the return columns. + iterator : bool, default False + Returns an iterator. + chunksize : int, default None + Number or rows to include in iteration, return an iterator. + auto_close : bool, default False + Should automatically close the store when finished. Returns ------- - The selected object + object + Retrieved object from file. """ group = self.get_node(key) if group is None: @@ -929,28 +936,30 @@ def func(_start, _stop, _where): def put(self, key, value, format=None, append=False, **kwargs): """ - Store object in HDFStore + Store object in HDFStore. Parameters ---------- - key : object - value : {Series, DataFrame} - format : 'fixed(f)|table(t)', default is 'fixed' + key : object + value : {Series, DataFrame} + format : 'fixed(f)|table(t)', default is 'fixed' fixed(f) : Fixed format - Fast writing/reading. Not-appendable, nor searchable + Fast writing/reading. Not-appendable, nor searchable. 
table(t) : Table format Write as a PyTables Table structure which may perform worse but allow more flexible operations like searching - / selecting subsets of the data - append : boolean, default False + / selecting subsets of the data. + append : bool, default False This will force Table format, append the input data to the existing. - data_columns : list of columns to create as data columns, or True to + data_columns : list, default None + List of columns to create as data columns, or True to use all columns. See `here `__. - encoding : default None, provide an encoding for strings - dropna : boolean, default False, do not write an ALL nan row to - the store settable by the option 'io.hdf.dropna_table' + encoding : str, default None + Provide an encoding for strings. + dropna : bool, default False, do not write an ALL nan row to + The store settable by the option 'io.hdf.dropna_table'. """ if format is None: format = get_option("io.hdf.default_format") or "fixed" @@ -1165,12 +1174,15 @@ def create_table_index(self, key, **kwargs): s.create_index(**kwargs) def groups(self): - """return a list of all the top-level nodes (that are not themselves a - pandas storage object) + """ + Return a list of all the top-level nodes. + + Each node returned is not a pandas storage object. Returns ------- list + List of objects. """ _tables() self._check_if_open() @@ -1188,10 +1200,12 @@ def groups(self): ] def walk(self, where="/"): - """ Walk the pytables group hierarchy for pandas objects + """ + Walk the pytables group hierarchy for pandas objects. This generator will yield the group path, subgroups and pandas object names for each group. + Any non-pandas PyTables objects that are not a group will be ignored. The `where` group itself is listed first (preorder), then each of its @@ -1202,18 +1216,17 @@ def walk(self, where="/"): Parameters ---------- - where : str, optional + where : str, default "/" Group where to start walking. - If not supplied, the root group is used. Yields ------ path : str - Full path to a group (without trailing '/') - groups : list of str - names of the groups contained in `path` - leaves : list of str - names of the pandas objects contained in `path` + Full path to a group (without trailing '/'). + groups : list + Names (strings) of the groups contained in `path`. + leaves : list + Names (strings) of the pandas objects contained in `path`. """ _tables() self._check_if_open() diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index a208d5ad2fea99..edf58ba3850a1c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -204,8 +204,7 @@ def __add__(date): normalize : bool, default False Whether to round the result of a DateOffset addition down to the previous midnight. - **kwds - Temporal parameter that add to or replace the offset value. + **kwds : Temporal parameter that add to or replace the offset value. Parameters that **add** to the offset (like Timedelta): @@ -233,16 +232,19 @@ def __add__(date): See Also -------- - dateutil.relativedelta.relativedelta + dateutil.relativedelta.relativedelta : The relativedelta type is designed + to be applied to an existing datetime an can replace specific components of + that datetime, or represents an interval of time. 
Examples -------- + >>> from pandas.tseries.offsets import DateOffset >>> ts = pd.Timestamp('2017-01-01 09:10:11') >>> ts + DateOffset(months=3) Timestamp('2017-04-01 09:10:11') >>> ts = pd.Timestamp('2017-01-01 09:10:11') - >>> ts + DateOffset(month=3) + >>> ts + DateOffset(months=2) Timestamp('2017-03-01 09:10:11') """ From 2c9c4223442cd555a1fbc894eb5e89792c09ea63 Mon Sep 17 00:00:00 2001 From: Bhuvana KA Date: Mon, 26 Aug 2019 07:49:37 +0530 Subject: [PATCH 14/95] DOC: Fix RangeIndex and other docstrings for missing period in summary (#28123) --- pandas/core/arrays/categorical.py | 2 +- pandas/core/base.py | 2 +- pandas/core/groupby/grouper.py | 2 +- pandas/core/indexes/base.py | 4 ++-- pandas/core/indexes/datetimes.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/period.py | 2 +- pandas/core/indexes/range.py | 12 ++++++------ pandas/core/indexes/timedeltas.py | 2 +- pandas/core/reshape/merge.py | 2 +- pandas/core/util/hashing.py | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a895da6184eeba..5929a8d51fe430 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -471,7 +471,7 @@ def ordered(self) -> Ordered: @property def dtype(self) -> CategoricalDtype: """ - The :class:`~pandas.api.types.CategoricalDtype` for this instance + The :class:`~pandas.api.types.CategoricalDtype` for this instance. """ return self._dtype diff --git a/pandas/core/base.py b/pandas/core/base.py index 7d2a62318232c3..767b5594450385 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1462,7 +1462,7 @@ def is_monotonic_decreasing(self): def memory_usage(self, deep=False): """ - Memory usage of the values + Memory usage of the values. Parameters ---------- diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 143755a47b97b3..3415c0e056a1ce 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -37,7 +37,7 @@ class Grouper: """ A Grouper allows the user to specify a groupby instruction for a target - object + object. This specification will select a column via the key parameter, or if the level and/or axis parameters are given, a level of the index of the target diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 415255cdbad06c..38c5e136d0e600 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2020,7 +2020,7 @@ def notna(self): _index_shared_docs[ "fillna" ] = """ - Fill NA/NaN values with the specified value + Fill NA/NaN values with the specified value. Parameters ---------- @@ -2051,7 +2051,7 @@ def fillna(self, value=None, downcast=None): _index_shared_docs[ "dropna" ] = """ - Return Index without NA/NaN values + Return Index without NA/NaN values. Parameters ---------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 272066d476ce34..cce390d98c0378 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1594,7 +1594,7 @@ def bdate_range( ): """ Return a fixed frequency DatetimeIndex, with business day as the default - frequency + frequency. 
Parameters ---------- diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 9361408290bb16..3874c6404565c7 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1310,7 +1310,7 @@ def interval_range( start=None, end=None, periods=None, freq=None, name=None, closed="right" ): """ - Return a fixed frequency IntervalIndex + Return a fixed frequency IntervalIndex. Parameters ---------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 5a2ca109597e85..f7bf77928bdc7c 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -994,7 +994,7 @@ def memory_usage(self, deep=False): def period_range(start=None, end=None, periods=None, freq=None, name=None): """ Return a fixed frequency PeriodIndex, with day (calendar) as the default - frequency + frequency. Parameters ---------- diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 43ed6e7b122eae..8783351cc74d1c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -236,7 +236,7 @@ def _format_with_header(self, header, na_rep="NaN", **kwargs): @cache_readonly def start(self): """ - The value of the `start` parameter (``0`` if this was not supplied) + The value of the `start` parameter (``0`` if this was not supplied). """ # GH 25710 return self._range.start @@ -244,7 +244,7 @@ def start(self): @property def _start(self): """ - The value of the `start` parameter (``0`` if this was not supplied) + The value of the `start` parameter (``0`` if this was not supplied). .. deprecated:: 0.25.0 Use ``start`` instead. @@ -259,14 +259,14 @@ def _start(self): @cache_readonly def stop(self): """ - The value of the `stop` parameter + The value of the `stop` parameter. """ return self._range.stop @property def _stop(self): """ - The value of the `stop` parameter + The value of the `stop` parameter. .. deprecated:: 0.25.0 Use ``stop`` instead. @@ -282,7 +282,7 @@ def _stop(self): @cache_readonly def step(self): """ - The value of the `step` parameter (``1`` if this was not supplied) + The value of the `step` parameter (``1`` if this was not supplied). """ # GH 25710 return self._range.step @@ -290,7 +290,7 @@ def step(self): @property def _step(self): """ - The value of the `step` parameter (``1`` if this was not supplied) + The value of the `step` parameter (``1`` if this was not supplied). .. deprecated:: 0.25.0 Use ``step`` instead. diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 8cf14e2ca777e4..b03d60c7b5b371 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -713,7 +713,7 @@ def timedelta_range( ): """ Return a fixed frequency TimedeltaIndex, with day as the default - frequency + frequency. Parameters ---------- diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 225de3f11cf7d7..d7fbe464cb1e52 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -178,7 +178,7 @@ def merge_ordered( """ Perform merge with optional filling/interpolation designed for ordered data like time series data. Optionally perform group-wise merge (see - examples) + examples). 
Parameters ---------- diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 73e126cf230a5e..bcdbf0855cbb49 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -58,7 +58,7 @@ def hash_pandas_object( obj, index=True, encoding="utf8", hash_key=None, categorize=True ): """ - Return a data hash of the Index/Series/DataFrame + Return a data hash of the Index/Series/DataFrame. Parameters ---------- From ea60c1966bf7291829a1479512d7aa89d08bd6dd Mon Sep 17 00:00:00 2001 From: jalbritt Date: Sun, 25 Aug 2019 21:21:36 -0500 Subject: [PATCH 15/95] DOC: Added periods to end of docstrings in explode function (#27973) --- pandas/core/frame.py | 8 ++++---- pandas/core/series.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f2bb964f35dbd4..9da7999724a186 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6183,14 +6183,14 @@ def stack(self, level=-1, dropna=True): def explode(self, column: Union[str, Tuple]) -> "DataFrame": """ - Transform each element of a list-like to a row, replicating the - index values. + Transform each element of a list-like to a row, replicating index values. .. versionadded:: 0.25.0 Parameters ---------- column : str or tuple + Column to explode. Returns ------- @@ -6206,8 +6206,8 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame": See Also -------- DataFrame.unstack : Pivot a level of the (necessarily hierarchical) - index labels - DataFrame.melt : Unpivot a DataFrame from wide format to long format + index labels. + DataFrame.melt : Unpivot a DataFrame from wide format to long format. Series.explode : Explode a DataFrame from list-like columns to long format. Notes diff --git a/pandas/core/series.py b/pandas/core/series.py index 8b6c963e40e9d7..6fb39c422de932 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3620,7 +3620,7 @@ def explode(self) -> "Series": Series.str.split : Split string values on specified separator. Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame. - DataFrame.melt : Unpivot a DataFrame from wide format to long format + DataFrame.melt : Unpivot a DataFrame from wide format to long format. DataFrame.explode : Explode a DataFrame from list-like columns to long format. From 765eb8d8a02aed564bb9d3be93cf36e355ba0d64 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Aug 2019 09:22:30 -0500 Subject: [PATCH 16/95] COMPAT: 3.8 compat for tests and DataFrame.query (#28101) * COMPAT: implement visit_Constant for 3.8 compat * Updated tests for new error messages. --- doc/source/whatsnew/v0.25.2.rst | 2 +- pandas/compat/__init__.py | 1 + pandas/core/computation/expr.py | 3 +++ pandas/tests/computation/test_eval.py | 27 +++++++++++++++++++++++++-- pandas/tests/io/parser/test_common.py | 5 ++++- pandas/tests/scalar/test_nat.py | 3 +++ 6 files changed, 37 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 76473405374e84..403c02c3ff129d 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -99,7 +99,7 @@ Sparse Other ^^^^^ -- +- Compatibility with Python 3.8 in :meth:`DataFrame.query` (:issue:`27261`) - .. 
_whatsnew_0.252.contributors: diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index b32da8da3a1fbe..9c778f68727c6b 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -15,6 +15,7 @@ PY35 = sys.version_info[:2] == (3, 5) PY36 = sys.version_info >= (3, 6) PY37 = sys.version_info >= (3, 7) +PY38 = sys.version_info >= (3, 8) PYPY = platform.python_implementation() == "PyPy" diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index a58f256cf61d41..4c164968575a16 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -582,6 +582,9 @@ def visit_NameConstant(self, node, **kwargs): def visit_Num(self, node, **kwargs): return self.const_type(node.n, self.env) + def visit_Constant(self, node, **kwargs): + return self.const_type(node.n, self.env) + def visit_Str(self, node, **kwargs): name = self.env.add_tmp(node.s) return self.term_type(name, self.env) diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py index c500760fa1390a..b6ffd8a83e409d 100644 --- a/pandas/tests/computation/test_eval.py +++ b/pandas/tests/computation/test_eval.py @@ -14,7 +14,7 @@ from pandas.core.dtypes.common import is_bool, is_list_like, is_scalar import pandas as pd -from pandas import DataFrame, Series, date_range +from pandas import DataFrame, Series, compat, date_range from pandas.core.computation import pytables from pandas.core.computation.check import _NUMEXPR_VERSION from pandas.core.computation.engines import NumExprClobberingError, _engines @@ -1267,7 +1267,10 @@ def test_assignment_column(self): msg = "left hand side of an assignment must be a single name" with pytest.raises(SyntaxError, match=msg): df.eval("d,c = a + b") - msg = "can't assign to function call" + if compat.PY38: + msg = "cannot assign to function call" + else: + msg = "can't assign to function call" with pytest.raises(SyntaxError, match=msg): df.eval('Timestamp("20131001") = a + b') @@ -1967,6 +1970,26 @@ def test_bool_ops_fails_on_scalars(lhs, cmp, rhs, engine, parser): pd.eval(ex, engine=engine, parser=parser) +@pytest.mark.parametrize( + "other", + [ + "'x'", + pytest.param( + "...", marks=pytest.mark.xfail(not compat.PY38, reason="GH-28116") + ), + ], +) +def test_equals_various(other): + df = DataFrame({"A": ["a", "b", "c"]}) + result = df.eval("A == {}".format(other)) + expected = Series([False, False, False], name="A") + if _USE_NUMEXPR: + # https://github.com/pandas-dev/pandas/issues/10239 + # lose name with numexpr engine. Remove when that's fixed. 
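+        # (only the Series ``name`` attribute is affected; the boolean
+        # values themselves compare equal either way)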
+ expected.name = None + tm.assert_series_equal(result, expected) + + def test_inf(engine, parser): s = "inf + 1" expected = np.inf diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index e5366a8357adbc..e04535df56663c 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -1898,7 +1898,10 @@ def test_null_byte_char(all_parsers): out = parser.read_csv(StringIO(data), names=names) tm.assert_frame_equal(out, expected) else: - msg = "NULL byte detected" + if compat.PY38: + msg = "line contains NUL" + else: + msg = "NULL byte detected" with pytest.raises(ParserError, match=msg): parser.read_csv(StringIO(data), names=names) diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 5b1c4f92bf3419..5eb69fb2952dcb 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -252,6 +252,7 @@ def _get_overlap_public_nat_methods(klass, as_tuple=False): "day_name", "dst", "floor", + "fromisocalendar", "fromisoformat", "fromordinal", "fromtimestamp", @@ -296,6 +297,8 @@ def test_overlap_public_nat_methods(klass, expected): # "fromisoformat" was introduced in 3.7 if klass is Timestamp and not compat.PY37: expected.remove("fromisoformat") + if klass is Timestamp and not compat.PY38: + expected.remove("fromisocalendar") assert _get_overlap_public_nat_methods(klass) == expected From cebc34327c74fed38ad8ee4cffb7b63999c83b9a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 26 Aug 2019 15:26:25 +0100 Subject: [PATCH 17/95] TYPING: --check-untyped-defs for Index.__new__ (#28141) --- pandas/core/indexes/base.py | 56 ++++++++++--------------------------- 1 file changed, 14 insertions(+), 42 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 38c5e136d0e600..2dbd592fc67873 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -10,6 +10,7 @@ import pandas._libs.join as libjoin from pandas._libs.lib import is_datetime_array from pandas._libs.tslibs import OutOfBoundsDatetime, Timestamp +from pandas._libs.tslibs.period import IncompatibleFrequency from pandas._libs.tslibs.timezones import tz_compare from pandas.compat import set_function_name from pandas.compat.numpy import function as nv @@ -262,7 +263,13 @@ def __new__( fastpath=None, tupleize_cols=True, **kwargs - ): + ) -> "Index": + + from .range import RangeIndex + from pandas import PeriodIndex, DatetimeIndex, TimedeltaIndex + from .numeric import Float64Index, Int64Index, UInt64Index + from .interval import IntervalIndex + from .category import CategoricalIndex if name is None and hasattr(data, "name"): name = data.name @@ -277,8 +284,6 @@ def __new__( if fastpath: return cls._simple_new(data, name) - from .range import RangeIndex - if isinstance(data, ABCPandasArray): # ensure users don't accidentally put a PandasArray in an index. 
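            # (to_numpy() unwraps the PandasArray to its backing ndarray, so
            # the dtype-based dispatch below operates on plain NumPy data)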
data = data.to_numpy() @@ -291,16 +296,12 @@ def __new__( # categorical elif is_categorical_dtype(data) or is_categorical_dtype(dtype): - from .category import CategoricalIndex - return CategoricalIndex(data, dtype=dtype, copy=copy, name=name, **kwargs) # interval elif ( is_interval_dtype(data) or is_interval_dtype(dtype) ) and not is_object_dtype(dtype): - from .interval import IntervalIndex - closed = kwargs.get("closed", None) return IntervalIndex(data, dtype=dtype, name=name, copy=copy, closed=closed) @@ -309,8 +310,6 @@ def __new__( or is_datetime64_any_dtype(dtype) or "tz" in kwargs ): - from pandas import DatetimeIndex - if is_dtype_equal(_o_dtype, dtype): # GH#23524 passing `dtype=object` to DatetimeIndex is invalid, # will raise in the where `data` is already tz-aware. So @@ -318,33 +317,24 @@ def __new__( # the DatetimeIndex construction. # Note we can pass copy=False because the .astype below # will always make a copy - result = DatetimeIndex(data, copy=False, name=name, **kwargs) + result = DatetimeIndex( + data, copy=False, name=name, **kwargs + ) # type: "Index" return result.astype(object) else: - result = DatetimeIndex( - data, copy=copy, name=name, dtype=dtype, **kwargs - ) - return result + return DatetimeIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) elif is_timedelta64_dtype(data) or is_timedelta64_dtype(dtype): - from pandas import TimedeltaIndex - if is_dtype_equal(_o_dtype, dtype): # Note we can pass copy=False because the .astype below # will always make a copy result = TimedeltaIndex(data, copy=False, name=name, **kwargs) return result.astype(object) else: - result = TimedeltaIndex( - data, copy=copy, name=name, dtype=dtype, **kwargs - ) - return result + return TimedeltaIndex(data, copy=copy, name=name, dtype=dtype, **kwargs) elif is_period_dtype(data) and not is_object_dtype(dtype): - from pandas import PeriodIndex - - result = PeriodIndex(data, copy=copy, name=name, **kwargs) - return result + return PeriodIndex(data, copy=copy, name=name, **kwargs) # extension dtype elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype): @@ -387,8 +377,6 @@ def __new__( pass # Return an actual float index. 
- from .numeric import Float64Index - return Float64Index(data, copy=copy, dtype=dtype, name=name) elif inferred == "string": @@ -405,19 +393,11 @@ def __new__( data = np.array(data, dtype=dtype, copy=copy) # maybe coerce to a sub-class - from pandas.core.indexes.period import PeriodIndex, IncompatibleFrequency - if is_signed_integer_dtype(data.dtype): - from .numeric import Int64Index - return Int64Index(data, copy=copy, dtype=dtype, name=name) elif is_unsigned_integer_dtype(data.dtype): - from .numeric import UInt64Index - return UInt64Index(data, copy=copy, dtype=dtype, name=name) elif is_float_dtype(data.dtype): - from .numeric import Float64Index - return Float64Index(data, copy=copy, dtype=dtype, name=name) elif issubclass(data.dtype.type, np.bool) or is_bool_dtype(data): subarr = data.astype("object") @@ -440,12 +420,8 @@ def __new__( return Index(subarr, copy=copy, dtype=object, name=name) elif inferred in ["floating", "mixed-integer-float", "integer-na"]: # TODO: Returns IntegerArray for integer-na case in the future - from .numeric import Float64Index - return Float64Index(subarr, copy=copy, name=name) elif inferred == "interval": - from .interval import IntervalIndex - try: return IntervalIndex(subarr, name=name, copy=copy) except ValueError: @@ -456,8 +432,6 @@ def __new__( pass elif inferred != "string": if inferred.startswith("datetime"): - from pandas import DatetimeIndex - try: return DatetimeIndex(subarr, copy=copy, name=name, **kwargs) except (ValueError, OutOfBoundsDatetime): @@ -467,8 +441,6 @@ def __new__( pass elif inferred.startswith("timedelta"): - from pandas import TimedeltaIndex - return TimedeltaIndex(subarr, copy=copy, name=name, **kwargs) elif inferred == "period": try: From 0d0daa8466d257c3329c54633a9a98867c86d009 Mon Sep 17 00:00:00 2001 From: Drew Heenan Date: Mon, 26 Aug 2019 07:27:40 -0700 Subject: [PATCH 18/95] ENH: Allow compression in NDFrame.to_csv to be a dict with optional arguments (#26023) (#26024) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/generic.py | 91 ++++++++++++------- pandas/io/common.py | 115 +++++++++++++++++++------ pandas/io/formats/csvs.py | 10 ++- pandas/tests/io/formats/test_to_csv.py | 41 +++++++++ 5 files changed, 200 insertions(+), 58 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 8e25857e5ad693..2bfc09e52c68b5 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -206,6 +206,7 @@ ExtensionArray Other ^^^^^ - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) +- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`) .. 
_whatsnew_1000.contributors:

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 90779baea32cbf..fac5e0f085fc62 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -7,7 +7,17 @@
 import pickle
 import re
 from textwrap import dedent
-from typing import Callable, Dict, FrozenSet, List, Optional, Set
+from typing import (
+    Callable,
+    Dict,
+    FrozenSet,
+    Hashable,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Union,
+)
 import warnings
 import weakref

@@ -50,7 +60,7 @@
 from pandas.core.dtypes.missing import isna, notna

 import pandas as pd
-from pandas._typing import Dtype
+from pandas._typing import Dtype, FilePathOrBuffer
 from pandas.core import missing, nanops
 import pandas.core.algorithms as algos
 from pandas.core.base import PandasObject, SelectionMixin
@@ -122,6 +132,9 @@ def _single_replace(self, to_replace, method, inplace, limit):
     return result


+bool_t = bool  # Need alias because NDFrame has def bool:
+
+
 class NDFrame(PandasObject, SelectionMixin):
     """
     N-dimensional analogue of DataFrame. Store multi-dimensional in a
@@ -3051,26 +3064,26 @@ def to_latex(

     def to_csv(
         self,
-        path_or_buf=None,
-        sep=",",
-        na_rep="",
-        float_format=None,
-        columns=None,
-        header=True,
-        index=True,
-        index_label=None,
-        mode="w",
-        encoding=None,
-        compression="infer",
-        quoting=None,
-        quotechar='"',
-        line_terminator=None,
-        chunksize=None,
-        date_format=None,
-        doublequote=True,
-        escapechar=None,
-        decimal=".",
-    ):
+        path_or_buf: Optional[FilePathOrBuffer] = None,
+        sep: str = ",",
+        na_rep: str = "",
+        float_format: Optional[str] = None,
+        columns: Optional[Sequence[Hashable]] = None,
+        header: Union[bool_t, List[str]] = True,
+        index: bool_t = True,
+        index_label: Optional[Union[bool_t, str, Sequence[Hashable]]] = None,
+        mode: str = "w",
+        encoding: Optional[str] = None,
+        compression: Optional[Union[str, Dict[str, str]]] = "infer",
+        quoting: Optional[int] = None,
+        quotechar: str = '"',
+        line_terminator: Optional[str] = None,
+        chunksize: Optional[int] = None,
+        date_format: Optional[str] = None,
+        doublequote: bool_t = True,
+        escapechar: Optional[str] = None,
+        decimal: Optional[str] = ".",
+    ) -> Optional[str]:
         r"""
         Write object to a comma-separated values (csv) file.

@@ -3117,16 +3130,21 @@ def to_csv(
         encoding : str, optional
             A string representing the encoding to use in the output file,
             defaults to 'utf-8'.
-        compression : str, default 'infer'
-            Compression mode among the following possible values: {'infer',
-            'gzip', 'bz2', 'zip', 'xz', None}. If 'infer' and `path_or_buf`
-            is path-like, then detect compression from the following
-            extensions: '.gz', '.bz2', '.zip' or '.xz'. (otherwise no
-            compression).
-
-            .. versionchanged:: 0.24.0
-
-               'infer' option added and set to default.
+        compression : str or dict, default 'infer'
+            If str, represents compression mode. If dict, value at 'method' is
+            the compression mode. Compression mode may be any of the following
+            possible values: {'infer', 'gzip', 'bz2', 'zip', 'xz', None}. If
+            compression mode is 'infer' and `path_or_buf` is path-like, then
+            detect compression mode from the following extensions: '.gz',
+            '.bz2', '.zip' or '.xz'. (otherwise no compression). If dict given
+            and mode is 'zip' or inferred as 'zip', other entries passed as
+            additional compression options.
+
+            .. versionchanged:: 1.0.0
+
+               May now be a dict with key 'method' as compression mode
+               and other entries as additional compression options if
+               compression mode is 'zip'.
quoting : optional constant from csv module Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format` @@ -3171,6 +3189,13 @@ def to_csv( ... 'weapon': ['sai', 'bo staff']}) >>> df.to_csv(index=False) 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n' + + # create 'out.zip' containing 'out.csv' + >>> compression_opts = dict(method='zip', + ... archive_name='out.csv') # doctest: +SKIP + + >>> df.to_csv('out.zip', index=False, + ... compression=compression_opts) # doctest: +SKIP """ df = self if isinstance(self, ABCDataFrame) else self.to_frame() @@ -3204,6 +3229,8 @@ def to_csv( if path_or_buf is None: return formatter.path_or_buf.getvalue() + return None + # ---------------------------------------------------------------------- # Fancy Indexing diff --git a/pandas/io/common.py b/pandas/io/common.py index 26b68dda7b464a..290022167e5205 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -9,7 +9,19 @@ import mmap import os import pathlib -from typing import IO, AnyStr, BinaryIO, Optional, TextIO, Type +from typing import ( + IO, + Any, + AnyStr, + BinaryIO, + Dict, + List, + Optional, + TextIO, + Tuple, + Type, + Union, +) from urllib.error import URLError # noqa from urllib.parse import ( # noqa urlencode, @@ -255,6 +267,40 @@ def file_path_to_url(path: str) -> str: _compression_to_extension = {"gzip": ".gz", "bz2": ".bz2", "zip": ".zip", "xz": ".xz"} +def _get_compression_method( + compression: Optional[Union[str, Dict[str, str]]] +) -> Tuple[Optional[str], Dict[str, str]]: + """ + Simplifies a compression argument to a compression method string and + a dict containing additional arguments. + + Parameters + ---------- + compression : str or dict + If string, specifies the compression method. If dict, value at key + 'method' specifies compression method. + + Returns + ------- + tuple of ({compression method}, Optional[str] + {compression arguments}, Dict[str, str]) + + Raises + ------ + ValueError on dict missing 'method' key + """ + # Handle dict + if isinstance(compression, dict): + compression_args = compression.copy() + try: + compression = compression_args.pop("method") + except KeyError: + raise ValueError("If dict, compression must have key 'method'") + else: + compression_args = {} + return compression, compression_args + + def _infer_compression( filepath_or_buffer: FilePathOrBuffer, compression: Optional[str] ) -> Optional[str]: @@ -266,8 +312,8 @@ def _infer_compression( Parameters ---------- - filepath_or_buffer : - a path (str) or buffer + filepath_or_buffer : str or file handle + File path or object. compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} If 'infer' and `filepath_or_buffer` is path-like, then detect compression from the following extensions: '.gz', '.bz2', '.zip', @@ -275,12 +321,11 @@ def _infer_compression( Returns ------- - string or None : - compression method + string or None Raises ------ - ValueError on invalid compression specified + ValueError on invalid compression specified. """ # No compression has been explicitly specified @@ -312,32 +357,49 @@ def _infer_compression( def _get_handle( - path_or_buf, mode, encoding=None, compression=None, memory_map=False, is_text=True + path_or_buf, + mode: str, + encoding=None, + compression: Optional[Union[str, Dict[str, Any]]] = None, + memory_map: bool = False, + is_text: bool = True, ): """ Get file handle for given path/buffer and mode. Parameters ---------- - path_or_buf : - a path (str) or buffer + path_or_buf : str or file handle + File path or object. 
mode : str - mode to open path_or_buf with + Mode to open path_or_buf with. encoding : str or None - compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None - If 'infer' and `filepath_or_buffer` is path-like, then detect - compression from the following extensions: '.gz', '.bz2', '.zip', - or '.xz' (otherwise no compression). + Encoding to use. + compression : str or dict, default None + If string, specifies compression mode. If dict, value at key 'method' + specifies compression mode. Compression mode must be one of {'infer', + 'gzip', 'bz2', 'zip', 'xz', None}. If compression mode is 'infer' + and `filepath_or_buffer` is path-like, then detect compression from + the following extensions: '.gz', '.bz2', '.zip', or '.xz' (otherwise + no compression). If dict and compression mode is 'zip' or inferred as + 'zip', other entries passed as additional compression options. + + .. versionchanged:: 1.0.0 + + May now be a dict with key 'method' as compression mode + and other keys as compression options if compression + mode is 'zip'. + memory_map : boolean, default False See parsers._parser_params for more information. is_text : boolean, default True whether file/buffer is in text format (csv, json, etc.), or in binary - mode (pickle, etc.) + mode (pickle, etc.). Returns ------- f : file-like - A file-like object + A file-like object. handles : list of file-like objects A list of file-like object that were opened in this function. """ @@ -346,15 +408,16 @@ def _get_handle( need_text_wrapping = (BufferedIOBase, S3File) except ImportError: - need_text_wrapping = BufferedIOBase + need_text_wrapping = BufferedIOBase # type: ignore - handles = list() + handles = list() # type: List[IO] f = path_or_buf # Convert pathlib.Path/py.path.local or string path_or_buf = _stringify_path(path_or_buf) is_path = isinstance(path_or_buf, str) + compression, compression_args = _get_compression_method(compression) if is_path: compression = _infer_compression(path_or_buf, compression) @@ -376,7 +439,7 @@ def _get_handle( # ZIP Compression elif compression == "zip": - zf = BytesZipFile(path_or_buf, mode) + zf = BytesZipFile(path_or_buf, mode, **compression_args) # Ensure the container is closed as well. 
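            # (the extra dict entries, e.g. ``archive_name``, were split out
            # by _get_compression_method and flow into BytesZipFile here)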
handles.append(zf) if zf.mode == "w": @@ -429,9 +492,9 @@ def _get_handle( if memory_map and hasattr(f, "fileno"): try: - g = MMapWrapper(f) + wrapped = MMapWrapper(f) f.close() - f = g + f = wrapped except Exception: # we catch any errors that may have occurred # because that is consistent with the lower-level @@ -456,15 +519,19 @@ def __init__( self, file: FilePathOrBuffer, mode: str, - compression: int = zipfile.ZIP_DEFLATED, + archive_name: Optional[str] = None, **kwargs ): if mode in ["wb", "rb"]: mode = mode.replace("b", "") - super().__init__(file, mode, compression, **kwargs) + self.archive_name = archive_name + super().__init__(file, mode, zipfile.ZIP_DEFLATED, **kwargs) def write(self, data): - super().writestr(self.filename, data) + archive_name = self.filename + if self.archive_name is not None: + archive_name = self.archive_name + super().writestr(archive_name, data) @property def closed(self): diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 60daf311397e80..e25862537cbfc5 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -22,6 +22,7 @@ from pandas.io.common import ( UnicodeWriter, + _get_compression_method, _get_handle, _infer_compression, get_filepath_or_buffer, @@ -58,6 +59,9 @@ def __init__( if path_or_buf is None: path_or_buf = StringIO() + # Extract compression mode as given, if dict + compression, self.compression_args = _get_compression_method(compression) + self.path_or_buf, _, _, _ = get_filepath_or_buffer( path_or_buf, encoding=encoding, compression=compression, mode=mode ) @@ -178,7 +182,7 @@ def save(self): self.path_or_buf, self.mode, encoding=self.encoding, - compression=self.compression, + compression=dict(self.compression_args, method=self.compression), ) close = True @@ -206,11 +210,13 @@ def save(self): if hasattr(self.path_or_buf, "write"): self.path_or_buf.write(buf) else: + compression = dict(self.compression_args, method=self.compression) + f, handles = _get_handle( self.path_or_buf, self.mode, encoding=self.encoding, - compression=self.compression, + compression=compression, ) f.write(buf) close = True diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index ee236a8253b01a..ab44b8b8059eb4 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -514,3 +514,44 @@ def test_to_csv_compression(self, compression_only, read_infer, to_infer): df.to_csv(path, compression=to_compression) result = pd.read_csv(path, index_col=0, compression=read_compression) tm.assert_frame_equal(result, df) + + def test_to_csv_compression_dict(self, compression_only): + # GH 26023 + method = compression_only + df = DataFrame({"ABC": [1]}) + filename = "to_csv_compress_as_dict." 
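+        # gzip files conventionally use the ".gz" suffix rather than ".gzip"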
+        filename += "gz" if method == "gzip" else method
+        with tm.ensure_clean(filename) as path:
+            df.to_csv(path, compression={"method": method})
+            read_df = pd.read_csv(path, index_col=0)
+            tm.assert_frame_equal(read_df, df)
+
+    def test_to_csv_compression_dict_no_method_raises(self):
+        # GH 26023
+        df = DataFrame({"ABC": [1]})
+        compression = {"some_option": True}
+        msg = "must have key 'method'"
+
+        with tm.ensure_clean("out.zip") as path:
+            with pytest.raises(ValueError, match=msg):
+                df.to_csv(path, compression=compression)
+
+    @pytest.mark.parametrize("compression", ["zip", "infer"])
+    @pytest.mark.parametrize(
+        "archive_name", [None, "test_to_csv.csv", "test_to_csv.zip"]
+    )
+    def test_to_csv_zip_arguments(self, compression, archive_name):
+        # GH 26023
+        from zipfile import ZipFile
+
+        df = DataFrame({"ABC": [1]})
+        with tm.ensure_clean("to_csv_archive_name.zip") as path:
+            df.to_csv(
+                path, compression={"method": compression, "archive_name": archive_name}
+            )
+            zp = ZipFile(path)
+            expected_arcname = path if archive_name is None else archive_name
+            expected_arcname = os.path.basename(expected_arcname)
+            assert len(zp.filelist) == 1
+            archived_file = os.path.basename(zp.filelist[0].filename)
+            assert archived_file == expected_arcname

From a1bdacfaf0693336b957b1bd3821f15c05120aff Mon Sep 17 00:00:00 2001
From: Katrin Leinweber <9948149+katrinleinweber@users.noreply.github.com>
Date: Mon, 26 Aug 2019 18:37:14 +0200
Subject: [PATCH 19/95] DOC: Harmonize column selection to bracket notation
 (#27562)

* Harmonize column selection to bracket notation

As suggested by
https://medium.com/dunder-data/minimally-sufficient-pandas-a8e67f2a2428#46f9
---
 doc/source/getting_started/10min.rst          |  2 +-
 doc/source/getting_started/basics.rst         | 12 +++---
 .../comparison/comparison_with_r.rst          |  8 ++--
 doc/source/user_guide/advanced.rst            |  2 +-
 doc/source/user_guide/cookbook.rst            |  6 +--
 doc/source/user_guide/enhancingperf.rst       | 12 +++---
 doc/source/user_guide/indexing.rst            | 39 ++++++++++---------
 doc/source/user_guide/reshaping.rst           | 10 ++---
 doc/source/user_guide/visualization.rst       | 14 +++----
 9 files changed, 54 insertions(+), 51 deletions(-)

diff --git a/doc/source/getting_started/10min.rst b/doc/source/getting_started/10min.rst
index 9045e5b32c29fe..41520795bde62e 100644
--- a/doc/source/getting_started/10min.rst
+++ b/doc/source/getting_started/10min.rst
@@ -278,7 +278,7 @@ Using a single column's values to select data.

 .. ipython:: python

-   df[df.A > 0]
+   df[df['A'] > 0]

 Selecting values from a DataFrame where a boolean condition is met.

diff --git a/doc/source/getting_started/basics.rst b/doc/source/getting_started/basics.rst
index 3f6f56376861fd..802ffadf2a81ef 100644
--- a/doc/source/getting_started/basics.rst
+++ b/doc/source/getting_started/basics.rst
@@ -926,7 +926,7 @@ Single aggregations on a ``Series`` this will return a scalar value:

 .. ipython:: python

-   tsdf.A.agg('sum')
+   tsdf['A'].agg('sum')


 Aggregating with multiple functions
@@ -950,13 +950,13 @@ On a ``Series``, multiple functions return a ``Series``, indexed by the function

 .. ipython:: python

-   tsdf.A.agg(['sum', 'mean'])
+   tsdf['A'].agg(['sum', 'mean'])

 Passing a ``lambda`` function will yield a ``<lambda>`` named row:

 ..
ipython:: python - tsdf.A.agg(['sum', lambda x: x.mean()]) + tsdf['A'].agg(['sum', lambda x: x.mean()]) Passing a named function will yield that name for the row: @@ -965,7 +965,7 @@ Passing a named function will yield that name for the row: def mymean(x): return x.mean() - tsdf.A.agg(['sum', mymean]) + tsdf['A'].agg(['sum', mymean]) Aggregating with a dict +++++++++++++++++++++++ @@ -1065,7 +1065,7 @@ Passing a single function to ``.transform()`` with a ``Series`` will yield a sin .. ipython:: python - tsdf.A.transform(np.abs) + tsdf['A'].transform(np.abs) Transform with multiple functions @@ -1084,7 +1084,7 @@ resulting column names will be the transforming functions. .. ipython:: python - tsdf.A.transform([np.abs, lambda x: x + 1]) + tsdf['A'].transform([np.abs, lambda x: x + 1]) Transforming with a dict diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst index 444e886bc951d2..f67f46fc2b29ba 100644 --- a/doc/source/getting_started/comparison/comparison_with_r.rst +++ b/doc/source/getting_started/comparison/comparison_with_r.rst @@ -81,7 +81,7 @@ R pandas =========================================== =========================================== ``select(df, col_one = col1)`` ``df.rename(columns={'col1': 'col_one'})['col_one']`` ``rename(df, col_one = col1)`` ``df.rename(columns={'col1': 'col_one'})`` -``mutate(df, c=a-b)`` ``df.assign(c=df.a-df.b)`` +``mutate(df, c=a-b)`` ``df.assign(c=df['a']-df['b'])`` =========================================== =========================================== @@ -258,8 +258,8 @@ index/slice as well as standard boolean indexing: df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) df.query('a <= b') - df[df.a <= df.b] - df.loc[df.a <= df.b] + df[df['a'] <= df['b']] + df.loc[df['a'] <= df['b']] For more details and examples see :ref:`the query documentation `. @@ -284,7 +284,7 @@ In ``pandas`` the equivalent expression, using the df = pd.DataFrame({'a': np.random.randn(10), 'b': np.random.randn(10)}) df.eval('a + b') - df.a + df.b # same as the previous expression + df['a'] + df['b'] # same as the previous expression In certain cases :meth:`~pandas.DataFrame.eval` will be much faster than evaluation in pure Python. For more details and examples see :ref:`the eval diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 22a9791ffde30e..62a9b6396404a7 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -738,7 +738,7 @@ and allows efficient indexing and storage of an index with a large number of dup df['B'] = df['B'].astype(CategoricalDtype(list('cab'))) df df.dtypes - df.B.cat.categories + df['B'].cat.categories Setting the index will create a ``CategoricalIndex``. diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index 15af5208a4f1f3..c9d3bc3a28c704 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -592,8 +592,8 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to .. 
ipython:: python

     df = pd.DataFrame([0, 1, 0, 1, 1, 1, 0, 1, 1], columns=['A'])
-    df.A.groupby((df.A != df.A.shift()).cumsum()).groups
-    df.A.groupby((df.A != df.A.shift()).cumsum()).cumsum()
+    df['A'].groupby((df['A'] != df['A'].shift()).cumsum()).groups
+    df['A'].groupby((df['A'] != df['A'].shift()).cumsum()).cumsum()

 Expanding data
 **************
@@ -719,7 +719,7 @@ Rolling Apply to multiple columns where function calculates a Series before a Sc
     df

     def gm(df, const):
-        v = ((((df.A + df.B) + 1).cumprod()) - 1) * const
+        v = ((((df['A'] + df['B']) + 1).cumprod()) - 1) * const
         return v.iloc[-1]

     s = pd.Series({df.index[i]: gm(df.iloc[i:min(i + 51, len(df) - 1)], 5)
diff --git a/doc/source/user_guide/enhancingperf.rst b/doc/source/user_guide/enhancingperf.rst
index a4eefadd54d8c4..2df5b9d82dcc37 100644
--- a/doc/source/user_guide/enhancingperf.rst
+++ b/doc/source/user_guide/enhancingperf.rst
@@ -393,15 +393,15 @@ Consider the following toy example of doubling each observation:
 .. code-block:: ipython

     # Custom function without numba
-    In [5]: %timeit df['col1_doubled'] = df.a.apply(double_every_value_nonumba)  # noqa E501
+    In [5]: %timeit df['col1_doubled'] = df['a'].apply(double_every_value_nonumba)  # noqa E501
     1000 loops, best of 3: 797 us per loop

     # Standard implementation (faster than a custom function)
-    In [6]: %timeit df['col1_doubled'] = df.a * 2
+    In [6]: %timeit df['col1_doubled'] = df['a'] * 2
     1000 loops, best of 3: 233 us per loop

     # Custom function with numba
-    In [7]: %timeit (df['col1_doubled'] = double_every_value_withnumba(df.a.to_numpy())
+    In [7]: %timeit df['col1_doubled'] = double_every_value_withnumba(df['a'].to_numpy())
     1000 loops, best of 3: 145 us per loop

 Caveats
@@ -643,8 +643,8 @@ The equivalent in standard Python would be

 .. ipython:: python

     df = pd.DataFrame(dict(a=range(5), b=range(5, 10)))
-    df['c'] = df.a + df.b
-    df['d'] = df.a + df.b + df.c
+    df['c'] = df['a'] + df['b']
+    df['d'] = df['a'] + df['b'] + df['c']
     df['a'] = 1
     df

@@ -688,7 +688,7 @@ name in an expression.

     a = np.random.randn()
     df.query('@a < a')
-    df.loc[a < df.a]  # same as the previous expression
+    df.loc[a < df['a']]  # same as the previous expression

 With :func:`pandas.eval` you cannot use the ``@`` prefix *at all*, because it
 isn't defined in that context. ``pandas`` will let you know this if you try to
diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
index e3b75afcf945e2..cf55ce0c9a6d4e 100644
--- a/doc/source/user_guide/indexing.rst
+++ b/doc/source/user_guide/indexing.rst
@@ -210,7 +210,7 @@ as an attribute:

    See `here for an explanation of valid identifiers
    `__.

-   - The attribute will not be available if it conflicts with an existing method name, e.g. ``s.min`` is not allowed.
+   - The attribute will not be available if it conflicts with an existing method name, e.g. ``s.min`` is not allowed, but ``s['min']`` is possible.

    - Similarly, the attribute will not be available if it conflicts with any of the following list: ``index``,
      ``major_axis``, ``minor_axis``, ``items``.

@@ -540,7 +540,7 @@ The ``callable`` must be a function with one argument (the calling Series or Dat
                        columns=list('ABCD'))
     df1

-    df1.loc[lambda df: df.A > 0, :]
+    df1.loc[lambda df: df['A'] > 0, :]
     df1.loc[:, lambda df: ['A', 'B']]

     df1.iloc[:, lambda df: [0, 1]]

@@ -552,7 +552,7 @@ You can use callable indexing in ``Series``.

 ..
ipython:: python

-   df1.A.loc[lambda s: s > 0]
+   df1['A'].loc[lambda s: s > 0]

 Using these methods / indexers, you can chain data selection operations
 without using a temporary variable.

 .. ipython:: python

     bb = pd.read_csv('data/baseball.csv', index_col='id')
     (bb.groupby(['year', 'team']).sum()
-       .loc[lambda df: df.r > 100])
+       .loc[lambda df: df['r'] > 100])

 .. _indexing.deprecate_ix:

@@ -871,9 +871,9 @@ Boolean indexing
 Another common operation is the use of boolean vectors to filter the data.
 The operators are: ``|`` for ``or``, ``&`` for ``and``, and ``~`` for ``not``.
 These **must** be grouped by using parentheses, since by default Python will
-evaluate an expression such as ``df.A > 2 & df.B < 3`` as
-``df.A > (2 & df.B) < 3``, while the desired evaluation order is
-``(df.A > 2) & (df.B < 3)``.
+evaluate an expression such as ``df['A'] > 2 & df['B'] < 3`` as
+``df['A'] > (2 & df['B']) < 3``, while the desired evaluation order is
+``(df['A'] > 2) & (df['B'] < 3)``.

 Using a boolean vector to index a Series works exactly as in a NumPy ndarray:

@@ -1134,7 +1134,7 @@ between the values of columns ``a`` and ``c``. For example:

     df

     # pure python
-    df[(df.a < df.b) & (df.b < df.c)]
+    df[(df['a'] < df['b']) & (df['b'] < df['c'])]

     # query
     df.query('(a < b) & (b < c)')
@@ -1241,7 +1241,7 @@ Full numpy-like syntax:

     df = pd.DataFrame(np.random.randint(n, size=(n, 3)), columns=list('abc'))
     df
     df.query('(a < b) & (b < c)')
-    df[(df.a < df.b) & (df.b < df.c)]
+    df[(df['a'] < df['b']) & (df['b'] < df['c'])]

 Slightly nicer by removing the parentheses (by binding making comparison
 operators bind tighter than ``&`` and ``|``).
@@ -1279,12 +1279,12 @@ The ``in`` and ``not in`` operators

     df.query('a in b')

     # How you'd do it in pure Python
-    df[df.a.isin(df.b)]
+    df[df['a'].isin(df['b'])]

     df.query('a not in b')

     # pure Python
-    df[~df.a.isin(df.b)]
+    df[~df['a'].isin(df['b'])]

 You can combine this with other expressions for very succinct queries:

@@ -1297,7 +1297,7 @@ You can combine this with other expressions for very succinct queries:

     df.query('a in b and c < d')

     # pure Python
-    df[df.b.isin(df.a) & (df.c < df.d)]
+    df[df['b'].isin(df['a']) & (df['c'] < df['d'])]

 .. note::

@@ -1326,7 +1326,7 @@ to ``in``/``not in``.

     df.query('b == ["a", "b", "c"]')

     # pure Python
-    df[df.b.isin(["a", "b", "c"])]
+    df[df['b'].isin(["a", "b", "c"])]

     df.query('c == [1, 2]')

@@ -1338,7 +1338,7 @@ to ``in``/``not in``.

     df.query('[1, 2] not in c')

     # pure Python
-    df[df.c.isin([1, 2])]
+    df[df['c'].isin([1, 2])]


 Boolean operators
@@ -1352,7 +1352,7 @@ You can negate boolean expressions with the word ``not`` or the ``~`` operator.
df['bools'] = np.random.rand(len(df)) > 0.5 df.query('~bools') df.query('not bools') - df.query('not bools') == df[~df.bools] + df.query('not bools') == df[~df['bools']] Of course, expressions can be arbitrarily complex too: @@ -1362,7 +1362,10 @@ Of course, expressions can be arbitrarily complex too: shorter = df.query('a < b < c and (not bools) or bools > 2') # equivalent in pure Python - longer = df[(df.a < df.b) & (df.b < df.c) & (~df.bools) | (df.bools > 2)] + longer = df[(df['a'] < df['b']) + & (df['b'] < df['c']) + & (~df['bools']) + | (df['bools'] > 2)] shorter longer @@ -1835,14 +1838,14 @@ chained indexing expression, you can set the :ref:`option ` # This will show the SettingWithCopyWarning # but the frame values will be set - dfb['c'][dfb.a.str.startswith('o')] = 42 + dfb['c'][dfb['a'].str.startswith('o')] = 42 This however is operating on a copy and will not work. :: >>> pd.set_option('mode.chained_assignment','warn') - >>> dfb[dfb.a.str.startswith('o')]['c'] = 42 + >>> dfb[dfb['a'].str.startswith('o')]['c'] = 42 Traceback (most recent call last) ... SettingWithCopyWarning: diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index f118fe84d523a6..dd6d3062a8f0ae 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -469,7 +469,7 @@ If ``crosstab`` receives only two Series, it will provide a frequency table. 'C': [1, 1, np.nan, 1, 1]}) df - pd.crosstab(df.A, df.B) + pd.crosstab(df['A'], df['B']) Any input passed containing ``Categorical`` data will have **all** of its categories included in the cross-tabulation, even if the actual data does @@ -489,13 +489,13 @@ using the ``normalize`` argument: .. ipython:: python - pd.crosstab(df.A, df.B, normalize=True) + pd.crosstab(df['A'], df['B'], normalize=True) ``normalize`` can also normalize values within each row or within each column: .. ipython:: python - pd.crosstab(df.A, df.B, normalize='columns') + pd.crosstab(df['A'], df['B'], normalize='columns') ``crosstab`` can also be passed a third ``Series`` and an aggregation function (``aggfunc``) that will be applied to the values of the third ``Series`` within @@ -503,7 +503,7 @@ each group defined by the first two ``Series``: .. ipython:: python - pd.crosstab(df.A, df.B, values=df.C, aggfunc=np.sum) + pd.crosstab(df['A'], df['B'], values=df['C'], aggfunc=np.sum) Adding margins ~~~~~~~~~~~~~~ @@ -512,7 +512,7 @@ Finally, one can also add margins or normalize this output. .. ipython:: python - pd.crosstab(df.A, df.B, values=df.C, aggfunc=np.sum, normalize=True, + pd.crosstab(df['A'], df['B'], values=df['C'], aggfunc=np.sum, normalize=True, margins=True) .. _reshaping.tile: diff --git a/doc/source/user_guide/visualization.rst b/doc/source/user_guide/visualization.rst index fdceaa5868cecd..fa16b2f2166105 100644 --- a/doc/source/user_guide/visualization.rst +++ b/doc/source/user_guide/visualization.rst @@ -1148,10 +1148,10 @@ To plot data on a secondary y-axis, use the ``secondary_y`` keyword: .. ipython:: python - df.A.plot() + df['A'].plot() @savefig series_plot_secondary_y.png - df.B.plot(secondary_y=True, style='g') + df['B'].plot(secondary_y=True, style='g') .. ipython:: python :suppress: @@ -1205,7 +1205,7 @@ Here is the default behavior, notice how the x-axis tick labeling is performed: plt.figure() @savefig ser_plot_suppress.png - df.A.plot() + df['A'].plot() .. 
ipython:: python :suppress: @@ -1219,7 +1219,7 @@ Using the ``x_compat`` parameter, you can suppress this behavior: plt.figure() @savefig ser_plot_suppress_parm.png - df.A.plot(x_compat=True) + df['A'].plot(x_compat=True) .. ipython:: python :suppress: @@ -1235,9 +1235,9 @@ in ``pandas.plotting.plot_params`` can be used in a `with statement`: @savefig ser_plot_suppress_context.png with pd.plotting.plot_params.use('x_compat', True): - df.A.plot(color='r') - df.B.plot(color='g') - df.C.plot(color='b') + df['A'].plot(color='r') + df['B'].plot(color='g') + df['C'].plot(color='b') .. ipython:: python :suppress: From 7528d088c9aa597174fbccbc1bddb9290ba2556e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Mon, 26 Aug 2019 18:10:26 +0100 Subject: [PATCH 20/95] TYPING: add stubs for _packer and _unpacker (#28135) --- pandas/io/msgpack/_packer.pyi | 22 ++++++++++++ pandas/io/msgpack/_unpacker.pyi | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+) create mode 100644 pandas/io/msgpack/_packer.pyi create mode 100644 pandas/io/msgpack/_unpacker.pyi diff --git a/pandas/io/msgpack/_packer.pyi b/pandas/io/msgpack/_packer.pyi new file mode 100644 index 00000000000000..e95a1622c56153 --- /dev/null +++ b/pandas/io/msgpack/_packer.pyi @@ -0,0 +1,22 @@ +# flake8: noqa + +class Packer: + def __cinit__(self): ... + def __init__( + self, + default=..., + encoding=..., + unicode_errors=..., + use_single_float=..., + autoreset: int = ..., + use_bin_type: int = ..., + ): ... + def __dealloc__(self): ... + def _pack(self, o, nest_limit: int = ...) -> int: ... + def pack(self, obj): ... + def pack_ext_type(self, typecode, data): ... + def pack_array_header(self, size): ... + def pack_map_header(self, size): ... + def pack_map_pairs(self, pairs): ... + def reset(self) -> None: ... + def bytes(self): ... diff --git a/pandas/io/msgpack/_unpacker.pyi b/pandas/io/msgpack/_unpacker.pyi new file mode 100644 index 00000000000000..9910895947fb64 --- /dev/null +++ b/pandas/io/msgpack/_unpacker.pyi @@ -0,0 +1,59 @@ +# flake8: noqa + +def unpackb( + packed, + object_hook=..., + list_hook=..., + use_list=..., + encoding=..., + unicode_errors=..., + object_pairs_hook=..., + ext_hook=..., + max_str_len=..., + max_bin_len=..., + max_array_len=..., + max_map_len=..., + max_ext_len=..., +): ... +def unpack( + stream, + object_hook=..., + list_hook=..., + use_list=..., + encoding=..., + unicode_errors=..., + object_pairs_hook=..., +): ... + +class Unpacker: + def __cinit__(self): ... + def __dealloc__(self): ... + def __init__( + self, + file_like=..., + read_size=..., + use_list=..., + object_hook=..., + object_pairs_hook=..., + list_hook=..., + encoding=..., + unicode_errors=..., + max_buffer_size: int = ..., + ext_hook=..., + max_str_len=..., + max_bin_len=..., + max_array_len=..., + max_map_len=..., + max_ext_len=..., + ): ... + def feed(self, next_bytes): ... + def append_buffer(self, _buf, _buf_len): ... + def read_from_file(self): ... + def _unpack(self, execute, write_bytes, iter=...): ... + def read_bytes(self, nbytes): ... + def unpack(self, write_bytes=...): ... + def skip(self, write_bytes=...): ... + def read_array_header(self, write_bytes=...): ... + def read_map_header(self, write_bytes=...): ... + def __iter__(self): ... + def __next__(self): ... 
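The stubs above record signatures only; a minimal sketch of the API they describe, assuming ``pandas.io.msgpack`` re-exports ``Packer`` and ``Unpacker`` the way upstream msgpack-python does (method names and keyword arguments below are taken from the stubs themselves):

    from pandas.io.msgpack import Packer, Unpacker

    # Packer.pack() serializes one object; with the default ``autoreset``
    # the packed bytes are returned directly.
    payload = Packer(encoding="utf-8").pack({"a": 1, "b": [1, 2, 3]})

    # Unpacker is a streaming decoder: feed() buffers raw bytes, and
    # iterating (the stubbed __iter__/__next__) yields decoded objects.
    unpacker = Unpacker(encoding="utf-8", use_list=True)
    unpacker.feed(payload)
    for obj in unpacker:
        print(obj)  # {'a': 1, 'b': [1, 2, 3]}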
From bca39a72b073758d3cfa7afa470462255f1bc066 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Mon, 26 Aug 2019 10:53:59 -0700 Subject: [PATCH 21/95] Run clang-format on objToJSON (#28144) --- pandas/_libs/src/ujson/python/objToJSON.c | 381 +++++++++++----------- 1 file changed, 188 insertions(+), 193 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index de336fb3aa1dcb..4b612bb033761d 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -16,18 +16,19 @@ derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +DISCLAIMED. IN NO EVENT SHALL ESN SOCIAL SOFTWARE AB OR JONAS TARNSTROM BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF +THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Portions of code from MODP_ASCII - Ascii transformations (upper/lower, etc) https://github.com/client9/stringencoders -Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights reserved. +Copyright (c) 2007 Nick Galbreath -- nickg [at] modp [dot] com. All rights +reserved. 
Numeric decoder derived from from TCL library http://www.opensource.apple.com/source/tcl/tcl-14/tcl/license.terms @@ -64,9 +65,9 @@ typedef void *(*PFN_PyTypeToJSON)(JSOBJ obj, JSONTypeContext *ti, typedef struct __NpyArrContext { PyObject *array; char *dataptr; - int curdim; // current dimension in array's order - int stridedim; // dimension we are striding over - int inc; // stride dimension increment (+/- 1) + int curdim; // current dimension in array's order + int stridedim; // dimension we are striding over + int inc; // stride dimension increment (+/- 1) npy_intp dim; npy_intp stride; npy_intp ndim; @@ -83,8 +84,8 @@ typedef struct __PdBlockContext { int ncols; int transpose; - int *cindices; // frame column -> block column map - NpyArrContext **npyCtxts; // NpyArrContext for each column + int *cindices; // frame column -> block column map + NpyArrContext **npyCtxts; // NpyArrContext for each column } PdBlockContext; typedef struct __TypeContext { @@ -148,13 +149,12 @@ enum PANDAS_FORMAT { SPLIT, RECORDS, INDEX, COLUMNS, VALUES }; int PdBlock_iterNext(JSOBJ, JSONTypeContext *); -void *initObjToJSON(void) -{ +void *initObjToJSON(void) { PyObject *mod_pandas; PyObject *mod_nattype; PyObject *mod_decimal = PyImport_ImportModule("decimal"); type_decimal = - (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal"); + (PyTypeObject *)PyObject_GetAttrString(mod_decimal, "Decimal"); Py_DECREF(mod_decimal); PyDateTime_IMPORT; @@ -167,14 +167,14 @@ void *initObjToJSON(void) cls_series = (PyTypeObject *)PyObject_GetAttrString(mod_pandas, "Series"); cls_timestamp = PyObject_GetAttrString(mod_pandas, "Timestamp"); - cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); + cls_timedelta = PyObject_GetAttrString(mod_pandas, "Timedelta"); Py_DECREF(mod_pandas); } mod_nattype = PyImport_ImportModule("pandas._libs.tslibs.nattype"); if (mod_nattype) { - cls_nat = (PyTypeObject *)PyObject_GetAttrString(mod_nattype, - "NaTType"); + cls_nat = + (PyTypeObject *)PyObject_GetAttrString(mod_nattype, "NaTType"); Py_DECREF(mod_nattype); } @@ -212,7 +212,6 @@ static TypeContext *createTypeContext(void) { return pc; } - static int is_sparse_array(PyObject *obj) { // TODO can be removed again once SparseArray.values is removed (GH26421) if (PyObject_HasAttrString(obj, "_subtyp")) { @@ -227,7 +226,6 @@ static int is_sparse_array(PyObject *obj) { return 0; } - static PyObject *get_values(PyObject *obj) { PyObject *values = NULL; @@ -242,7 +240,8 @@ static PyObject *get_values(PyObject *obj) { values = PyObject_CallMethod(values, "to_numpy", NULL); } - if (!is_sparse_array(values) && PyObject_HasAttrString(values, "values")) { + if (!is_sparse_array(values) && + PyObject_HasAttrString(values, "values")) { PyObject *subvals = get_values(values); PyErr_Clear(); PRINTMARK(); @@ -357,20 +356,20 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { } static npy_int64 get_long_attr(PyObject *o, const char *attr) { - npy_int64 long_val; - PyObject *value = PyObject_GetAttrString(o, attr); - long_val = (PyLong_Check(value) ? - PyLong_AsLongLong(value) : PyLong_AsLong(value)); - Py_DECREF(value); - return long_val; + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = + (PyLong_Check(value) ? 
PyLong_AsLongLong(value) : PyLong_AsLong(value)); + Py_DECREF(value); + return long_val; } static npy_float64 total_seconds(PyObject *td) { - npy_float64 double_val; - PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); - double_val = PyFloat_AS_DOUBLE(value); - Py_DECREF(value); - return double_val; + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; } static PyObject *get_item(PyObject *obj, Py_ssize_t i) { @@ -450,7 +449,7 @@ static void *PyUnicodeToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, if (PyUnicode_IS_COMPACT_ASCII(obj)) { Py_ssize_t len; - char *data = (char*)PyUnicode_AsUTF8AndSize(obj, &len); + char *data = (char *)PyUnicode_AsUTF8AndSize(obj, &len); *_outLen = len; return data; } @@ -505,7 +504,7 @@ static void *NpyDateTimeScalarToJSON(JSOBJ _obj, JSONTypeContext *tc, // TODO(anyone): Does not appear to be reached in tests. pandas_datetime_to_datetimestruct(obj->obval, - (NPY_DATETIMEUNIT)obj->obmeta.base, &dts); + (NPY_DATETIMEUNIT)obj->obmeta.base, &dts); return PandasDateTimeStructToJSON(&dts, tc, outValue, _outLen); } @@ -664,9 +663,9 @@ void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { GET_TC(tc)->npyarr = npyarr; if (!npyarr) { - PyErr_NoMemory(); - GET_TC(tc)->iterNext = NpyArr_iterNextNone; - return; + PyErr_NoMemory(); + GET_TC(tc)->iterNext = NpyArr_iterNextNone; + return; } npyarr->array = (PyObject *)obj; @@ -677,17 +676,17 @@ void NpyArr_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { npyarr->type_num = PyArray_DESCR(obj)->type_num; if (GET_TC(tc)->transpose) { - npyarr->dim = PyArray_DIM(obj, npyarr->ndim); - npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); - npyarr->stridedim = npyarr->ndim; - npyarr->index[npyarr->ndim] = 0; - npyarr->inc = -1; + npyarr->dim = PyArray_DIM(obj, npyarr->ndim); + npyarr->stride = PyArray_STRIDE(obj, npyarr->ndim); + npyarr->stridedim = npyarr->ndim; + npyarr->index[npyarr->ndim] = 0; + npyarr->inc = -1; } else { - npyarr->dim = PyArray_DIM(obj, 0); - npyarr->stride = PyArray_STRIDE(obj, 0); - npyarr->stridedim = 0; - npyarr->index[0] = 0; - npyarr->inc = 1; + npyarr->dim = PyArray_DIM(obj, 0); + npyarr->stride = PyArray_STRIDE(obj, 0); + npyarr->stridedim = 0; + npyarr->index[0] = 0; + npyarr->inc = 1; } npyarr->columnLabels = GET_TC(tc)->columnLabels; @@ -735,8 +734,7 @@ int NpyArr_iterNextItem(JSOBJ obj, JSONTypeContext *tc) { NpyArr_freeItemValue(obj, tc); - if (PyArray_ISDATETIME(npyarr->array)) - { + if (PyArray_ISDATETIME(npyarr->array)) { PRINTMARK(); GET_TC(tc)->itemValue = obj; Py_INCREF(obj); @@ -797,10 +795,10 @@ char *NpyArr_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; + cStr = npyarr->columnLabels[idx]; } else { idx = npyarr->index[npyarr->stridedim - npyarr->inc] - 1; - cStr = npyarr->rowLabels[idx]; + cStr = npyarr->rowLabels[idx]; } *outLen = strlen(cStr); @@ -852,13 +850,13 @@ char *PdBlock_iterGetName(JSOBJ obj, JSONTypeContext *tc, size_t *outLen) { if (GET_TC(tc)->iterNext == PdBlock_iterNextItem) { idx = blkCtxt->colIdx - 1; - cStr = npyarr->columnLabels[idx]; + cStr = npyarr->columnLabels[idx]; } else { idx = GET_TC(tc)->iterNext != PdBlock_iterNext ? 
npyarr->index[npyarr->stridedim - npyarr->inc] - 1 : npyarr->index[npyarr->stridedim]; - cStr = npyarr->rowLabels[idx]; + cStr = npyarr->rowLabels[idx]; } *outLen = strlen(cStr); @@ -875,10 +873,10 @@ char *PdBlock_iterGetName_Transpose(JSOBJ obj, JSONTypeContext *tc, if (GET_TC(tc)->iterNext == NpyArr_iterNextItem) { idx = npyarr->index[npyarr->stridedim] - 1; - cStr = npyarr->columnLabels[idx]; + cStr = npyarr->columnLabels[idx]; } else { idx = blkCtxt->colIdx; - cStr = npyarr->rowLabels[idx]; + cStr = npyarr->rowLabels[idx]; } *outLen = strlen(cStr); @@ -943,9 +941,9 @@ void PdBlock_iterBegin(JSOBJ _obj, JSONTypeContext *tc) { dtype = PyArray_DescrFromType(NPY_INT64); obj = (PyObject *)_obj; - GET_TC(tc) - ->iterGetName = GET_TC(tc)->transpose ? PdBlock_iterGetName_Transpose - : PdBlock_iterGetName; + GET_TC(tc)->iterGetName = GET_TC(tc)->transpose + ? PdBlock_iterGetName_Transpose + : PdBlock_iterGetName; blkCtxt = PyObject_Malloc(sizeof(PdBlockContext)); if (!blkCtxt) { @@ -1396,7 +1394,7 @@ void Series_iterBegin(JSOBJ obj, JSONTypeContext *tc) { PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; GET_TC(tc)->index = 0; GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series + enc->outputFormat = VALUES; // for contained series if (!GET_TC(tc)->cStr) { PyErr_NoMemory(); } @@ -1455,7 +1453,7 @@ void DataFrame_iterBegin(JSOBJ obj, JSONTypeContext *tc) { PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; GET_TC(tc)->index = 0; GET_TC(tc)->cStr = PyObject_Malloc(20 * sizeof(char)); - enc->outputFormat = VALUES; // for contained series & index + enc->outputFormat = VALUES; // for contained series & index if (!GET_TC(tc)->cStr) { PyErr_NoMemory(); } @@ -1634,115 +1632,116 @@ char **NpyArr_encodeLabels(PyArrayObject *labels, PyObjectEncoder *enc, type_num = PyArray_TYPE(labels); for (i = 0; i < num; i++) { - item = PyArray_GETITEM(labels, dataptr); + item = PyArray_GETITEM(labels, dataptr); if (!item) { - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - // TODO: for any matches on type_num (date and timedeltas) should use a - // vectorized solution to convert to epoch or iso formats - if (enc->datetimeIso && (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { - PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); - if (td == NULL) { - Py_DECREF(item); NpyArr_freeLabels(ret, num); ret = 0; break; - } - - PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL); - Py_DECREF(td); - if (iso == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(iso); - Py_DECREF(iso); - len = strlen(cLabel); - } - else if (PyTypeNum_ISDATETIME(type_num) || - PyDateTime_Check(item) || PyDate_Check(item)) { - PyObject *ts = PyObject_CallFunction(cls_timestamp, "(O)", item); - if (ts == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - if (enc->datetimeIso) { - PyObject *iso = PyObject_CallMethod(ts, "isoformat", NULL); - Py_DECREF(ts); - if (iso == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(iso); - Py_DECREF(iso); - len = strlen(cLabel); - } else { - npy_int64 value; - // TODO: refactor to not duplicate what goes on in beginTypeContext - if (PyObject_HasAttrString(ts, "value")) { - PRINTMARK(); - value = get_long_attr(ts, "value"); - } else { - PRINTMARK(); - value = - total_seconds(ts) * 1000000000LL; // nanoseconds per second - } - Py_DECREF(ts); - - 
switch (enc->datetimeUnit) { - case NPY_FR_ns: - break; - case NPY_FR_us: - value /= 1000LL; - break; - case NPY_FR_ms: - value /= 1000000LL; - break; - case NPY_FR_s: - value /= 1000000000LL; - break; - default: - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - char buf[21] = {0}; // 21 chars for 2**63 as string - cLabel = buf; - sprintf(buf, "%" NPY_INT64_FMT, value); - len = strlen(cLabel); - } - } else { // Fallack to string representation - PyObject *str = PyObject_Str(item); - if (str == NULL) { - Py_DECREF(item); - NpyArr_freeLabels(ret, num); - ret = 0; - break; - } - - cLabel = (char *)PyUnicode_AsUTF8(str); - Py_DECREF(str); - len = strlen(cLabel); - } - - Py_DECREF(item); - // Add 1 to include NULL terminator - ret[i] = PyObject_Malloc(len + 1); - memcpy(ret[i], cLabel, len + 1); + } + + // TODO: for any matches on type_num (date and timedeltas) should use a + // vectorized solution to convert to epoch or iso formats + if (enc->datetimeIso && + (type_num == NPY_TIMEDELTA || PyDelta_Check(item))) { + PyObject *td = PyObject_CallFunction(cls_timedelta, "(O)", item); + if (td == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + PyObject *iso = PyObject_CallMethod(td, "isoformat", NULL); + Py_DECREF(td); + if (iso == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(iso); + Py_DECREF(iso); + len = strlen(cLabel); + } else if (PyTypeNum_ISDATETIME(type_num) || PyDateTime_Check(item) || + PyDate_Check(item)) { + PyObject *ts = PyObject_CallFunction(cls_timestamp, "(O)", item); + if (ts == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + if (enc->datetimeIso) { + PyObject *iso = PyObject_CallMethod(ts, "isoformat", NULL); + Py_DECREF(ts); + if (iso == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(iso); + Py_DECREF(iso); + len = strlen(cLabel); + } else { + npy_int64 value; + // TODO: refactor to not duplicate what goes on in + // beginTypeContext + if (PyObject_HasAttrString(ts, "value")) { + PRINTMARK(); + value = get_long_attr(ts, "value"); + } else { + PRINTMARK(); + value = total_seconds(ts) * + 1000000000LL; // nanoseconds per second + } + Py_DECREF(ts); + + switch (enc->datetimeUnit) { + case NPY_FR_ns: + break; + case NPY_FR_us: + value /= 1000LL; + break; + case NPY_FR_ms: + value /= 1000000LL; + break; + case NPY_FR_s: + value /= 1000000000LL; + break; + default: + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + char buf[21] = {0}; // 21 chars for 2**63 as string + cLabel = buf; + sprintf(buf, "%" NPY_INT64_FMT, value); + len = strlen(cLabel); + } + } else { // Fallack to string representation + PyObject *str = PyObject_Str(item); + if (str == NULL) { + Py_DECREF(item); + NpyArr_freeLabels(ret, num); + ret = 0; + break; + } + + cLabel = (char *)PyUnicode_AsUTF8(str); + Py_DECREF(str); + len = strlen(cLabel); + } + + Py_DECREF(item); + // Add 1 to include NULL terminator + ret[i] = PyObject_Malloc(len + 1); + memcpy(ret[i], cLabel, len + 1); if (PyErr_Occurred()) { NpyArr_freeLabels(ret, num); @@ -1923,23 +1922,22 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { value = get_long_attr(obj, "value"); } else { PRINTMARK(); - value = - total_seconds(obj) * 1000000000LL; // nanoseconds per second + value = total_seconds(obj) * 1000000000LL; // nanoseconds per second } base = ((PyObjectEncoder 
*)tc->encoder)->datetimeUnit; switch (base) { - case NPY_FR_ns: - break; - case NPY_FR_us: - value /= 1000LL; - break; - case NPY_FR_ms: - value /= 1000000LL; - break; - case NPY_FR_s: - value /= 1000000000LL; - break; + case NPY_FR_ns: + break; + case NPY_FR_us: + value /= 1000LL; + break; + case NPY_FR_ms: + value /= 1000000LL; + break; + case NPY_FR_s: + value /= 1000000000LL; + break; } exc = PyErr_Occurred(); @@ -2054,8 +2052,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->columnLabelsLen = PyArray_DIM(pc->newObj, 0); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - enc, + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->columnLabelsLen); if (!pc->columnLabels) { goto INVALID; @@ -2157,8 +2154,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - enc, + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->columnLabelsLen); Py_DECREF(tmpObj); if (!pc->columnLabels) { @@ -2179,9 +2175,8 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->rowLabelsLen = PyObject_Size(tmpObj); - pc->rowLabels = - NpyArr_encodeLabels((PyArrayObject *)values, - enc, pc->rowLabelsLen); + pc->rowLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, + pc->rowLabelsLen); Py_DECREF(tmpObj); tmpObj = (enc->outputFormat == INDEX ? PyObject_GetAttrString(obj, "columns") @@ -2199,8 +2194,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { goto INVALID; } pc->columnLabelsLen = PyObject_Size(tmpObj); - pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, - enc, + pc->columnLabels = NpyArr_encodeLabels((PyArrayObject *)values, enc, pc->columnLabelsLen); Py_DECREF(tmpObj); if (!pc->columnLabels) { @@ -2325,7 +2319,8 @@ void Object_endTypeContext(JSOBJ obj, JSONTypeContext *tc) { PyObject_Free(GET_TC(tc)->cStr); GET_TC(tc)->cStr = NULL; - if (tc->prv != &(((PyObjectEncoder *)tc->encoder)->basicTypeContext)) { // NOLINT + if (tc->prv != + &(((PyObjectEncoder *)tc->encoder)->basicTypeContext)) { // NOLINT PyObject_Free(tc->prv); } tc->prv = NULL; @@ -2388,7 +2383,7 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject *newobj; PyObject *oinput = NULL; PyObject *oensureAscii = NULL; - int idoublePrecision = 10; // default double precision setting + int idoublePrecision = 10; // default double precision setting PyObject *oencodeHTMLChars = NULL; char *sOrient = NULL; char *sdateFormat = NULL; @@ -2411,10 +2406,10 @@ PyObject *objToJSON(PyObject *self, PyObject *args, PyObject *kwargs) { PyObject_Malloc, PyObject_Realloc, PyObject_Free, - -1, // recursionMax + -1, // recursionMax idoublePrecision, - 1, // forceAscii - 0, // encodeHTMLChars + 1, // forceAscii + 0, // encodeHTMLChars }}; JSONObjectEncoder *encoder = (JSONObjectEncoder *)&pyEncoder; From 87d26bafcdb2495f8a9e76489d3438b1571beb05 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 13:14:06 -0700 Subject: [PATCH 22/95] PERF: replace with list, closes #28084 (#28099) --- asv_bench/benchmarks/replace.py | 17 +++++++++++++++++ pandas/core/internals/blocks.py | 22 +++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 6137e944e6b9e3..f69ae150285255 100644 --- a/asv_bench/benchmarks/replace.py +++ 
b/asv_bench/benchmarks/replace.py @@ -36,6 +36,23 @@ def time_replace_series(self, inplace): self.s.replace(self.to_rep, inplace=inplace) +class ReplaceList: + # GH#28099 + + params = [(True, False)] + param_names = ["inplace"] + + def setup(self, inplace): + self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(4 * 10 ** 7)) + + def time_replace_list(self, inplace): + self.df.replace([np.inf, -np.inf], np.nan, inplace=inplace) + + def time_replace_list_one_match(self, inplace): + # the 1 can be held in self._df.blocks[0], while the inf and -inf cant + self.df.replace([np.inf, -np.inf, 1], np.nan, inplace=inplace) + + class Convert: params = (["DataFrame", "Series"], ["Timestamp", "Timedelta"]) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f0ee56f403325a..a2a51881016a38 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -743,6 +743,26 @@ def replace( return [self] return [self.copy()] + to_replace = [x for x in to_replace if self._can_hold_element(x)] + if not len(to_replace): + # GH#28084 avoid costly checks since we can infer + # that there is nothing to replace in this block + if inplace: + return [self] + return [self.copy()] + + if len(to_replace) == 1: + # _can_hold_element checks have reduced this back to the + # scalar case and we can avoid a costly object cast + return self.replace( + to_replace[0], + value, + inplace=inplace, + filter=filter, + regex=regex, + convert=convert, + ) + # GH 22083, TypeError or ValueError occurred within error handling # causes infinite loop. Cast and retry only if not objectblock. if is_object_dtype(self): @@ -751,7 +771,7 @@ def replace( # try again with a compatible block block = self.astype(object) return block.replace( - to_replace=original_to_replace, + to_replace=to_replace, value=value, inplace=inplace, filter=filter, From 7deda218435e787275e5899162b482001df85684 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Aug 2019 15:56:57 -0500 Subject: [PATCH 23/95] DOC: whatsnew for 28099 (#28154) --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2bfc09e52c68b5..7fe358d3820f23 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -76,6 +76,7 @@ Performance improvements - Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) - Performance improvement in `MultiIndex.is_monotonic` (:issue:`27495`) - Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) +- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`) .. _whatsnew_1000.bug_fixes: From 9f48098a021c7b744ff4604b605de7b99c7e62f4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Aug 2019 17:49:08 -0500 Subject: [PATCH 24/95] DOC: Set 1.0.0 in index.rst (#28149) --- doc/source/index.rst.template | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/index.rst.template b/doc/source/index.rst.template index b57ce83cfc33c9..f5669626aa2b31 100644 --- a/doc/source/index.rst.template +++ b/doc/source/index.rst.template @@ -39,7 +39,7 @@ See the :ref:`overview` for more detail about what's in the library. 
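A minimal sketch of the call pattern that benefits from the ``Block.replace`` fast path added two patches above, using only public pandas API: on integer blocks, ``_can_hold_element`` filters the ``inf`` targets out of ``to_replace``, so ``replace`` can return early instead of retrying with a costly object cast.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"A": 0, "B": 0}, index=range(10))
    # no int64 block can hold inf, so there is nothing to replace and
    # the object-dtype retry is skipped entirely
    result = df.replace([np.inf, -np.inf], np.nan)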
:hidden: {% endif %} {% if not single_doc %} - What's New in 0.25.0 + What's New in 1.0.0 install getting_started/index user_guide/index @@ -53,7 +53,7 @@ See the :ref:`overview` for more detail about what's in the library. whatsnew/index {% endif %} -* :doc:`whatsnew/v0.25.0` +* :doc:`whatsnew/v1.0.0` * :doc:`install` * :doc:`getting_started/index` From 294a22c0baa2e024d12f70705c4ec85f4c82b2b0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 26 Aug 2019 18:11:05 -0500 Subject: [PATCH 25/95] BUG: Fix groupby quantile array (#28113) --- doc/source/whatsnew/v0.25.2.rst | 3 +-- pandas/core/groupby/groupby.py | 4 ++-- pandas/tests/groupby/test_function.py | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 403c02c3ff129d..6974c7521a2376 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -76,8 +76,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- -- +- Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`). - - - diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 3eeecd9c149e1b..87047d21709927 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1947,8 +1947,8 @@ def post_processor(vals: np.ndarray, inference: Optional[Type]) -> np.ndarray: arrays = [] for i in range(self.ngroups): - arr = arr + i - arrays.append(arr) + arr2 = arr + i + arrays.append(arr2) indices = np.concatenate(arrays) assert len(indices) == len(result) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 509d7c33b643b5..d89233f2fd603c 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1257,6 +1257,24 @@ def test_quantile_array(): tm.assert_frame_equal(result, expected) +def test_quantile_array2(): + # https://github.com/pandas-dev/pandas/pull/28085#issuecomment-524066959 + df = pd.DataFrame( + np.random.RandomState(0).randint(0, 5, size=(10, 3)), columns=list("ABC") + ) + result = df.groupby("A").quantile([0.3, 0.7]) + expected = pd.DataFrame( + { + "B": [0.9, 2.1, 2.2, 3.4, 1.6, 2.4, 2.3, 2.7, 0.0, 0.0], + "C": [1.2, 2.8, 1.8, 3.0, 0.0, 0.0, 1.9, 3.1, 3.0, 3.0], + }, + index=pd.MultiIndex.from_product( + [[0, 1, 2, 3, 4], [0.3, 0.7]], names=["A", None] + ), + ) + tm.assert_frame_equal(result, expected) + + def test_quantile_array_no_sort(): df = pd.DataFrame({"A": [0, 1, 2], "B": [3, 4, 5]}) result = df.groupby([1, 0, 1], sort=False).quantile([0.25, 0.5, 0.75]) From ddfc9a232f605e935c06efebdc0830d2b14dfdd5 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 27 Aug 2019 00:16:13 +0100 Subject: [PATCH 26/95] TYPING: --disallow-any-expr for HTMLFormatter.__init__ (#28140) --- pandas/io/formats/format.py | 6 ++++-- pandas/io/formats/html.py | 8 ++++---- pandas/io/formats/latex.py | 5 +++-- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 61af935bd82276..8ff4b9bda0430a 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -549,7 +549,8 @@ def __init__( decimal: str = ".", table_id: Optional[str] = None, render_links: bool = False, - **kwds + bold_rows: bool = False, + escape: bool = True, ): self.frame = frame self.show_index_names = index_names @@ -580,7 +581,8 @@ def __init__( else: self.justify = justify 
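        # The removal just below drops the untyped ``**kwds`` catch-all in
        # favour of the explicit ``bold_rows``/``escape`` keywords added above,
        # so ``--disallow-any-expr`` can actually check them. A minimal sketch
        # of the pattern, with illustrative names only:
        #
        #     def fmt(frame, **kwds):                  # kwds is Dict[str, Any]
        #         bold = kwds.get("bold_rows", False)  # inferred as Any
        #
        #     def fmt(frame, bold_rows: bool = False):
        #         bold = bold_rows                     # checked as bool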
- self.kwds = kwds + self.bold_rows = bold_rows + self.escape = escape if columns is not None: self.columns = ensure_index(columns) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 4b44893df70ed5..8c4a7f4a1213d9 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -37,7 +37,7 @@ class HTMLFormatter(TableFormatter): def __init__( self, formatter: DataFrameFormatter, - classes: Optional[Union[str, List, Tuple]] = None, + classes: Optional[Union[str, List[str], Tuple[str, ...]]] = None, border: Optional[int] = None, ) -> None: self.fmt = formatter @@ -46,11 +46,11 @@ def __init__( self.frame = self.fmt.frame self.columns = self.fmt.tr_frame.columns self.elements = [] # type: List[str] - self.bold_rows = self.fmt.kwds.get("bold_rows", False) - self.escape = self.fmt.kwds.get("escape", True) + self.bold_rows = self.fmt.bold_rows + self.escape = self.fmt.escape self.show_dimensions = self.fmt.show_dimensions if border is None: - border = get_option("display.html.border") + border = cast(int, get_option("display.html.border")) self.border = border self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index c60e15b733f0a9..4c4d5ec73269a5 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -39,12 +39,13 @@ def __init__( ): self.fmt = formatter self.frame = self.fmt.frame - self.bold_rows = self.fmt.kwds.get("bold_rows", False) + self.bold_rows = self.fmt.bold_rows self.column_format = column_format self.longtable = longtable self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format self.multirow = multirow + self.escape = self.fmt.escape def write_result(self, buf: IO[str]) -> None: """ @@ -142,7 +143,7 @@ def pad_empties(x): buf.write("\\endfoot\n\n") buf.write("\\bottomrule\n") buf.write("\\endlastfoot\n") - if self.fmt.kwds.get("escape", True): + if self.escape: # escape backslashes first crow = [ ( From 357774695a4caf7b83506686f4c29cc38d2b9726 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 16:39:12 -0700 Subject: [PATCH 27/95] CLN: small ops optimizations (#28036) --- pandas/core/frame.py | 28 +++++++++++++++++----------- pandas/core/ops/__init__.py | 12 ++++++------ pandas/core/ops/array_ops.py | 12 ++++++------ pandas/core/ops/missing.py | 4 ++-- pandas/core/sparse/frame.py | 2 +- 5 files changed, 32 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9da7999724a186..f636bb6db74309 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5298,12 +5298,19 @@ def _combine_frame(self, other, func, fill_value=None, level=None): this, other = self.align(other, join="outer", level=level, copy=False) new_index, new_columns = this.index, this.columns - def _arith_op(left, right): - # for the mixed_type case where we iterate over columns, - # _arith_op(left, right) is equivalent to - # left._binop(right, func, fill_value=fill_value) - left, right = ops.fill_binop(left, right, fill_value) - return func(left, right) + if fill_value is None: + # since _arith_op may be called in a loop, avoid function call + # overhead if possible by doing this check once + _arith_op = func + + else: + + def _arith_op(left, right): + # for the mixed_type case where we iterate over columns, + # _arith_op(left, right) is equivalent to + # left._binop(right, func, fill_value=fill_value) + left, right = ops.fill_binop(left, right, fill_value) + return func(left, right) if 
ops.should_series_dispatch(this, other, func): # iterate over columns @@ -5318,7 +5325,7 @@ def _arith_op(left, right): def _combine_match_index(self, other, func, level=None): left, right = self.align(other, join="outer", axis=0, level=level, copy=False) - assert left.index.equals(right.index) + # at this point we have `left.index.equals(right.index)` if left._is_mixed_type or right._is_mixed_type: # operate column-wise; avoid costly object-casting in `.values` @@ -5331,14 +5338,13 @@ def _combine_match_index(self, other, func, level=None): new_data, index=left.index, columns=self.columns, copy=False ) - def _combine_match_columns(self, other, func, level=None): - assert isinstance(other, Series) + def _combine_match_columns(self, other: Series, func, level=None): left, right = self.align(other, join="outer", axis=1, level=level, copy=False) - assert left.columns.equals(right.index) + # at this point we have `left.columns.equals(right.index)` return ops.dispatch_to_series(left, right, func, axis="columns") def _combine_const(self, other, func): - assert lib.is_scalar(other) or np.ndim(other) == 0 + # scalar other or np.ndim(other) == 0 return ops.dispatch_to_series(self, other, func) def combine(self, other, func, fill_value=None, overwrite=True): diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 7e03b9544ee727..86cd6e878cde60 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -169,7 +169,7 @@ def maybe_upcast_for_op(obj, shape: Tuple[int, ...]): # np.timedelta64(3, 'D') / 2 == np.timedelta64(1, 'D') return Timedelta(obj) - elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj): + elif isinstance(obj, np.ndarray) and is_timedelta64_dtype(obj.dtype): # GH#22390 Unfortunately we need to special-case right-hand # timedelta64 dtypes because numpy casts integer dtypes to # timedelta64 when operating with timedelta64 @@ -415,7 +415,7 @@ def should_extension_dispatch(left: ABCSeries, right: Any) -> bool: ): return True - if is_extension_array_dtype(right) and not is_scalar(right): + if not is_scalar(right) and is_extension_array_dtype(right): # GH#22378 disallow scalar to exclude e.g. "category", "Int64" return True @@ -755,7 +755,7 @@ def na_op(x, y): assert not isinstance(y, (list, ABCSeries, ABCIndexClass)) if isinstance(y, np.ndarray): # bool-bool dtype operations should be OK, should not get here - assert not (is_bool_dtype(x) and is_bool_dtype(y)) + assert not (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)) x = ensure_object(x) y = ensure_object(y) result = libops.vec_binop(x, y, op) @@ -804,7 +804,7 @@ def wrapper(self, other): else: # scalars, list, tuple, np.array - is_other_int_dtype = is_integer_dtype(np.asarray(other)) + is_other_int_dtype = is_integer_dtype(np.asarray(other).dtype) if is_list_like(other) and not isinstance(other, np.ndarray): # TODO: Can we do this before the is_integer_dtype check? 
# could the is_integer_dtype check be checking the wrong @@ -988,10 +988,10 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): self, other, pass_op, fill_value=fill_value, axis=axis, level=level ) else: + # in this case we always have `np.ndim(other) == 0` if fill_value is not None: self = self.fillna(fill_value) - assert np.ndim(other) == 0 return self._combine_const(other, op) f.__name__ = op_name @@ -1032,7 +1032,7 @@ def f(self, other, axis=default_axis, level=None): self, other, na_op, fill_value=None, axis=axis, level=level ) else: - assert np.ndim(other) == 0, other + # in this case we always have `np.ndim(other) == 0` return self._combine_const(other, na_op) f.__name__ = op_name diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 523ba5d42a69cf..f5f6d77676f1f3 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -11,7 +11,7 @@ find_common_type, maybe_upcast_putmask, ) -from pandas.core.dtypes.common import is_object_dtype, is_period_dtype, is_scalar +from pandas.core.dtypes.common import is_object_dtype, is_scalar from pandas.core.dtypes.generic import ABCIndex, ABCSeries from pandas.core.dtypes.missing import notna @@ -57,9 +57,9 @@ def masked_arith_op(x, y, op): dtype = find_common_type([x.dtype, y.dtype]) result = np.empty(x.size, dtype=dtype) - # PeriodIndex.ravel() returns int64 dtype, so we have - # to work around that case. See GH#19956 - yrav = y if is_period_dtype(y) else y.ravel() + # NB: ravel() is only safe since y is ndarray; for e.g. PeriodIndex + # we would get int64 dtype, see GH#19956 + yrav = y.ravel() mask = notna(xrav) & notna(yrav) if yrav.shape != mask.shape: @@ -82,9 +82,9 @@ def masked_arith_op(x, y, op): mask = notna(xrav) # 1 ** np.nan is 1. So we have to unmask those. - if op == pow: + if op is pow: mask = np.where(x == 1, False, mask) - elif op == rpow: + elif op is rpow: mask = np.where(y == 1, False, mask) if mask.any(): diff --git a/pandas/core/ops/missing.py b/pandas/core/ops/missing.py index 01bc345a40b83c..45fa6a2830af64 100644 --- a/pandas/core/ops/missing.py +++ b/pandas/core/ops/missing.py @@ -40,7 +40,7 @@ def fill_zeros(result, x, y, name, fill): Mask the nan's from x. 
""" - if fill is None or is_float_dtype(result): + if fill is None or is_float_dtype(result.dtype): return result if name.startswith(("r", "__r")): @@ -55,7 +55,7 @@ def fill_zeros(result, x, y, name, fill): if is_scalar_type: y = np.array(y) - if is_integer_dtype(y): + if is_integer_dtype(y.dtype): if (y == 0).any(): diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index f5add426297a73..8fe6850c84b8b1 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -569,13 +569,13 @@ def _combine_frame(self, other, func, fill_value=None, level=None): ).__finalize__(self) def _combine_match_index(self, other, func, level=None): - new_data = {} if level is not None: raise NotImplementedError("'level' argument is not supported") this, other = self.align(other, join="outer", axis=0, level=level, copy=False) + new_data = {} for col, series in this.items(): new_data[col] = func(series.values, other.values) From 49d2019723b0089bd357adf6c936c5a82e0cc775 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 26 Aug 2019 16:52:44 -0700 Subject: [PATCH 28/95] CLN: internals.blocks cleanup, typing (#27941) --- pandas/core/internals/blocks.py | 90 ++++++++++----------------------- 1 file changed, 28 insertions(+), 62 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a2a51881016a38..33698d245e9ffc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -7,7 +7,7 @@ import numpy as np -from pandas._libs import NaT, Timestamp, lib, tslib, tslibs +from pandas._libs import NaT, Timestamp, lib, tslib import pandas._libs.internals as libinternals from pandas._libs.tslibs import Timedelta, conversion from pandas._libs.tslibs.timezones import tz_compare @@ -407,7 +407,7 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): return self.copy() if self._can_hold_element(value): - # equivalent: self._try_coerce_args(value) would not raise + # equivalent: _try_coerce_args(value) would not raise blocks = self.putmask(mask, value, inplace=inplace) return self._maybe_downcast(blocks, downcast) @@ -669,7 +669,7 @@ def convert( return self.copy() if copy else self - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: """ require the same dtype as ourselves """ dtype = self.values.dtype.type tipo = maybe_infer_dtype_type(element) @@ -857,12 +857,6 @@ def setitem(self, indexer, value): if self._can_hold_element(value): value = self._try_coerce_args(value) - # can keep its own dtype - if hasattr(value, "dtype") and is_dtype_equal(values.dtype, value.dtype): - dtype = self.dtype - else: - dtype = "infer" - else: # current dtype cannot store value, coerce to common dtype find_dtype = False @@ -871,15 +865,9 @@ def setitem(self, indexer, value): dtype = value.dtype find_dtype = True - elif lib.is_scalar(value): - if isna(value): - # NaN promotion is handled in latter path - dtype = False - else: - dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) - find_dtype = True - else: - dtype = "infer" + elif lib.is_scalar(value) and not isna(value): + dtype, _ = infer_dtype_from_scalar(value, pandas_dtype=True) + find_dtype = True if find_dtype: dtype = find_common_type([values.dtype, dtype]) @@ -1088,7 +1076,7 @@ def coerce_to_target_dtype(self, other): mytz = getattr(self.dtype, "tz", None) othertz = getattr(dtype, "tz", None) - if str(mytz) != str(othertz): + if not tz_compare(mytz, othertz): return self.astype(object) raise AssertionError( @@ -1308,7 
+1296,7 @@ def take_nd(self, indexer, axis, new_mgr_locs=None, fill_tuple=None): else: return self.make_block_same_class(new_values, new_mgr_locs) - def diff(self, n, axis=1): + def diff(self, n: int, axis: int = 1) -> List["Block"]: """ return block for the diff of the values """ new_values = algos.diff(self.values, n, axis=axis) return [self.make_block(values=new_values)] @@ -1397,7 +1385,7 @@ def func(cond, values, other): if not ( (self.is_integer or self.is_bool) - and lib.is_scalar(other) + and lib.is_float(other) and np.isnan(other) ): # np.where will cast integer array to floats in this case @@ -1450,7 +1438,7 @@ def func(cond, values, other): return result_blocks - def equals(self, other): + def equals(self, other) -> bool: if self.dtype != other.dtype or self.shape != other.shape: return False return array_equivalent(self.values, other.values) @@ -1830,7 +1818,7 @@ def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None): return self.make_block_same_class(new_values, new_mgr_locs) - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: # XXX: We may need to think about pushing this onto the array. # We're doing the same as CategoricalBlock here. return True @@ -2000,7 +1988,7 @@ class NumericBlock(Block): class FloatOrComplexBlock(NumericBlock): __slots__ = () - def equals(self, other): + def equals(self, other) -> bool: if self.dtype != other.dtype or self.shape != other.shape: return False left, right = self.values, other.values @@ -2011,7 +1999,7 @@ class FloatBlock(FloatOrComplexBlock): __slots__ = () is_float = True - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: return issubclass(tipo.type, (np.floating, np.integer)) and not issubclass( @@ -2075,7 +2063,7 @@ class ComplexBlock(FloatOrComplexBlock): __slots__ = () is_complex = True - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: return issubclass(tipo.type, (np.floating, np.integer, np.complexfloating)) @@ -2092,7 +2080,7 @@ class IntBlock(NumericBlock): is_integer = True _can_hold_na = False - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: return ( @@ -2182,7 +2170,7 @@ def _astype(self, dtype, **kwargs): # delegate return super()._astype(dtype=dtype, **kwargs) - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: if self.is_datetimetz: @@ -2372,41 +2360,19 @@ def _slice(self, slicer): return self.values[slicer] def _try_coerce_args(self, other): - """ - localize and return i8 for the values - - Parameters - ---------- - other : ndarray-like or scalar - - Returns - ------- - base-type other - """ - if is_valid_nat_for_dtype(other, self.dtype): - other = np.datetime64("NaT", "ns") - elif isinstance(other, self._holder): - if not tz_compare(other.tz, self.values.tz): - raise ValueError("incompatible or non tz-aware value") - - elif isinstance(other, (np.datetime64, datetime, date)): - other = tslibs.Timestamp(other) - - # test we can have an equal time zone - if not tz_compare(other.tz, self.values.tz): - raise ValueError("incompatible or non tz-aware value") - else: - raise TypeError(other) - + # DatetimeArray handles this for us return other - def diff(self, n, 
axis=0): - """1st discrete difference + def diff(self, n: int, axis: int = 0) -> List["Block"]: + """ + 1st discrete difference. Parameters ---------- - n : int, number of periods to diff - axis : int, axis to diff upon. default 0 + n : int + Number of periods to diff. + axis : int, default 0 + Axis to diff upon. Returns ------- @@ -2468,7 +2434,7 @@ def setitem(self, indexer, value): ) return newb.setitem(indexer, value) - def equals(self, other): + def equals(self, other) -> bool: # override for significant performance improvement if self.dtype != other.dtype or self.shape != other.shape: return False @@ -2507,7 +2473,7 @@ def __init__(self, values, placement, ndim=None): def _holder(self): return TimedeltaArray - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: return issubclass(tipo.type, np.timedelta64) @@ -2600,7 +2566,7 @@ class BoolBlock(NumericBlock): is_bool = True _can_hold_na = False - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: tipo = maybe_infer_dtype_type(element) if tipo is not None: return issubclass(tipo.type, np.bool_) @@ -2694,7 +2660,7 @@ def _maybe_downcast(self, blocks: List["Block"], downcast=None) -> List["Block"] # split and convert the blocks return _extend_blocks([b.convert(datetime=True, numeric=False) for b in blocks]) - def _can_hold_element(self, element): + def _can_hold_element(self, element: Any) -> bool: return True def _try_coerce_args(self, other): From 041b6b180f8175b642977852f01e9211983b46ce Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Tue, 27 Aug 2019 16:09:41 +0200 Subject: [PATCH 29/95] Replace with nested dict raises for overlapping keys (#27696) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/generic.py | 6 +----- pandas/tests/frame/test_replace.py | 18 ++++++++++++------ 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7fe358d3820f23..7a10447e3ad402 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -207,6 +207,7 @@ ExtensionArray Other ^^^^^ - Trying to set the ``display.precision``, ``display.max_rows`` or ``display.max_columns`` using :meth:`set_option` to anything but a ``None`` or a positive int will raise a ``ValueError`` (:issue:`23348`) +- Using :meth:`DataFrame.replace` with overlapping keys in a nested dictionary will no longer raise, now matching the behavior of a flat dictionary (:issue:`27660`) - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. 
(:issue:`26023`) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fac5e0f085fc62..6ade69fb4ca9d9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6669,11 +6669,7 @@ def replace( for k, v in items: keys, values = list(zip(*v.items())) or ([], []) - if set(keys) & set(values): - raise ValueError( - "Replacement not allowed with " - "overlapping keys and values" - ) + to_rep_dict[k] = list(keys) value_dict[k] = list(values) diff --git a/pandas/tests/frame/test_replace.py b/pandas/tests/frame/test_replace.py index 2862615ef8585a..b341ed6a52ca57 100644 --- a/pandas/tests/frame/test_replace.py +++ b/pandas/tests/frame/test_replace.py @@ -1069,18 +1069,24 @@ def test_replace_truthy(self): e = df assert_frame_equal(r, e) - def test_replace_int_to_int_chain(self): + def test_nested_dict_overlapping_keys_replace_int(self): + # GH 27660 keep behaviour consistent for simple dictionary and + # nested dictionary replacement df = DataFrame({"a": list(range(1, 5))}) - with pytest.raises(ValueError, match="Replacement not allowed .+"): - df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) - def test_replace_str_to_str_chain(self): + result = df.replace({"a": dict(zip(range(1, 5), range(2, 6)))}) + expected = df.replace(dict(zip(range(1, 5), range(2, 6)))) + assert_frame_equal(result, expected) + + def test_nested_dict_overlapping_keys_replace_str(self): + # GH 27660 a = np.arange(1, 5) astr = a.astype(str) bstr = np.arange(2, 6).astype(str) df = DataFrame({"a": astr}) - with pytest.raises(ValueError, match="Replacement not allowed .+"): - df.replace({"a": dict(zip(astr, bstr))}) + result = df.replace(dict(zip(astr, bstr))) + expected = df.replace({"a": dict(zip(astr, bstr))}) + assert_frame_equal(result, expected) def test_replace_swapping_bug(self): df = pd.DataFrame({"a": [True, False, True]}) From bd8dbf906e4352567094637c9c824c350dae3ad2 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 27 Aug 2019 22:32:40 +0100 Subject: [PATCH 30/95] TYPING: --check-untyped-defs util._decorators (#28128) --- pandas/core/groupby/generic.py | 30 ++++++------ pandas/core/indexes/interval.py | 4 +- pandas/core/window/ewm.py | 4 +- pandas/core/window/expanding.py | 4 +- pandas/core/window/rolling.py | 10 ++-- pandas/util/_decorators.py | 82 +++++++++++++++++++-------------- 6 files changed, 74 insertions(+), 60 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ea2bd22cccc3d0..7d6690a0dfa5ac 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -833,45 +833,45 @@ def apply(self, func, *args, **kwargs): axis="", ) @Appender(_shared_docs["aggregate"]) - def aggregate(self, func_or_funcs=None, *args, **kwargs): + def aggregate(self, func=None, *args, **kwargs): _level = kwargs.pop("_level", None) - relabeling = func_or_funcs is None + relabeling = func is None columns = None - no_arg_message = "Must provide 'func_or_funcs' or named aggregation **kwargs." + no_arg_message = "Must provide 'func' or named aggregation **kwargs." 
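        # A brief illustration of the named-aggregation path handled in the
        # branch just below (documented pandas 0.25+ API): a call such as
        #
        #     df.groupby("A")["B"].agg(minimum="min", maximum="max")
        #
        # enters with ``func=None``; the branch then sets ``func`` to
        # ``["min", "max"]`` and ``columns`` to ``["minimum", "maximum"]``
        # before aggregating, so the keyword names become the output labels.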
if relabeling: columns = list(kwargs) if not PY36: # sort for 3.5 and earlier columns = list(sorted(columns)) - func_or_funcs = [kwargs[col] for col in columns] + func = [kwargs[col] for col in columns] kwargs = {} if not columns: raise TypeError(no_arg_message) - if isinstance(func_or_funcs, str): - return getattr(self, func_or_funcs)(*args, **kwargs) + if isinstance(func, str): + return getattr(self, func)(*args, **kwargs) - if isinstance(func_or_funcs, abc.Iterable): + if isinstance(func, abc.Iterable): # Catch instances of lists / tuples # but not the class list / tuple itself. - func_or_funcs = _maybe_mangle_lambdas(func_or_funcs) - ret = self._aggregate_multiple_funcs(func_or_funcs, (_level or 0) + 1) + func = _maybe_mangle_lambdas(func) + ret = self._aggregate_multiple_funcs(func, (_level or 0) + 1) if relabeling: ret.columns = columns else: - cyfunc = self._get_cython_func(func_or_funcs) + cyfunc = self._get_cython_func(func) if cyfunc and not args and not kwargs: return getattr(self, cyfunc)() if self.grouper.nkeys > 1: - return self._python_agg_general(func_or_funcs, *args, **kwargs) + return self._python_agg_general(func, *args, **kwargs) try: - return self._python_agg_general(func_or_funcs, *args, **kwargs) + return self._python_agg_general(func, *args, **kwargs) except Exception: - result = self._aggregate_named(func_or_funcs, *args, **kwargs) + result = self._aggregate_named(func, *args, **kwargs) index = Index(sorted(result), name=self.grouper.names[0]) ret = Series(result, index=index) @@ -1464,8 +1464,8 @@ class DataFrameGroupBy(NDFrameGroupBy): axis="", ) @Appender(_shared_docs["aggregate"]) - def aggregate(self, arg=None, *args, **kwargs): - return super().aggregate(arg, *args, **kwargs) + def aggregate(self, func=None, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) agg = aggregate diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3874c6404565c7..021ff5fb462767 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -788,7 +788,7 @@ def _find_non_overlapping_monotonic_bounds(self, key): return start, stop def get_loc( - self, key: Any, method: Optional[str] = None + self, key: Any, method: Optional[str] = None, tolerance=None ) -> Union[int, slice, np.ndarray]: """ Get integer location, slice or boolean mask for requested label. @@ -982,7 +982,7 @@ def get_indexer_for(self, target: AnyArrayLike, **kwargs) -> np.ndarray: List of indices. 
""" if self.is_overlapping: - return self.get_indexer_non_unique(target, **kwargs)[0] + return self.get_indexer_non_unique(target)[0] return self.get_indexer(target, **kwargs) @Appender(_index_shared_docs["get_value"] % _index_doc_kwargs) diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 0ce6d5ddec2ad7..40e6c679ba72d8 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -206,8 +206,8 @@ def _constructor(self): axis="", ) @Appender(_shared_docs["aggregate"]) - def aggregate(self, arg, *args, **kwargs): - return super().aggregate(arg, *args, **kwargs) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) agg = aggregate diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py index c43ca6b0565f36..47bd8f2ec593b5 100644 --- a/pandas/core/window/expanding.py +++ b/pandas/core/window/expanding.py @@ -136,8 +136,8 @@ def _get_window(self, other=None, **kwargs): axis="", ) @Appender(_shared_docs["aggregate"]) - def aggregate(self, arg, *args, **kwargs): - return super().aggregate(arg, *args, **kwargs) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) agg = aggregate diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 323089b3fdf6b4..a7e122fa3528ff 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -901,12 +901,12 @@ def func(arg, window, min_periods=None, closed=None): axis="", ) @Appender(_shared_docs["aggregate"]) - def aggregate(self, arg, *args, **kwargs): - result, how = self._aggregate(arg, *args, **kwargs) + def aggregate(self, func, *args, **kwargs): + result, how = self._aggregate(func, *args, **kwargs) if result is None: # these must apply directly - result = arg(self) + result = func(self) return result @@ -1788,8 +1788,8 @@ def _validate_freq(self): axis="", ) @Appender(_shared_docs["aggregate"]) - def aggregate(self, arg, *args, **kwargs): - return super().aggregate(arg, *args, **kwargs) + def aggregate(self, func, *args, **kwargs): + return super().aggregate(func, *args, **kwargs) agg = aggregate diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 5c7d481ff2586e..8a25e511b5fc4f 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -1,21 +1,35 @@ from functools import wraps import inspect from textwrap import dedent -from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union +from typing import ( + Any, + Callable, + Dict, + List, + Optional, + Tuple, + Type, + TypeVar, + Union, + cast, +) import warnings from pandas._libs.properties import cache_readonly # noqa +FuncType = Callable[..., Any] +F = TypeVar("F", bound=FuncType) + def deprecate( name: str, - alternative: Callable, + alternative: Callable[..., Any], version: str, alt_name: Optional[str] = None, klass: Optional[Type[Warning]] = None, stacklevel: int = 2, msg: Optional[str] = None, -) -> Callable: +) -> Callable[..., Any]: """ Return a new function that emits a deprecation warning on use. 
@@ -47,7 +61,7 @@ def deprecate( warning_msg = msg or "{} is deprecated, use {} instead".format(name, alt_name) @wraps(alternative) - def wrapper(*args, **kwargs): + def wrapper(*args, **kwargs) -> Callable[..., Any]: warnings.warn(warning_msg, klass, stacklevel=stacklevel) return alternative(*args, **kwargs) @@ -90,9 +104,9 @@ def wrapper(*args, **kwargs): def deprecate_kwarg( old_arg_name: str, new_arg_name: Optional[str], - mapping: Optional[Union[Dict, Callable[[Any], Any]]] = None, + mapping: Optional[Union[Dict[Any, Any], Callable[[Any], Any]]] = None, stacklevel: int = 2, -) -> Callable: +) -> Callable[..., Any]: """ Decorator to deprecate a keyword argument of a function. @@ -160,27 +174,27 @@ def deprecate_kwarg( "mapping from old to new argument values " "must be dict or callable!" ) - def _deprecate_kwarg(func): + def _deprecate_kwarg(func: F) -> F: @wraps(func) - def wrapper(*args, **kwargs): + def wrapper(*args, **kwargs) -> Callable[..., Any]: old_arg_value = kwargs.pop(old_arg_name, None) - if new_arg_name is None and old_arg_value is not None: - msg = ( - "the '{old_name}' keyword is deprecated and will be " - "removed in a future version. " - "Please take steps to stop the use of '{old_name}'" - ).format(old_name=old_arg_name) - warnings.warn(msg, FutureWarning, stacklevel=stacklevel) - kwargs[old_arg_name] = old_arg_value - return func(*args, **kwargs) - if old_arg_value is not None: - if mapping is not None: - if hasattr(mapping, "get"): - new_arg_value = mapping.get(old_arg_value, old_arg_value) - else: + if new_arg_name is None: + msg = ( + "the '{old_name}' keyword is deprecated and will be " + "removed in a future version. " + "Please take steps to stop the use of '{old_name}'" + ).format(old_name=old_arg_name) + warnings.warn(msg, FutureWarning, stacklevel=stacklevel) + kwargs[old_arg_name] = old_arg_value + return func(*args, **kwargs) + + elif mapping is not None: + if callable(mapping): new_arg_value = mapping(old_arg_value) + else: + new_arg_value = mapping.get(old_arg_value, old_arg_value) msg = ( "the {old_name}={old_val!r} keyword is deprecated, " "use {new_name}={new_val!r} instead" @@ -198,7 +212,7 @@ def wrapper(*args, **kwargs): ).format(old_name=old_arg_name, new_name=new_arg_name) warnings.warn(msg, FutureWarning, stacklevel=stacklevel) - if kwargs.get(new_arg_name, None) is not None: + if kwargs.get(new_arg_name) is not None: msg = ( "Can only specify '{old_name}' or '{new_name}', " "not both" ).format(old_name=old_arg_name, new_name=new_arg_name) @@ -207,17 +221,17 @@ def wrapper(*args, **kwargs): kwargs[new_arg_name] = new_arg_value return func(*args, **kwargs) - return wrapper + return cast(F, wrapper) return _deprecate_kwarg def rewrite_axis_style_signature( name: str, extra_params: List[Tuple[str, Any]] -) -> Callable: - def decorate(func): +) -> Callable[..., Any]: + def decorate(func: F) -> F: @wraps(func) - def wrapper(*args, **kwargs): + def wrapper(*args, **kwargs) -> Callable[..., Any]: return func(*args, **kwargs) kind = inspect.Parameter.POSITIONAL_OR_KEYWORD @@ -234,8 +248,9 @@ def wrapper(*args, **kwargs): sig = inspect.Signature(params) - func.__signature__ = sig - return wrapper + # https://github.com/python/typing/issues/598 + func.__signature__ = sig # type: ignore + return cast(F, wrapper) return decorate @@ -279,18 +294,17 @@ def __init__(self, *args, **kwargs): self.params = args or kwargs - def __call__(self, func: Callable) -> Callable: + def __call__(self, func: F) -> F: func.__doc__ = func.__doc__ and func.__doc__ % 
self.params return func def update(self, *args, **kwargs) -> None: """ Update self.params with supplied args. - - If called, we assume self.params is a dict. """ - self.params.update(*args, **kwargs) + if isinstance(self.params, dict): + self.params.update(*args, **kwargs) class Appender: @@ -320,7 +334,7 @@ def __init__(self, addendum: Optional[str], join: str = "", indents: int = 0): self.addendum = addendum self.join = join - def __call__(self, func: Callable) -> Callable: + def __call__(self, func: F) -> F: func.__doc__ = func.__doc__ if func.__doc__ else "" self.addendum = self.addendum if self.addendum else "" docitems = [func.__doc__, self.addendum] From 080d57ee9fef9275518908cb7665ea062684c29b Mon Sep 17 00:00:00 2001 From: Addison Lynch Date: Tue, 27 Aug 2019 17:39:03 -0400 Subject: [PATCH 31/95] CLN: Use ABC classes for isinstance checks, remove unnecessary imports (#28158) * CLN: Use ABC classes for isinstance checks, remove unnecessary imports * Formatting repairs --- pandas/core/frame.py | 27 +++++++++++---------------- pandas/core/indexing.py | 40 +++++++++++++++++++++------------------- 2 files changed, 32 insertions(+), 35 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f636bb6db74309..3d1a39a86c784e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -86,12 +86,7 @@ from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray from pandas.core.arrays.sparse import SparseFrameAccessor from pandas.core.generic import NDFrame, _shared_docs -from pandas.core.index import ( - Index, - MultiIndex, - ensure_index, - ensure_index_from_sequences, -) +from pandas.core.index import Index, ensure_index, ensure_index_from_sequences from pandas.core.indexes import base as ibase from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.multi import maybe_droplevels @@ -1734,7 +1729,7 @@ def to_records( if is_datetime64_any_dtype(self.index) and convert_datetime64: ix_vals = [self.index.to_pydatetime()] else: - if isinstance(self.index, MultiIndex): + if isinstance(self.index, ABCMultiIndex): # array of tuples to numpy cols. copy copy copy ix_vals = list(map(np.array, zip(*self.index.values))) else: @@ -1745,7 +1740,7 @@ def to_records( count = 0 index_names = list(self.index.names) - if isinstance(self.index, MultiIndex): + if isinstance(self.index, ABCMultiIndex): for i, n in enumerate(index_names): if n is None: index_names[i] = "level_%d" % count @@ -2868,7 +2863,7 @@ def __getitem__(self, key): # The behavior is inconsistent. 
It returns a Series, except when # - the key itself is repeated (test on data.shape, #9519), or # - we have a MultiIndex on columns (test on self.columns, #21309) - if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex): + if data.shape[1] == 1 and not isinstance(self.columns, ABCMultiIndex): data = data[key] return data @@ -3657,7 +3652,7 @@ def reindexer(value): elif isinstance(value, DataFrame): # align right-hand-side columns if self.columns # is multi-index and self[key] is a sub-frame - if isinstance(self.columns, MultiIndex) and key in self.columns: + if isinstance(self.columns, ABCMultiIndex) and key in self.columns: loc = self.columns.get_loc(key) if isinstance(loc, (slice, Series, np.ndarray, Index)): cols = maybe_droplevels(self.columns[loc], key) @@ -3706,7 +3701,7 @@ def reindexer(value): # broadcast across multiple columns if necessary if broadcast and key in self.columns and value.ndim == 1: - if not self.columns.is_unique or isinstance(self.columns, MultiIndex): + if not self.columns.is_unique or isinstance(self.columns, ABCMultiIndex): existing_piece = self[key] if isinstance(existing_piece, DataFrame): value = np.tile(value, (len(existing_piece.columns), 1)) @@ -4601,7 +4596,7 @@ def _maybe_casted_values(index, labels=None): new_index = self.index.droplevel(level) if not drop: - if isinstance(self.index, MultiIndex): + if isinstance(self.index, ABCMultiIndex): names = [ n if n is not None else ("level_%d" % i) for (i, n) in enumerate(self.index.names) @@ -4612,7 +4607,7 @@ def _maybe_casted_values(index, labels=None): names = [default] if self.index.name is None else [self.index.name] to_insert = ((self.index, None),) - multi_col = isinstance(self.columns, MultiIndex) + multi_col = isinstance(self.columns, ABCMultiIndex) for i, (lev, lab) in reversed(list(enumerate(to_insert))): if not (level is None or i in level): continue @@ -4994,7 +4989,7 @@ def sort_index( level, ascending=ascending, sort_remaining=sort_remaining ) - elif isinstance(labels, MultiIndex): + elif isinstance(labels, ABCMultiIndex): from pandas.core.sorting import lexsort_indexer indexer = lexsort_indexer( @@ -5280,7 +5275,7 @@ def reorder_levels(self, order, axis=0): type of caller (new object) """ axis = self._get_axis_number(axis) - if not isinstance(self._get_axis(axis), MultiIndex): # pragma: no cover + if not isinstance(self._get_axis(axis), ABCMultiIndex): # pragma: no cover raise TypeError("Can only reorder levels on a hierarchical axis.") result = self.copy() @@ -7784,7 +7779,7 @@ def _count_level(self, level, axis=0, numeric_only=False): count_axis = frame._get_axis(axis) agg_axis = frame._get_agg_axis(axis) - if not isinstance(count_axis, MultiIndex): + if not isinstance(count_axis, ABCMultiIndex): raise TypeError( "Can only count levels on hierarchical " "{ax}.".format(ax=self._get_axis_name(axis)) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b8ca3419af4d7e..3d495eeb8c885b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -22,11 +22,11 @@ is_sparse, ) from pandas.core.dtypes.concat import concat_compat -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries from pandas.core.dtypes.missing import _infer_fill_value, isna import pandas.core.common as com -from pandas.core.index import Index, InvalidIndexError, MultiIndex +from pandas.core.index import Index, InvalidIndexError from pandas.core.indexers import is_list_like_indexer, length_of_indexer @@ 
-172,7 +172,7 @@ def _get_setitem_indexer(self, key): ax = self.obj._get_axis(0) - if isinstance(ax, MultiIndex) and self.name != "iloc": + if isinstance(ax, ABCMultiIndex) and self.name != "iloc": try: return ax.get_loc(key) except Exception: @@ -241,7 +241,7 @@ def _has_valid_tuple(self, key: Tuple): ) def _is_nested_tuple_indexer(self, tup: Tuple): - if any(isinstance(ax, MultiIndex) for ax in self.obj.axes): + if any(isinstance(ax, ABCMultiIndex) for ax in self.obj.axes): return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) return False @@ -329,7 +329,7 @@ def _setitem_with_indexer(self, indexer, value): # GH 10360, GH 27841 if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): for i, ax in zip(indexer, self.obj.axes): - if isinstance(ax, MultiIndex) and not ( + if isinstance(ax, ABCMultiIndex) and not ( is_integer(i) or com.is_null_slice(i) ): take_split_path = True @@ -422,7 +422,9 @@ def _setitem_with_indexer(self, indexer, value): # if we have a partial multiindex, then need to adjust the plane # indexer here - if len(labels) == 1 and isinstance(self.obj[labels[0]].axes[0], MultiIndex): + if len(labels) == 1 and isinstance( + self.obj[labels[0]].axes[0], ABCMultiIndex + ): item = labels[0] obj = self.obj[item] index = obj.index @@ -495,7 +497,7 @@ def setter(item, v): # we have an equal len Frame if isinstance(value, ABCDataFrame): sub_indexer = list(indexer) - multiindex_indexer = isinstance(labels, MultiIndex) + multiindex_indexer = isinstance(labels, ABCMultiIndex) for item in labels: if item in value: @@ -777,8 +779,8 @@ def _align_frame(self, indexer, df: ABCDataFrame): # we have a multi-index and are trying to align # with a particular, level GH3738 if ( - isinstance(ax, MultiIndex) - and isinstance(df.index, MultiIndex) + isinstance(ax, ABCMultiIndex) + and isinstance(df.index, ABCMultiIndex) and ax.nlevels != df.index.nlevels ): raise TypeError( @@ -904,7 +906,7 @@ def _getitem_lowerdim(self, tup: Tuple): ax0 = self.obj._get_axis(0) # ...but iloc should handle the tuple as simple integer-location # instead of checking it as multiindex representation (GH 13797) - if isinstance(ax0, MultiIndex) and self.name != "iloc": + if isinstance(ax0, ABCMultiIndex) and self.name != "iloc": result = self._handle_lowerdim_multi_index_axis0(tup) if result is not None: return result @@ -1004,7 +1006,7 @@ def _getitem_axis(self, key, axis: int): if isinstance(key, slice): return self._get_slice_axis(key, axis=axis) elif is_list_like_indexer(key) and not ( - isinstance(key, tuple) and isinstance(labels, MultiIndex) + isinstance(key, tuple) and isinstance(labels, ABCMultiIndex) ): if hasattr(key, "ndim") and key.ndim > 1: @@ -1017,7 +1019,7 @@ def _getitem_axis(self, key, axis: int): key = labels._maybe_cast_indexer(key) if is_integer(key): - if axis == 0 and isinstance(labels, MultiIndex): + if axis == 0 and isinstance(labels, ABCMultiIndex): try: return self._get_label(key, axis=axis) except (KeyError, TypeError): @@ -1228,7 +1230,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): try: return labels.get_loc(obj) except LookupError: - if isinstance(obj, tuple) and isinstance(labels, MultiIndex): + if isinstance(obj, tuple) and isinstance(labels, ABCMultiIndex): if len(obj) == labels.nlevels: return {"key": obj} raise @@ -1248,7 +1250,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): # always valid return {"key": obj} - if obj >= self.obj.shape[axis] and not isinstance(labels, MultiIndex): + if obj >= 
self.obj.shape[axis] and not isinstance(labels, ABCMultiIndex): # a positional raise ValueError("cannot set by positional indexing with enlargement") @@ -1715,7 +1717,7 @@ def _is_scalar_access(self, key: Tuple): return False ax = self.obj.axes[i] - if isinstance(ax, MultiIndex): + if isinstance(ax, ABCMultiIndex): return False if isinstance(k, str) and ax._supports_partial_string_indexing: @@ -1737,7 +1739,7 @@ def _getitem_scalar(self, key): def _get_partial_string_timestamp_match_key(self, key, labels): """Translate any partial string timestamp matches in key, returning the new key (GH 10331)""" - if isinstance(labels, MultiIndex): + if isinstance(labels, ABCMultiIndex): if ( isinstance(key, str) and labels.levels[0]._supports_partial_string_indexing @@ -1781,7 +1783,7 @@ def _getitem_axis(self, key, axis: int): # to a list of keys # we will use the *values* of the object # and NOT the index if its a PandasObject - if isinstance(labels, MultiIndex): + if isinstance(labels, ABCMultiIndex): if isinstance(key, (ABCSeries, np.ndarray)) and key.ndim <= 1: # Series, or 0,1 ndim ndarray @@ -1809,7 +1811,7 @@ def _getitem_axis(self, key, axis: int): key = tuple([key]) # an iterable multi-selection - if not (isinstance(key, tuple) and isinstance(labels, MultiIndex)): + if not (isinstance(key, tuple) and isinstance(labels, ABCMultiIndex)): if hasattr(key, "ndim") and key.ndim > 1: raise ValueError("Cannot index with multidimensional key") @@ -2474,7 +2476,7 @@ def is_nested_tuple(tup, labels): for i, k in enumerate(tup): if is_list_like(k) or isinstance(k, slice): - return isinstance(labels, MultiIndex) + return isinstance(labels, ABCMultiIndex) return False From d91ffa6407c1baf6afe7d0a1b9655f44da77ac24 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Tue, 27 Aug 2019 22:50:22 +0100 Subject: [PATCH 32/95] TYPING: change to FrameOrSeries Alias in pandas._typing (#28173) --- pandas/_typing.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/_typing.py b/pandas/_typing.py index 837a7a89e0b839..37a5d7945955de 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -11,9 +11,9 @@ from pandas.core.arrays.base import ExtensionArray # noqa: F401 from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401 from pandas.core.indexes.base import Index # noqa: F401 - from pandas.core.frame import DataFrame # noqa: F401 from pandas.core.series import Series # noqa: F401 from pandas.core.sparse.series import SparseSeries # noqa: F401 + from pandas.core.generic import NDFrame # noqa: F401 AnyArrayLike = TypeVar( @@ -24,7 +24,10 @@ Dtype = Union[str, np.dtype, "ExtensionDtype"] FilePathOrBuffer = Union[str, Path, IO[AnyStr]] -FrameOrSeries = TypeVar("FrameOrSeries", "Series", "DataFrame") +FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame") Scalar = Union[str, int, float] Axis = Union[str, int] Ordered = Optional[bool] + +# to maintain type information across generic functions and parametrization +_T = TypeVar("_T") From 612d3b23da5b99f6c5642be574fb08713a45d7d1 Mon Sep 17 00:00:00 2001 From: killerontherun1 Date: Thu, 29 Aug 2019 02:04:56 +0530 Subject: [PATCH 33/95] Solving GL01,GL02 in pandas.Interval and a few mentioned in the comments (#28197) --- pandas/core/indexes/interval.py | 3 ++- pandas/io/sql.py | 3 ++- pandas/io/stata.py | 2 +- pandas/plotting/_misc.py | 6 ++++-- pandas/util/testing.py | 6 ++++-- 5 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 
021ff5fb462767..6b0081c6a2ff51 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -331,7 +331,8 @@ def __contains__(self, key): >>> idx.to_tuples() Index([(0.0, 1.0), (nan, nan), (2.0, 3.0)], dtype='object') >>> idx.to_tuples(na_tuple=False) - Index([(0.0, 1.0), nan, (2.0, 3.0)], dtype='object')""", + Index([(0.0, 1.0), nan, (2.0, 3.0)], dtype='object') + """, ) ) def to_tuples(self, na_tuple=True): diff --git a/pandas/io/sql.py b/pandas/io/sql.py index f1f52a9198d29d..72df00fd4c5a19 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -269,7 +269,8 @@ def read_sql_query( parse_dates=None, chunksize=None, ): - """Read SQL query into a DataFrame. + """ + Read SQL query into a DataFrame. Returns a DataFrame corresponding to the result set of the query string. Optionally provide an `index_col` parameter to use one of the diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 69bafc77492587..31fdaa5cc67359 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -138,7 +138,7 @@ _iterator_params, ) -_data_method_doc = """\ +_data_method_doc = """ Read observations from Stata file, converting them into a dataframe .. deprecated:: diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 1cba0e73541826..7ed0ffc6d0115e 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -329,7 +329,8 @@ def parallel_coordinates( sort_labels=False, **kwds ): - """Parallel coordinates plotting. + """ + Parallel coordinates plotting. Parameters ---------- @@ -392,7 +393,8 @@ def parallel_coordinates( def lag_plot(series, lag=1, ax=None, **kwds): - """Lag plot for time series. + """ + Lag plot for time series. Parameters ---------- diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a8f0d0da52e1f4..0d543f891a5f63 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -580,7 +580,8 @@ def assert_index_equal( check_categorical: bool = True, obj: str = "Index", ) -> None: - """Check that left and right Index are equal. + """ + Check that left and right Index are equal. Parameters ---------- @@ -1081,7 +1082,8 @@ def assert_series_equal( check_categorical=True, obj="Series", ): - """Check that left and right Series are equal. + """ + Check that left and right Series are equal. 
Parameters ---------- From bc65fe6c12dc78679ba8584eee83c6e3e243b5b9 Mon Sep 17 00:00:00 2001 From: "Roei.r" Date: Thu, 29 Aug 2019 02:01:46 +0300 Subject: [PATCH 34/95] Fix slicer assignment bug (#28131) --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/indexers.py | 1 + pandas/tests/indexing/test_loc.py | 10 ++++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 7a10447e3ad402..050a26cc86d429 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -141,7 +141,7 @@ Interval Indexing ^^^^^^^^ -- +- Bug in assignment using a reverse slicer (:issue:`26939`) - Missing diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index 70c48e969172f5..433bca940c0285 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -226,6 +226,7 @@ def length_of_indexer(indexer, target=None) -> int: if step is None: step = 1 elif step < 0: + start, stop = stop + 1, start + 1 step = -step return (stop - start + step - 1) // step elif isinstance(indexer, (ABCSeries, ABCIndexClass, np.ndarray, list)): diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index abe0cd86c90d7d..9845b1ac3a4b9a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1070,6 +1070,16 @@ def test_series_indexing_zerodim_np_array(self): result = s.loc[np.array(0)] assert result == 1 + def test_loc_reverse_assignment(self): + # GH26939 + data = [1, 2, 3, 4, 5, 6] + [None] * 4 + expected = Series(data, index=range(2010, 2020)) + + result = pd.Series(index=range(2010, 2020)) + result.loc[2015:2010:-1] = [6, 5, 4, 3, 2, 1] + + tm.assert_series_equal(result, expected) + def test_series_loc_getitem_label_list_missing_values(): # gh-11428 From 2518040894ef00d9ce427539937a86b2328a9e50 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 29 Aug 2019 05:17:03 -0700 Subject: [PATCH 35/95] STY: whitespace before class docstringsd (#28209) --- pandas/core/base.py | 1 - pandas/core/computation/expr.py | 8 ++++---- pandas/core/computation/pytables.py | 2 -- pandas/core/groupby/groupby.py | 1 - pandas/core/groupby/grouper.py | 1 - pandas/core/groupby/ops.py | 1 - pandas/core/indexes/frozen.py | 1 - pandas/core/sorting.py | 1 - pandas/io/common.py | 1 - pandas/io/packers.py | 1 - pandas/io/pytables.py | 14 -------------- pandas/tests/io/test_sql.py | 1 - pandas/tests/reshape/test_concat.py | 1 - 13 files changed, 4 insertions(+), 30 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 767b5594450385..2d5ffb5e913923 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -47,7 +47,6 @@ class PandasObject(DirNamesMixin): - """baseclass for various pandas objects""" @property diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 4c164968575a16..45319a4d63d948 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -367,8 +367,8 @@ def f(cls): @disallow(_unsupported_nodes) @add_ops(_op_classes) class BaseExprVisitor(ast.NodeVisitor): - - """Custom ast walker. Parsers of other engines should subclass this class + """ + Custom ast walker. Parsers of other engines should subclass this class if necessary. Parameters @@ -803,8 +803,8 @@ def __init__(self, env, engine, parser, preparser=lambda x: x): class Expr: - - """Object encapsulating an expression. + """ + Object encapsulating an expression. 
Parameters ---------- diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 1523eb05ac41dd..81658ab23ba466 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -478,7 +478,6 @@ def _validate_where(w): class Expr(expr.Expr): - """ hold a pytables like expression, comprised of possibly multiple 'terms' Parameters @@ -573,7 +572,6 @@ def evaluate(self): class TermValue: - """ hold a term value the we use to construct a condition/filter """ def __init__(self, value, converted, kind): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 87047d21709927..4d21b5810470a7 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1011,7 +1011,6 @@ def _apply_filter(self, indices, dropna): class GroupBy(_GroupBy): - """ Class for grouping and aggregating relational data. diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 3415c0e056a1ce..31623171e9e631 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -217,7 +217,6 @@ def __repr__(self): class Grouping: - """ Holds the grouping information for a single key diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index b0c629f017dd34..5ad48fa675dd92 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -706,7 +706,6 @@ def _aggregate_series_pure_python(self, obj, func): class BinGrouper(BaseGrouper): - """ This is an internal Grouper class diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 2e5b3ff8ef502d..329456e25bdedc 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -22,7 +22,6 @@ class FrozenList(PandasObject, list): - """ Container that doesn't allow setting item *but* because it's technically non-hashable, will be used diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 5db31fe6664eaf..e6edad656d430e 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -271,7 +271,6 @@ def nargsort(items, kind="quicksort", ascending=True, na_position="last"): class _KeyMapper: - """ Ease my suffering. Map compressed group id -> key tuple """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 290022167e5205..30228d660e8167 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -576,7 +576,6 @@ def __next__(self) -> str: class UTF8Recoder(BaseIterator): - """ Iterator that reads an encoded stream and re-encodes the input to UTF-8 """ diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 04e49708ff082b..ad47ba23b9221d 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -846,7 +846,6 @@ def __init__( class Iterator: - """ manage the unpacking iteration, close the file on completion """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 576c45a2f8097e..fbe413f820c901 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -429,7 +429,6 @@ def _is_metadata_of(group, parent_group): class HDFStore: - """ Dict-like IO interface for storing pandas objects in PyTables. 
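[Every hunk in this style patch is the same mechanical edit, so one before/after sketch covers them all; the class name below is just a stand-in, not code from the patch:

    # before: blank line between the class statement and its docstring
    class SomeTable:

        """ represent a table """

    # after: the docstring opens on the first line of the class body
    class SomeTable:
        """ represent a table """
]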
@@ -1546,7 +1545,6 @@ def _read_group(self, group, **kwargs): class TableIterator: - """ define the iteration interface on a table Parameters @@ -1654,7 +1652,6 @@ def get_result(self, coordinates=False): class IndexCol: - """ an index column description class Parameters @@ -1968,7 +1965,6 @@ def write_metadata(self, handler): class GenericIndexCol(IndexCol): - """ an index which is not represented in the data of the table """ @property @@ -2006,7 +2002,6 @@ def set_attr(self): class DataCol(IndexCol): - """ a data holding column, by definition this is not indexable Parameters @@ -2456,7 +2451,6 @@ def set_attr(self): class DataIndexableCol(DataCol): - """ represent a data column that can be indexed """ is_data_indexable = True @@ -2479,7 +2473,6 @@ def get_atom_timedelta64(self, block): class GenericDataIndexableCol(DataIndexableCol): - """ represent a generic pytables data column """ def get_attr(self): @@ -2487,7 +2480,6 @@ def get_attr(self): class Fixed: - """ represent an object in my store facilitate read/write of various types of objects this is an abstract base class @@ -2655,7 +2647,6 @@ def delete(self, where=None, start=None, stop=None, **kwargs): class GenericFixed(Fixed): - """ a generified fixed version """ _index_type_map = {DatetimeIndex: "datetime", PeriodIndex: "period"} @@ -3252,7 +3243,6 @@ class FrameFixed(BlockManagerFixed): class Table(Fixed): - """ represent a table: facilitate read/write of various types of tables @@ -4127,7 +4117,6 @@ def read_column(self, column, where=None, start=None, stop=None): class WORMTable(Table): - """ a write-once read-many table: this format DOES NOT ALLOW appending to a table. writing is a one-time operation the data are stored in a format that allows for searching the data on disk @@ -4149,7 +4138,6 @@ def write(self, **kwargs): class LegacyTable(Table): - """ an appendable table: allow append/query/delete operations to a (possibly) already existing appendable table this table ALLOWS append (but doesn't require them), and stores the data in a format @@ -4603,7 +4591,6 @@ def write(self, **kwargs): class AppendableMultiFrameTable(AppendableFrameTable): - """ a frame with a multi-index """ table_type = "appendable_multiframe" @@ -4962,7 +4949,6 @@ def _need_convert(kind): class Selection: - """ Carries out a selection operation on a tables.Table object. diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index d8465a427eaea5..25727447b4c6fb 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -565,7 +565,6 @@ def _transaction_test(self): class _TestSQLApi(PandasSQLTest): - """ Base class to test the public API. diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 6366bf0521fbc2..13f0f14014a314 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -50,7 +50,6 @@ def sort_with_none(request): class TestConcatAppendCommon: - """ Test common dtype coercion rules between concat and append. 
""" From 5f34933848d7daa129651a53158cb94367bacbcd Mon Sep 17 00:00:00 2001 From: DavidRosen Date: Thu, 29 Aug 2019 08:31:31 -0400 Subject: [PATCH 36/95] DOC: Example for adding a calculated column in SQL and Pandas (#28182) * Add example for adding a calculated column --- .../comparison/comparison_with_sql.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doc/source/getting_started/comparison/comparison_with_sql.rst b/doc/source/getting_started/comparison/comparison_with_sql.rst index 366fdd546f58b5..6a03c06de3699a 100644 --- a/doc/source/getting_started/comparison/comparison_with_sql.rst +++ b/doc/source/getting_started/comparison/comparison_with_sql.rst @@ -49,6 +49,20 @@ With pandas, column selection is done by passing a list of column names to your Calling the DataFrame without the list of column names would display all columns (akin to SQL's ``*``). +In SQL, you can add a calculated column: + +.. code-block:: sql + + SELECT *, tip/total_bill as tip_rate + FROM tips + LIMIT 5; + +With pandas, you can use the :meth:`DataFrame.assign` method of a DataFrame to append a new column: + +.. ipython:: python + + tips.assign(tip_rate=tips['tip'] / tips['total_bill']).head(5) + WHERE ----- Filtering in SQL is done via a WHERE clause. From 03b3c8fc82b3a18a3ddcad1b3b26d601467fc74c Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 29 Aug 2019 20:28:54 +0100 Subject: [PATCH 37/95] CLN: minor typos MutliIndex -> MultiIndex (#28223) --- doc/source/whatsnew/v0.20.0.rst | 2 +- pandas/tests/frame/test_reshape.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index ef6108ae3ec909..62604dd3edd2dd 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -495,7 +495,7 @@ Other enhancements - :func:`pandas.util.hash_pandas_object` has gained the ability to hash a ``MultiIndex`` (:issue:`15224`) - ``Series/DataFrame.squeeze()`` have gained the ``axis`` parameter. (:issue:`15339`) - ``DataFrame.to_excel()`` has a new ``freeze_panes`` parameter to turn on Freeze Panes when exporting to Excel (:issue:`15160`) -- ``pd.read_html()`` will parse multiple header rows, creating a MutliIndex header. (:issue:`13434`). +- ``pd.read_html()`` will parse multiple header rows, creating a MultiIndex header. (:issue:`13434`). - HTML table output skips ``colspan`` or ``rowspan`` attribute if equal to 1. (:issue:`15403`) - :class:`pandas.io.formats.style.Styler` template now has blocks for easier extension, see the :ref:`example notebook ` (:issue:`15649`) - :meth:`Styler.render() ` now accepts ``**kwargs`` to allow user-defined variables in the template (:issue:`15649`) diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index f3452e9a85fb3e..84e343f07f990d 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -984,7 +984,7 @@ def test_stack_preserve_categorical_dtype(self, ordered, labels): df = DataFrame([[10, 11, 12]], columns=cidx) result = df.stack() - # `MutliIndex.from_product` preserves categorical dtype - + # `MultiIndex.from_product` preserves categorical dtype - # it's tested elsewhere. 
midx = pd.MultiIndex.from_product([df.index, cidx]) expected = Series([10, 11, 12], index=midx) From d9b3993cc3722ddd01367089d374652c0b5ce0ce Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 30 Aug 2019 07:28:37 -0700 Subject: [PATCH 38/95] reduction-> libreduction for grepability (#28184) --- pandas/core/apply.py | 6 +++--- pandas/core/groupby/ops.py | 10 +++++----- pandas/tests/groupby/test_bin_groupby.py | 16 ++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 5c8599dbb054b6..b96b3c75720315 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -3,7 +3,7 @@ import numpy as np -from pandas._libs import reduction +from pandas._libs import reduction as libreduction from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -221,7 +221,7 @@ def apply_raw(self): """ apply to the values as a numpy array """ try: - result = reduction.compute_reduction(self.values, self.f, axis=self.axis) + result = libreduction.compute_reduction(self.values, self.f, axis=self.axis) except Exception: result = np.apply_along_axis(self.f, self.axis, self.values) @@ -281,7 +281,7 @@ def apply_standard(self): dummy = Series(empty_arr, index=index, dtype=values.dtype) try: - result = reduction.compute_reduction( + result = libreduction.compute_reduction( values, self.f, axis=self.axis, dummy=dummy, labels=labels ) return self.obj._constructor_sliced(result, index=labels) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5ad48fa675dd92..7afb0a28f943ee 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -12,7 +12,7 @@ from pandas._libs import NaT, iNaT, lib import pandas._libs.groupby as libgroupby -import pandas._libs.reduction as reduction +import pandas._libs.reduction as libreduction from pandas.errors import AbstractMethodError from pandas.util._decorators import cache_readonly @@ -207,7 +207,7 @@ def apply(self, f, data, axis=0): if len(result_values) == len(group_keys): return group_keys, result_values, mutated - except reduction.InvalidApply: + except libreduction.InvalidApply: # Cannot fast apply on MultiIndex (_has_complex_internals). # This Exception is also raised if `f` triggers an exception # but it is preferable to raise the exception in Python. 
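[The call sites renamed in this patch share one shape: try the compiled reducer first, and fall back to a pure-Python loop when it raises InvalidApply, so that any exception from the user function surfaces with an ordinary traceback. A self-contained sketch of that shape, with hypothetical stand-ins for the Cython pieces:

    class InvalidApply(Exception):
        # stand-in for pandas._libs.reduction.InvalidApply
        pass

    def fast_apply(chunks, func):
        # stand-in for libreduction.apply_frame_axis0: pretend the fast
        # path refuses anything it considers too complex to handle
        if any(isinstance(chunk, dict) for chunk in chunks):
            raise InvalidApply
        return [func(chunk) for chunk in chunks]

    def apply_groups(chunks, func):
        try:
            return fast_apply(chunks, func)
        except InvalidApply:
            # fall back so errors raised by func itself come from Python
            return [func(chunk) for chunk in chunks]

    print(apply_groups([[1, 2], [3]], len))  # fast path taken -> [2, 1]
]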
@@ -678,7 +678,7 @@ def _aggregate_series_fast(self, obj, func): indexer = get_group_index_sorter(group_index, ngroups) obj = obj.take(indexer) group_index = algorithms.take_nd(group_index, indexer, allow_fill=False) - grouper = reduction.SeriesGrouper(obj, func, group_index, ngroups, dummy) + grouper = libreduction.SeriesGrouper(obj, func, group_index, ngroups, dummy) result, counts = grouper.get_result() return result, counts @@ -851,7 +851,7 @@ def groupings(self): def agg_series(self, obj, func): dummy = obj[:0] - grouper = reduction.SeriesBinGrouper(obj, func, self.bins, dummy) + grouper = libreduction.SeriesBinGrouper(obj, func, self.bins, dummy) return grouper.get_result() @@ -939,7 +939,7 @@ def fast_apply(self, f, names): return [], True sdata = self._get_sorted_data() - return reduction.apply_frame_axis0(sdata, f, names, starts, ends) + return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) def _chop(self, sdata, slice_obj): if self.axis == 0: diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py index 2195686ee9c7f6..b8f9ecd42bae3c 100644 --- a/pandas/tests/groupby/test_bin_groupby.py +++ b/pandas/tests/groupby/test_bin_groupby.py @@ -2,7 +2,7 @@ from numpy import nan import pytest -from pandas._libs import groupby, lib, reduction +from pandas._libs import groupby, lib, reduction as libreduction from pandas.core.dtypes.common import ensure_int64 @@ -18,7 +18,7 @@ def test_series_grouper(): labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.int64) - grouper = reduction.SeriesGrouper(obj, np.mean, labels, 2, dummy) + grouper = libreduction.SeriesGrouper(obj, np.mean, labels, 2, dummy) result, counts = grouper.get_result() expected = np.array([obj[3:6].mean(), obj[6:].mean()]) @@ -34,7 +34,7 @@ def test_series_bin_grouper(): bins = np.array([3, 6]) - grouper = reduction.SeriesBinGrouper(obj, np.mean, bins, dummy) + grouper = libreduction.SeriesBinGrouper(obj, np.mean, bins, dummy) result, counts = grouper.get_result() expected = np.array([obj[:3].mean(), obj[3:6].mean(), obj[6:].mean()]) @@ -120,31 +120,31 @@ class TestMoments: class TestReducer: def test_int_index(self): arr = np.random.randn(100, 4) - result = reduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4))) + result = libreduction.compute_reduction(arr, np.sum, labels=Index(np.arange(4))) expected = arr.sum(0) assert_almost_equal(result, expected) - result = reduction.compute_reduction( + result = libreduction.compute_reduction( arr, np.sum, axis=1, labels=Index(np.arange(100)) ) expected = arr.sum(1) assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(100)) - result = reduction.compute_reduction( + result = libreduction.compute_reduction( arr, np.sum, dummy=dummy, labels=Index(np.arange(4)) ) expected = arr.sum(0) assert_almost_equal(result, expected) dummy = Series(0.0, index=np.arange(4)) - result = reduction.compute_reduction( + result = libreduction.compute_reduction( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) expected = arr.sum(1) assert_almost_equal(result, expected) - result = reduction.compute_reduction( + result = libreduction.compute_reduction( arr, np.sum, axis=1, dummy=dummy, labels=Index(np.arange(100)) ) assert_almost_equal(result, expected) From 82a7455f8a69b99e9508e6f69bae943072d12a1b Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Fri, 30 Aug 2019 08:32:27 -0600 Subject: [PATCH 39/95] REGR: Fix to_csv with IntervalIndex (#28229) * REGR: Fix to_csv with IntervalIndex --- 
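[The regression is easy to reproduce; before this fix the snippet below raised a TypeError while formatting the interval index, and with it the intervals are written as their string representation, mirroring the tests added further down:

    import pandas as pd

    df = pd.DataFrame({"A": list("abc"), "B": range(3)},
                      index=pd.interval_range(0, 3))
    print(df.to_csv())
    # ,A,B
    # "(0, 1]",a,0
    # "(1, 2]",b,1
    # "(2, 3]",c,2
]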
doc/source/whatsnew/v0.25.2.rst | 2 +- pandas/core/indexes/interval.py | 8 +--- pandas/tests/frame/test_to_csv.py | 14 +++++++ .../tests/indexes/interval/test_interval.py | 40 +++++++++++++++++++ pandas/tests/series/test_io.py | 14 +++++++ 5 files changed, 71 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 6974c7521a2376..8d8a39139cf84c 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -62,7 +62,7 @@ Missing I/O ^^^ -- +- Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`) - - diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 6b0081c6a2ff51..7c581a12764b1e 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1096,12 +1096,8 @@ def _format_with_header(self, header, **kwargs): return header + list(self._format_native_types(**kwargs)) def _format_native_types(self, na_rep="NaN", quoting=None, **kwargs): - """ actually format my specific types """ - from pandas.io.formats.format import ExtensionArrayFormatter - - return ExtensionArrayFormatter( - values=self, na_rep=na_rep, justify="all", leading_space=False - ).get_result() + # GH 28210: use base method but with different default na_rep + return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) def _format_data(self, name=None): diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index e2e4a82ff581cf..8fb028a0f0326d 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -695,6 +695,20 @@ def _make_frame(names=None): tm.assert_index_equal(recons.columns, exp.columns) assert len(recons) == 0 + def test_to_csv_interval_index(self): + # GH 28210 + df = DataFrame({"A": list("abc"), "B": range(3)}, index=pd.interval_range(0, 3)) + + with ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + df.to_csv(path) + result = self.read_csv(path, index_col=0) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = df.copy() + expected.index = expected.index.astype(str) + + assert_frame_equal(result, expected) + def test_to_csv_float32_nanrep(self): df = DataFrame(np.random.randn(1, 4).astype(np.float32)) df[1] = np.nan diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index c1a21e6a7f1527..eeb0f43f4b9003 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -417,6 +417,46 @@ def test_repr_missing(self, constructor, expected): result = repr(obj) assert result == expected + @pytest.mark.parametrize( + "tuples, closed, expected_data", + [ + ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]), + ( + [(0.5, 1.0), np.nan, (2.0, 3.0)], + "right", + ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"], + ), + ( + [ + (Timestamp("20180101"), Timestamp("20180102")), + np.nan, + ((Timestamp("20180102"), Timestamp("20180103"))), + ], + "both", + ["[2018-01-01, 2018-01-02]", "NaN", "[2018-01-02, 2018-01-03]"], + ), + ( + [ + (Timedelta("0 days"), Timedelta("1 days")), + (Timedelta("1 days"), Timedelta("2 days")), + np.nan, + ], + "neither", + [ + "(0 days 00:00:00, 1 days 00:00:00)", + "(1 days 00:00:00, 2 days 00:00:00)", + "NaN", + ], + ), + ], + ) + def test_to_native_types(self, tuples, closed, expected_data): + # GH 
28210 + index = IntervalIndex.from_tuples(tuples, closed=closed) + result = index.to_native_types() + expected = np.array(expected_data) + tm.assert_numpy_array_equal(result, expected) + def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 0686b397cbd811..0ddf1dfcabb59b 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -191,6 +191,20 @@ def test_to_csv_compression(self, s, encoding, compression): s, pd.read_csv(fh, index_col=0, squeeze=True, encoding=encoding) ) + def test_to_csv_interval_index(self): + # GH 28210 + s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3)) + + with ensure_clean("__tmp_to_csv_interval_index__.csv") as path: + s.to_csv(path, header=False) + result = self.read_csv(path, index_col=0, squeeze=True) + + # can't roundtrip intervalindex via read_csv so check string repr (GH 23595) + expected = s.copy() + expected.index = expected.index.astype(str) + + assert_series_equal(result, expected) + class TestSeriesIO: def test_to_frame(self, datetime_series): From 7b25463abeeea197f55ff2d5187938dd4cba08ce Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Fri, 30 Aug 2019 16:47:16 +0200 Subject: [PATCH 40/95] BUG: Multiple lambdas in named aggregation (#27921) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/groupby/generic.py | 42 ++++- .../tests/groupby/aggregate/test_aggregate.py | 149 +++++++++++++++++- 3 files changed, 187 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 050a26cc86d429..83beec5607986f 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -178,6 +178,7 @@ Groupby/resample/rolling - - - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) +- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7d6690a0dfa5ac..b0bcd1cc1e27c4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -268,7 +268,9 @@ def aggregate(self, func, *args, **kwargs): result.index = np.arange(len(result)) if relabeling: - result = result[order] + + # used reordered index of columns + result = result.iloc[:, order] result.columns = columns return result._convert(datetime=True) @@ -1731,8 +1733,8 @@ def _normalize_keyword_aggregation(kwargs): The transformed kwargs. columns : List[str] The user-provided keys. - order : List[Tuple[str, str]] - Pairs of the input and output column names. + col_idx_order : List[int] + List of columns indices. 
Examples -------- @@ -1759,7 +1761,39 @@ def _normalize_keyword_aggregation(kwargs): else: aggspec[column] = [aggfunc] order.append((column, com.get_callable_name(aggfunc) or aggfunc)) - return aggspec, columns, order + + # uniquify aggfunc name if duplicated in order list + uniquified_order = _make_unique(order) + + # GH 25719, due to aggspec will change the order of assigned columns in aggregation + # uniquified_aggspec will store uniquified order list and will compare it with order + # based on index + aggspec_order = [ + (column, com.get_callable_name(aggfunc) or aggfunc) + for column, aggfuncs in aggspec.items() + for aggfunc in aggfuncs + ] + uniquified_aggspec = _make_unique(aggspec_order) + + # get the new indice of columns by comparison + col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) + return aggspec, columns, col_idx_order + + +def _make_unique(seq): + """Uniquify aggfunc name of the pairs in the order list + + Examples: + -------- + >>> _make_unique([('a', ''), ('a', ''), ('b', '')]) + [('a', '_0'), ('a', '_1'), ('b', '')] + """ + return [ + (pair[0], "_".join([pair[1], str(seq[:i].count(pair))])) + if seq.count(pair) > 1 + else pair + for i, pair in enumerate(seq) + ] # TODO: Can't use, because mypy doesn't like us setting __name__ diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 52d4fa76bf8794..aa80c461a00e79 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, compat, concat from pandas.core.base import SpecificationError -from pandas.core.groupby.generic import _maybe_mangle_lambdas +from pandas.core.groupby.generic import _make_unique, _maybe_mangle_lambdas from pandas.core.groupby.grouper import Grouping import pandas.util.testing as tm @@ -560,3 +560,150 @@ def test_with_kwargs(self): result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) expected = pd.DataFrame({"": [13], "": [30]}) tm.assert_frame_equal(result, expected) + + def test_agg_with_one_lambda(self): + # GH 25719, write tests for DataFrameGroupby.agg with only one lambda + df = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + + # sort for 35 and earlier + columns = ["height_sqr_min", "height_max", "weight_max"] + if compat.PY35: + columns = ["height_max", "height_sqr_min", "weight_max"] + expected = pd.DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + }, + index=pd.Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check pd.NameAgg case + result1 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + ) + tm.assert_frame_equal(result1, expected) + + # check agg(key=(col, aggfunc)) case + result2 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + ) + tm.assert_frame_equal(result2, expected) + + def test_agg_multiple_lambda(self): + # GH25719, test for DataFrameGroupby.agg with multiple lambdas + # with mixed aggfunc + df = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + 
"weight": [7.9, 7.5, 9.9, 198.0], + } + ) + # sort for 35 and earlier + columns = [ + "height_sqr_min", + "height_max", + "weight_max", + "height_max_2", + "weight_min", + ] + if compat.PY35: + columns = [ + "height_max", + "height_max_2", + "height_sqr_min", + "weight_max", + "weight_min", + ] + expected = pd.DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + "height_max_2": [9.5, 34.0], + "weight_min": [7.9, 7.5], + }, + index=pd.Index(["cat", "dog"], name="kind"), + columns=columns, + ) + + # check agg(key=(col, aggfunc)) case + result1 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + height_max_2=("height", lambda x: np.max(x)), + weight_min=("weight", lambda x: np.min(x)), + ) + tm.assert_frame_equal(result1, expected) + + # check pd.NamedAgg case + result2 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + height_max_2=pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)), + weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)), + ) + tm.assert_frame_equal(result2, expected) + + @pytest.mark.parametrize( + "order, expected_reorder", + [ + ( + [ + ("height", ""), + ("height", "max"), + ("weight", "max"), + ("height", ""), + ("weight", ""), + ], + [ + ("height", "_0"), + ("height", "max"), + ("weight", "max"), + ("height", "_1"), + ("weight", ""), + ], + ), + ( + [ + ("col2", "min"), + ("col1", ""), + ("col1", ""), + ("col1", ""), + ], + [ + ("col2", "min"), + ("col1", "_0"), + ("col1", "_1"), + ("col1", "_2"), + ], + ), + ( + [("col", ""), ("col", ""), ("col", "")], + [("col", "_0"), ("col", "_1"), ("col", "_2")], + ), + ], + ) + def test_make_unique(self, order, expected_reorder): + # GH 27519, test if make_unique function reorders correctly + result = _make_unique(order) + + assert result == expected_reorder From 51db82d9cc1abcec6c912d83e714811005471379 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 30 Aug 2019 09:38:39 -0700 Subject: [PATCH 41/95] PERF: lazify pytz seqToRE call, trims 35ms from import (#28228) --- pandas/_libs/tslibs/strptime.pyx | 44 +++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d93858cff5e053..fbda5f178e1647 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -341,7 +341,8 @@ def array_strptime(object[:] values, object fmt, return result, result_timezone.base -"""_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored +""" +_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored from the standard library, see https://github.com/python/cpython/blob/master/Lib/_strptime.py The original module-level docstring follows. @@ -363,7 +364,8 @@ def _getlang(): class LocaleTime: - """Stores and handles locale-specific information related to time. + """ + Stores and handles locale-specific information related to time. ATTRIBUTES: f_weekday -- full weekday names (7-item list) @@ -382,7 +384,8 @@ class LocaleTime: """ def __init__(self): - """Set all attributes. + """ + Set all attributes. Order of methods called matters for dependency reasons. 
@@ -399,7 +402,6 @@ class LocaleTime: Only other possible issue is if someone changed the timezone and did not call tz.tzset . That is an issue for the programmer, though, since changing the timezone is worthless without that call. - """ self.lang = _getlang() self.__calc_weekday() @@ -518,15 +520,16 @@ class TimeRE(dict): """ def __init__(self, locale_time=None): - """Create keys/values. + """ + Create keys/values. Order of execution is important for dependency reasons. - """ if locale_time: self.locale_time = locale_time else: self.locale_time = LocaleTime() + self._Z = None base = super() base.__init__({ # The " \d" part of the regex is to make %c from ANSI C work @@ -555,21 +558,29 @@ class TimeRE(dict): 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), - 'Z': self.__seqToRE(pytz.all_timezones, 'Z'), + # 'Z' key is generated lazily via __getitem__ '%': '%'}) base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) base.__setitem__('x', self.pattern(self.locale_time.LC_date)) base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + def __getitem__(self, key): + if key == "Z": + # lazy computation + if self._Z is None: + self._Z = self.__seqToRE(pytz.all_timezones, 'Z') + return self._Z + return super().__getitem__(key) + def __seqToRE(self, to_convert, directive): - """Convert a list to a regex string for matching a directive. + """ + Convert a list to a regex string for matching a directive. Want possible matching values to be from longest to shortest. This prevents the possibility of a match occurring for a value that also a substring of a larger value that should have matched (e.g., 'abc' matching when 'abcdef' should have been the match). - """ to_convert = sorted(to_convert, key=len, reverse=True) for value in to_convert: @@ -582,11 +593,11 @@ class TimeRE(dict): return '%s)' % regex def pattern(self, format): - """Return regex pattern for the format string. + """ + Return regex pattern for the format string. Need to make sure that any characters that might be interpreted as regex syntax are escaped. - """ processed_format = '' # The sub() call escapes all characters that might be misconstrued @@ -619,7 +630,8 @@ _regex_cache = {} cdef int _calc_julian_from_U_or_W(int year, int week_of_year, int day_of_week, int week_starts_Mon): - """Calculate the Julian day based on the year, week of the year, and day of + """ + Calculate the Julian day based on the year, week of the year, and day of the week, with week_start_day representing whether the week of the year assumes the week starts on Sunday or Monday (6 or 0). @@ -660,8 +672,10 @@ cdef int _calc_julian_from_U_or_W(int year, int week_of_year, return 1 + days_to_week + day_of_week -cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): - """Calculate the Julian day based on the ISO 8601 year, week, and weekday. +cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): + """ + Calculate the Julian day based on the ISO 8601 year, week, and weekday. + ISO weeks start on Mondays, with week 01 being the week containing 4 Jan. ISO week days range from 1 (Monday) to 7 (Sunday). 
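[As a cross-check on this week-date arithmetic: from Python 3.8 the standard library exposes the same ISO-calendar mapping, which is convenient for validating the tricky cases where week 01 begins in the previous calendar year:

    from datetime import date

    d = date.fromisocalendar(2020, 1, 1)  # ISO year 2020, week 01, Monday
    print(d)                              # 2019-12-30
    assert d.isocalendar()[:3] == (2020, 1, 1)
]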
@@ -694,7 +708,7 @@ cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday): return iso_year, ordinal -cdef parse_timezone_directive(object z): +cdef parse_timezone_directive(str z): """ Parse the '%z' directive and return a pytz.FixedOffset From 75c9783d4924c98d84e9722060686fc7b4643259 Mon Sep 17 00:00:00 2001 From: Bas Nijholt Date: Fri, 30 Aug 2019 19:05:31 +0200 Subject: [PATCH 42/95] STYLE: run pre-commit filters on the repo (#27915) * add isort:skip to "from .pandas_vb_common import setup" * add isort:skip to noqa: E402 marked lines * run black * add noqa: E402 isort:skip where needed * run pre-commit filters on asv_bench/benchmarks/ * parse the isort config when using pre-commit * run isort on pandas/core/api.py * run pre-commit filters and commit trivial import sorting changes * specify flake8 errors in pandas/io/msgpack/__init__.py * fix imports for doc/source/conf.py * fix the [isort] skip entry in setup.cfg Also I removed the files for which I have fixed the problems. --- .pre-commit-config.yaml | 36 ++++++----- asv_bench/benchmarks/attrs_caching.py | 3 +- asv_bench/benchmarks/binary_ops.py | 3 +- asv_bench/benchmarks/categoricals.py | 6 +- asv_bench/benchmarks/ctors.py | 5 +- asv_bench/benchmarks/dtypes.py | 10 +-- asv_bench/benchmarks/eval.py | 3 +- asv_bench/benchmarks/frame_ctor.py | 5 +- asv_bench/benchmarks/frame_methods.py | 4 +- asv_bench/benchmarks/gil.py | 9 +-- asv_bench/benchmarks/groupby.py | 3 +- asv_bench/benchmarks/index_object.py | 14 +++-- asv_bench/benchmarks/indexing.py | 17 ++--- asv_bench/benchmarks/inference.py | 7 ++- asv_bench/benchmarks/io/csv.py | 7 ++- asv_bench/benchmarks/io/excel.py | 6 +- asv_bench/benchmarks/io/hdf.py | 5 +- asv_bench/benchmarks/io/json.py | 5 +- asv_bench/benchmarks/io/msgpack.py | 4 +- asv_bench/benchmarks/io/pickle.py | 3 +- asv_bench/benchmarks/io/sql.py | 7 ++- asv_bench/benchmarks/io/stata.py | 3 +- asv_bench/benchmarks/join_merge.py | 5 +- asv_bench/benchmarks/multiindex_object.py | 5 +- asv_bench/benchmarks/offset.py | 3 +- asv_bench/benchmarks/pandas_vb_common.py | 3 +- asv_bench/benchmarks/period.py | 1 + asv_bench/benchmarks/plotting.py | 7 ++- asv_bench/benchmarks/reindex.py | 6 +- asv_bench/benchmarks/replace.py | 3 +- asv_bench/benchmarks/reshape.py | 7 ++- asv_bench/benchmarks/rolling.py | 5 +- asv_bench/benchmarks/series_methods.py | 5 +- asv_bench/benchmarks/sparse.py | 2 +- asv_bench/benchmarks/stat_ops.py | 4 +- asv_bench/benchmarks/strings.py | 3 +- asv_bench/benchmarks/timeseries.py | 6 +- ci/print_skipped.py | 2 +- doc/logo/pandas_logo.py | 3 +- doc/make.py | 8 +-- doc/source/conf.py | 24 +++++--- doc/source/user_guide/io.rst | 2 +- doc/sphinxext/contributors.py | 3 +- pandas/core/api.py | 61 +++++++++--------- pandas/io/msgpack/__init__.py | 14 +++-- pandas/tests/io/pytables/test_pytables.py | 5 +- pandas/tests/io/test_feather.py | 2 +- scripts/find_commits_touching_func.py | 6 +- scripts/generate_pip_deps_from_conda.py | 2 +- scripts/merge-pr.py | 7 ++- scripts/tests/test_validate_docstrings.py | 7 ++- scripts/validate_docstrings.py | 32 +++++----- setup.cfg | 75 +++++------------------ setup.py | 20 +++--- 54 files changed, 255 insertions(+), 248 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 32ffb3330564c3..5cc22c638c9b13 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,17 +1,21 @@ repos: - - repo: https://github.com/python/black - rev: stable - hooks: - - id: black - language_version: python3.7 - - repo: 
https://gitlab.com/pycqa/flake8 - rev: 3.7.7 - hooks: - - id: flake8 - language: python_venv - additional_dependencies: [flake8-comprehensions] - - repo: https://github.com/pre-commit/mirrors-isort - rev: v4.3.20 - hooks: - - id: isort - language: python_venv +- repo: https://github.com/python/black + rev: stable + hooks: + - id: black + language_version: python3.7 +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.7.7 + hooks: + - id: flake8 + language: python_venv + additional_dependencies: [flake8-comprehensions] +- repo: https://github.com/pre-commit/mirrors-isort + rev: v4.3.20 + hooks: + - id: isort + language: python_venv +- repo: https://github.com/asottile/seed-isort-config + rev: v1.9.2 + hooks: + - id: seed-isort-config diff --git a/asv_bench/benchmarks/attrs_caching.py b/asv_bench/benchmarks/attrs_caching.py index c43e5dfd729aad..501e27b9078ec6 100644 --- a/asv_bench/benchmarks/attrs_caching.py +++ b/asv_bench/benchmarks/attrs_caching.py @@ -1,4 +1,5 @@ import numpy as np + from pandas import DataFrame try: @@ -32,4 +33,4 @@ def time_cache_readonly(self): self.obj.prop -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/binary_ops.py b/asv_bench/benchmarks/binary_ops.py index fd3324b78f1c3d..58e0db67d60254 100644 --- a/asv_bench/benchmarks/binary_ops.py +++ b/asv_bench/benchmarks/binary_ops.py @@ -1,4 +1,5 @@ import numpy as np + from pandas import DataFrame, Series, date_range from pandas.core.algorithms import checked_add_with_arr @@ -155,4 +156,4 @@ def time_add_overflow_both_arg_nan(self): ) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 8097118a79d20d..559aa7050a6407 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -1,7 +1,9 @@ +import warnings + import numpy as np + import pandas as pd import pandas.util.testing as tm -import warnings try: from pandas.api.types import union_categoricals @@ -280,4 +282,4 @@ def time_sort_values(self): self.index.sort_values(ascending=False) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/ctors.py b/asv_bench/benchmarks/ctors.py index 654075292cdf62..ec3dd7a48a89f4 100644 --- a/asv_bench/benchmarks/ctors.py +++ b/asv_bench/benchmarks/ctors.py @@ -1,6 +1,7 @@ import numpy as np + +from pandas import DatetimeIndex, Index, MultiIndex, Series, Timestamp import pandas.util.testing as tm -from pandas import Series, Index, DatetimeIndex, Timestamp, MultiIndex def no_change(arr): @@ -113,4 +114,4 @@ def time_multiindex_from_iterables(self): MultiIndex.from_product(self.iterables) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py index 60800b1f9cae71..24cc1c6f9fa701 100644 --- a/asv_bench/benchmarks/dtypes.py +++ b/asv_bench/benchmarks/dtypes.py @@ -1,14 +1,14 @@ +import numpy as np + from pandas.api.types import pandas_dtype -import numpy as np from .pandas_vb_common import ( - numeric_dtypes, datetime_dtypes, - string_dtypes, extension_dtypes, + numeric_dtypes, + string_dtypes, ) - _numpy_dtypes = [ np.dtype(dtype) for dtype in (numeric_dtypes + datetime_dtypes + string_dtypes) ] @@ -40,4 
+40,4 @@ def time_pandas_dtype_invalid(self, dtype): pass -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/eval.py b/asv_bench/benchmarks/eval.py index 84e94315cc28b0..06a181875aaa85 100644 --- a/asv_bench/benchmarks/eval.py +++ b/asv_bench/benchmarks/eval.py @@ -1,4 +1,5 @@ import numpy as np + import pandas as pd try: @@ -62,4 +63,4 @@ def time_query_with_boolean_selection(self): self.df.query("(a >= @self.min_val) & (a <= @self.max_val)") -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/frame_ctor.py b/asv_bench/benchmarks/frame_ctor.py index acfb26bcf5d7ca..3944e0bc523d84 100644 --- a/asv_bench/benchmarks/frame_ctor.py +++ b/asv_bench/benchmarks/frame_ctor.py @@ -1,6 +1,7 @@ import numpy as np + +from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range import pandas.util.testing as tm -from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range try: from pandas.tseries.offsets import Nano, Hour @@ -104,4 +105,4 @@ def time_frame_from_lists(self): self.df = DataFrame(self.data) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index e2f6764c76eef8..05f98c66faa2b8 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -1,5 +1,5 @@ -import warnings import string +import warnings import numpy as np @@ -609,4 +609,4 @@ def time_dataframe_describe(self): self.df.describe() -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/gil.py b/asv_bench/benchmarks/gil.py index 0d0b75561d057a..d57492dd372680 100644 --- a/asv_bench/benchmarks/gil.py +++ b/asv_bench/benchmarks/gil.py @@ -1,7 +1,8 @@ import numpy as np -import pandas.util.testing as tm -from pandas import DataFrame, Series, read_csv, factorize, date_range + +from pandas import DataFrame, Series, date_range, factorize, read_csv from pandas.core.algorithms import take_1d +import pandas.util.testing as tm try: from pandas import ( @@ -36,7 +37,7 @@ def wrapper(fname): return wrapper -from .pandas_vb_common import BaseIO +from .pandas_vb_common import BaseIO # noqa: E402 isort:skip class ParallelGroupbyMethods: @@ -301,4 +302,4 @@ def time_loop(self, threads): self.loop() -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 39b07d4734399e..d51c53e2264f1a 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -15,7 +15,6 @@ ) import pandas.util.testing as tm - method_blacklist = { "object": { "median", @@ -626,4 +625,4 @@ def time_first(self): self.df_nans.groupby("key").transform("first") -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 49834ae94cc387..a94960d4947077 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -1,15 +1,17 @@ import gc + import numpy as np -import pandas.util.testing as tm + from pandas import ( - Series, - date_range, DatetimeIndex, - Index, - 
RangeIndex, Float64Index, + Index, IntervalIndex, + RangeIndex, + Series, + date_range, ) +import pandas.util.testing as tm class SetOperations: @@ -243,4 +245,4 @@ def peakmem_gc_instances(self, N): gc.enable() -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 84604b8196536b..ac35139c1954ab 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -1,22 +1,23 @@ import warnings import numpy as np -import pandas.util.testing as tm + from pandas import ( - Series, + CategoricalIndex, DataFrame, - MultiIndex, - Int64Index, - UInt64Index, Float64Index, - IntervalIndex, - CategoricalIndex, IndexSlice, + Int64Index, + IntervalIndex, + MultiIndex, + Series, + UInt64Index, concat, date_range, option_context, period_range, ) +import pandas.util.testing as tm class NumericSeriesIndexing: @@ -371,4 +372,4 @@ def time_chained_indexing(self, mode): df2["C"] = 1.0 -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 66ef4f2aec380c..e85b3bd2c76879 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -1,8 +1,9 @@ import numpy as np -import pandas.util.testing as tm + from pandas import DataFrame, Series, to_numeric +import pandas.util.testing as tm -from .pandas_vb_common import numeric_dtypes, lib +from .pandas_vb_common import lib, numeric_dtypes class NumericInferOps: @@ -120,4 +121,4 @@ def time_convert(self, data): lib.maybe_convert_numeric(data, set(), coerce_numeric=False) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 4525e504fc4dd5..9b8599b0a1b64a 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -1,10 +1,11 @@ +from io import StringIO import random import string import numpy as np + +from pandas import Categorical, DataFrame, date_range, read_csv, to_datetime import pandas.util.testing as tm -from pandas import DataFrame, Categorical, date_range, read_csv, to_datetime -from io import StringIO from ..pandas_vb_common import BaseIO @@ -406,4 +407,4 @@ def time_to_datetime_format_DD_MM_YYYY(self, cache_dates): to_datetime(df["date"], cache=cache_dates, format="%d-%m-%Y") -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index 12e70f84e52038..9aa5cbd5b6f7c3 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -1,6 +1,8 @@ from io import BytesIO + import numpy as np -from pandas import DataFrame, date_range, ExcelWriter, read_excel + +from pandas import DataFrame, ExcelWriter, date_range, read_excel import pandas.util.testing as tm @@ -35,4 +37,4 @@ def time_write_excel(self, engine): writer_write.save() -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/hdf.py b/asv_bench/benchmarks/io/hdf.py index 2874a7889156bf..8ec04a2087f1b7 100644 --- a/asv_bench/benchmarks/io/hdf.py +++ b/asv_bench/benchmarks/io/hdf.py @@ -1,5 +1,6 @@ import numpy as np -from pandas import DataFrame, date_range, 
HDFStore, read_hdf + +from pandas import DataFrame, HDFStore, date_range, read_hdf import pandas.util.testing as tm from ..pandas_vb_common import BaseIO @@ -127,4 +128,4 @@ def time_write_hdf(self, format): self.df.to_hdf(self.fname, "df", format=format) -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index fc07f2a4841025..b249c92b53e93e 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -1,6 +1,7 @@ import numpy as np + +from pandas import DataFrame, concat, date_range, read_json, timedelta_range import pandas.util.testing as tm -from pandas import DataFrame, date_range, timedelta_range, concat, read_json from ..pandas_vb_common import BaseIO @@ -214,4 +215,4 @@ def peakmem_float(self, frames): df.to_json() -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/msgpack.py b/asv_bench/benchmarks/io/msgpack.py index d97b4ae13f0bd5..f5038602539ab6 100644 --- a/asv_bench/benchmarks/io/msgpack.py +++ b/asv_bench/benchmarks/io/msgpack.py @@ -1,5 +1,7 @@ import warnings + import numpy as np + from pandas import DataFrame, date_range, read_msgpack import pandas.util.testing as tm @@ -27,4 +29,4 @@ def time_write_msgpack(self): self.df.to_msgpack(self.fname) -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/pickle.py b/asv_bench/benchmarks/io/pickle.py index 286ac767c02e7e..647e9d27dec9d3 100644 --- a/asv_bench/benchmarks/io/pickle.py +++ b/asv_bench/benchmarks/io/pickle.py @@ -1,4 +1,5 @@ import numpy as np + from pandas import DataFrame, date_range, read_pickle import pandas.util.testing as tm @@ -25,4 +26,4 @@ def time_write_pickle(self): self.df.to_pickle(self.fname) -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/sql.py b/asv_bench/benchmarks/io/sql.py index b80872b17a9e4a..fe84c869717e38 100644 --- a/asv_bench/benchmarks/io/sql.py +++ b/asv_bench/benchmarks/io/sql.py @@ -1,10 +1,11 @@ import sqlite3 import numpy as np -import pandas.util.testing as tm -from pandas import DataFrame, date_range, read_sql_query, read_sql_table from sqlalchemy import create_engine +from pandas import DataFrame, date_range, read_sql_query, read_sql_table +import pandas.util.testing as tm + class SQL: @@ -141,4 +142,4 @@ def time_read_sql_table_column(self, dtype): read_sql_table(self.table_name, self.con, columns=[dtype]) -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/stata.py b/asv_bench/benchmarks/io/stata.py index b3ed71af47dc8b..28829785d72e92 100644 --- a/asv_bench/benchmarks/io/stata.py +++ b/asv_bench/benchmarks/io/stata.py @@ -1,4 +1,5 @@ import numpy as np + from pandas import DataFrame, date_range, read_stata import pandas.util.testing as tm @@ -50,4 +51,4 @@ def setup(self, convert_dates): self.df.to_stata(self.fname, self.convert_dates) -from ..pandas_vb_common import setup # noqa: F401 +from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 7c899e3dc6ac8a..6aa82a43a4d6a0 100644 --- a/asv_bench/benchmarks/join_merge.py +++ 
b/asv_bench/benchmarks/join_merge.py @@ -1,8 +1,9 @@ import string import numpy as np + +from pandas import DataFrame, MultiIndex, Series, concat, date_range, merge, merge_asof import pandas.util.testing as tm -from pandas import DataFrame, Series, MultiIndex, date_range, concat, merge, merge_asof try: from pandas import merge_ordered @@ -348,4 +349,4 @@ def time_series_align_left_monotonic(self): self.ts1.align(self.ts2, join="left") -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py index eda059a68e8a58..3f4fd7ad911c1e 100644 --- a/asv_bench/benchmarks/multiindex_object.py +++ b/asv_bench/benchmarks/multiindex_object.py @@ -1,8 +1,9 @@ import string import numpy as np + +from pandas import DataFrame, MultiIndex, date_range import pandas.util.testing as tm -from pandas import date_range, MultiIndex, DataFrame class GetLoc: @@ -146,4 +147,4 @@ def time_categorical_level(self): self.df.set_index(["a", "b"]) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/offset.py b/asv_bench/benchmarks/offset.py index 31c3b6fb6cb60a..d822646e712ae5 100644 --- a/asv_bench/benchmarks/offset.py +++ b/asv_bench/benchmarks/offset.py @@ -1,7 +1,8 @@ -import warnings from datetime import datetime +import warnings import numpy as np + import pandas as pd try: diff --git a/asv_bench/benchmarks/pandas_vb_common.py b/asv_bench/benchmarks/pandas_vb_common.py index fdc8207021c0f3..1faf13329110d6 100644 --- a/asv_bench/benchmarks/pandas_vb_common.py +++ b/asv_bench/benchmarks/pandas_vb_common.py @@ -1,7 +1,8 @@ -import os from importlib import import_module +import os import numpy as np + import pandas as pd # Compatibility import for lib diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 2f8ae0650ab751..7303240a25f292 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -1,4 +1,5 @@ from pandas import DataFrame, Period, PeriodIndex, Series, date_range, period_range + from pandas.tseries.frequencies import to_offset diff --git a/asv_bench/benchmarks/plotting.py b/asv_bench/benchmarks/plotting.py index 4fb0876f05a0a0..5c718516360ed2 100644 --- a/asv_bench/benchmarks/plotting.py +++ b/asv_bench/benchmarks/plotting.py @@ -1,11 +1,12 @@ +import matplotlib import numpy as np -from pandas import DataFrame, Series, DatetimeIndex, date_range + +from pandas import DataFrame, DatetimeIndex, Series, date_range try: from pandas.plotting import andrews_curves except ImportError: from pandas.tools.plotting import andrews_curves -import matplotlib matplotlib.use("Agg") @@ -93,4 +94,4 @@ def time_plot_andrews_curves(self): andrews_curves(self.df, "Name") -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 8d4c9ebaf3e891..cd450f801c8052 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,6 +1,8 @@ import numpy as np + +from pandas import DataFrame, Index, MultiIndex, Series, date_range, period_range import pandas.util.testing as tm -from pandas import DataFrame, Series, MultiIndex, Index, date_range, period_range + from .pandas_vb_common import lib @@ -159,4 +161,4 @@ def time_lib_fast_zip(self): lib.fast_zip(self.col_array_list) -from 
.pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index f69ae150285255..2a115fb0b4fe33 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -1,4 +1,5 @@ import numpy as np + import pandas as pd @@ -73,4 +74,4 @@ def time_replace(self, constructor, replace_data): self.data.replace(self.to_replace) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/reshape.py b/asv_bench/benchmarks/reshape.py index cc373f413fb885..441f4b380656ec 100644 --- a/asv_bench/benchmarks/reshape.py +++ b/asv_bench/benchmarks/reshape.py @@ -1,9 +1,10 @@ -import string from itertools import product +import string import numpy as np -from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long + import pandas as pd +from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long class Melt: @@ -262,4 +263,4 @@ def time_explode(self, n_rows, max_list_length): self.series.explode() -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index a70977fcf539f7..3640513d31be26 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -1,6 +1,7 @@ -import pandas as pd import numpy as np +import pandas as pd + class Methods: @@ -121,4 +122,4 @@ def peakmem_fixed(self): self.roll.max() -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py index 6038a2ab4bd9f9..a3f1d92545c3f2 100644 --- a/asv_bench/benchmarks/series_methods.py +++ b/asv_bench/benchmarks/series_methods.py @@ -1,8 +1,9 @@ from datetime import datetime import numpy as np + +from pandas import NaT, Series, date_range import pandas.util.testing as tm -from pandas import Series, date_range, NaT class SeriesConstructor: @@ -275,4 +276,4 @@ def time_func(self, func, N, dtype): self.func() -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 19d08c086a508a..ac78ca53679fd6 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -136,4 +136,4 @@ def time_division(self, fill_value): self.arr1 / self.arr2 -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 620a6de0f5f341..6032bee41958e1 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -1,6 +1,6 @@ import numpy as np -import pandas as pd +import pandas as pd ops = ["mean", "sum", "median", "std", "skew", "kurt", "mad", "prod", "sem", "var"] @@ -148,4 +148,4 @@ def time_cov_series(self, use_bottleneck): self.s.cov(self.s2) -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/strings.py b/asv_bench/benchmarks/strings.py index 6be2fa92d9eac3..f30b2482615bd2 100644 --- a/asv_bench/benchmarks/strings.py +++ b/asv_bench/benchmarks/strings.py @@ -1,7 +1,8 @@ import warnings import numpy as np -from pandas import 
Series, DataFrame + +from pandas import DataFrame, Series import pandas.util.testing as tm diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 1020b773f8acbb..498774034d6422 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -2,7 +2,9 @@ import dateutil import numpy as np -from pandas import to_datetime, date_range, Series, DataFrame, period_range + +from pandas import DataFrame, Series, date_range, period_range, to_datetime + from pandas.tseries.frequencies import infer_freq try: @@ -426,4 +428,4 @@ def time_dt_accessor_year(self, tz): self.series.dt.year -from .pandas_vb_common import setup # noqa: F401 +from .pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/ci/print_skipped.py b/ci/print_skipped.py index a44281044e11d0..6bc1dcfcd320dd 100755 --- a/ci/print_skipped.py +++ b/ci/print_skipped.py @@ -1,8 +1,8 @@ #!/usr/bin/env python +import math import os import sys -import math import xml.etree.ElementTree as et diff --git a/doc/logo/pandas_logo.py b/doc/logo/pandas_logo.py index 5a07b094e6ad35..89410e3847bef9 100644 --- a/doc/logo/pandas_logo.py +++ b/doc/logo/pandas_logo.py @@ -1,7 +1,6 @@ # script to generate the pandas logo -from matplotlib import pyplot as plt -from matplotlib import rcParams +from matplotlib import pyplot as plt, rcParams import numpy as np rcParams["mathtext.fontset"] = "cm" diff --git a/doc/make.py b/doc/make.py index 48febef20fbe66..cbb1fa6a5324aa 100755 --- a/doc/make.py +++ b/doc/make.py @@ -11,18 +11,18 @@ $ python make.py html $ python make.py latex """ +import argparse +import csv import importlib -import sys import os import shutil -import csv import subprocess -import argparse +import sys import webbrowser + import docutils import docutils.parsers.rst - DOC_PATH = os.path.dirname(os.path.abspath(__file__)) SOURCE_PATH = os.path.join(DOC_PATH, "source") BUILD_PATH = os.path.join(DOC_PATH, "build") diff --git a/doc/source/conf.py b/doc/source/conf.py index a4b7d97c2cf5e2..1da1948e452688 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -10,15 +10,15 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys -import os -import inspect import importlib +import inspect import logging +import os +import sys + import jinja2 -from sphinx.ext.autosummary import _import_by_name from numpydoc.docscrape import NumpyDocString - +from sphinx.ext.autosummary import _import_by_name logger = logging.getLogger(__name__) @@ -141,7 +141,7 @@ # built documents. # # The short X.Y version. 
-import pandas +import pandas # noqa: E402 isort:skip # version = '%s r%s' % (pandas.__version__, svn_version()) version = str(pandas.__version__) @@ -432,10 +432,14 @@ # Add custom Documenter to handle attributes/methods of an AccessorProperty # eg pandas.Series.str and pandas.Series.dt (see GH9322) -import sphinx -from sphinx.util import rpartition -from sphinx.ext.autodoc import Documenter, MethodDocumenter, AttributeDocumenter -from sphinx.ext.autosummary import Autosummary +import sphinx # noqa: E402 isort:skip +from sphinx.util import rpartition # noqa: E402 isort:skip +from sphinx.ext.autodoc import ( # noqa: E402 isort:skip + AttributeDocumenter, + Documenter, + MethodDocumenter, +) +from sphinx.ext.autosummary import Autosummary # noqa: E402 isort:skip class AccessorDocumenter(MethodDocumenter): diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 1d49dbdee9c03a..338c890ce317c5 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3206,7 +3206,7 @@ argument to ``to_excel`` and to ``ExcelWriter``. The built-in engines are: writer = pd.ExcelWriter('path_to_file.xlsx', engine='xlsxwriter') # Or via pandas configuration. - from pandas import options # noqa: E402 + from pandas import options # noqa: E402 options.io.excel.xlsx.writer = 'xlsxwriter' df.to_excel('path_to_file.xlsx', sheet_name='Sheet1') diff --git a/doc/sphinxext/contributors.py b/doc/sphinxext/contributors.py index 4256e4659715d2..1a064f71792e96 100644 --- a/doc/sphinxext/contributors.py +++ b/doc/sphinxext/contributors.py @@ -8,12 +8,11 @@ code contributors and commits, and then list each contributor individually. """ +from announce import build_components from docutils import nodes from docutils.parsers.rst import Directive import git -from announce import build_components - class ContributorsDirective(Directive): required_arguments = 1 diff --git a/pandas/core/api.py b/pandas/core/api.py index 73323d93b8215a..bd2a57a15bdd2b 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -2,6 +2,16 @@ import numpy as np +from pandas.core.dtypes.dtypes import ( + CategoricalDtype, + DatetimeTZDtype, + IntervalDtype, + PeriodDtype, +) +from pandas.core.dtypes.missing import isna, isnull, notna, notnull + +from pandas.core.algorithms import factorize, unique, value_counts +from pandas.core.arrays import Categorical from pandas.core.arrays.integer import ( Int8Dtype, Int16Dtype, @@ -12,45 +22,38 @@ UInt32Dtype, UInt64Dtype, ) -from pandas.core.algorithms import factorize, unique, value_counts -from pandas.core.dtypes.missing import isna, isnull, notna, notnull -from pandas.core.dtypes.dtypes import ( - CategoricalDtype, - PeriodDtype, - IntervalDtype, - DatetimeTZDtype, -) -from pandas.core.arrays import Categorical from pandas.core.construction import array + from pandas.core.groupby import Grouper, NamedAgg -from pandas.io.formats.format import set_eng_float_format + +# DataFrame needs to be imported after NamedAgg to avoid a circular import +from pandas.core.frame import DataFrame # isort:skip from pandas.core.index import ( - Index, CategoricalIndex, - Int64Index, - UInt64Index, - RangeIndex, + DatetimeIndex, Float64Index, - MultiIndex, + Index, + Int64Index, IntervalIndex, - TimedeltaIndex, - DatetimeIndex, - PeriodIndex, + MultiIndex, NaT, + PeriodIndex, + RangeIndex, + TimedeltaIndex, + UInt64Index, ) +from pandas.core.indexes.datetimes import Timestamp, bdate_range, date_range +from pandas.core.indexes.interval import Interval, interval_range from 
pandas.core.indexes.period import Period, period_range from pandas.core.indexes.timedeltas import Timedelta, timedelta_range -from pandas.core.indexes.datetimes import Timestamp, date_range, bdate_range -from pandas.core.indexes.interval import Interval, interval_range - -from pandas.core.series import Series -from pandas.core.frame import DataFrame - -# TODO: Remove import when statsmodels updates #18264 -from pandas.core.reshape.reshape import get_dummies - from pandas.core.indexing import IndexSlice -from pandas.core.tools.numeric import to_numeric -from pandas.tseries.offsets import DateOffset +from pandas.core.reshape.reshape import ( + get_dummies, +) # TODO: Remove get_dummies import when statsmodels updates #18264 +from pandas.core.series import Series from pandas.core.tools.datetimes import to_datetime +from pandas.core.tools.numeric import to_numeric from pandas.core.tools.timedeltas import to_timedelta + +from pandas.io.formats.format import set_eng_float_format +from pandas.tseries.offsets import DateOffset diff --git a/pandas/io/msgpack/__init__.py b/pandas/io/msgpack/__init__.py index 9b09cffd83f755..7107263c180cb1 100644 --- a/pandas/io/msgpack/__init__.py +++ b/pandas/io/msgpack/__init__.py @@ -2,8 +2,8 @@ from collections import namedtuple -from pandas.io.msgpack.exceptions import * # noqa -from pandas.io.msgpack._version import version # noqa +from pandas.io.msgpack.exceptions import * # noqa: F401,F403 isort:skip +from pandas.io.msgpack._version import version # noqa: F401 isort:skip class ExtType(namedtuple("ExtType", "code data")): @@ -19,10 +19,14 @@ def __new__(cls, code, data): return super().__new__(cls, code, data) -import os # noqa +import os # noqa: F401,E402 isort:skip -from pandas.io.msgpack._packer import Packer # noqa -from pandas.io.msgpack._unpacker import unpack, unpackb, Unpacker # noqa +from pandas.io.msgpack._unpacker import ( # noqa: F401,E402 isort:skip + Unpacker, + unpack, + unpackb, +) +from pandas.io.msgpack._packer import Packer # noqa: E402 isort:skip def pack(o, stream, **kwargs): diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index d67f2c3b7bd66e..7306393a1339ee 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -37,7 +37,6 @@ import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, assert_series_equal, set_timezone -from pandas.io import pytables as pytables # noqa:E402 from pandas.io.formats.printing import pprint_thing from pandas.io.pytables import ( ClosedFileError, @@ -46,7 +45,9 @@ Term, read_hdf, ) -from pandas.io.pytables import TableIterator # noqa:E402 + +from pandas.io import pytables as pytables # noqa: E402 isort:skip +from pandas.io.pytables import TableIterator # noqa: E402 isort:skip tables = pytest.importorskip("tables") diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 87a2405a10dd5c..ee668d6890756f 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -8,7 +8,7 @@ import pandas.util.testing as tm from pandas.util.testing import assert_frame_equal, ensure_clean -from pandas.io.feather_format import read_feather, to_feather # noqa:E402 +from pandas.io.feather_format import read_feather, to_feather # noqa: E402 isort:skip pyarrow = pytest.importorskip("pyarrow") diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 1075a257d42705..95a892b822cff7 100755 --- 
a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -10,11 +10,11 @@ Usage:: $ ./find_commits_touching_func.py (see arguments below) """ -import logging -import re -import os import argparse from collections import namedtuple +import logging +import os +import re from dateutil.parser import parse diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 6ae10c2cb07d29..29fe8bf84c12b0 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -16,8 +16,8 @@ import os import re import sys -import yaml +import yaml EXCLUDE = {"python=3"} RENAME = {"pytables": "tables", "pyqt": "pyqt5"} diff --git a/scripts/merge-pr.py b/scripts/merge-pr.py index 95352751a23c6b..300cb149f387fc 100755 --- a/scripts/merge-pr.py +++ b/scripts/merge-pr.py @@ -22,14 +22,15 @@ # usage: ./apache-pr-merge.py (see config env vars below) # # Lightly modified from version of this script in incubator-parquet-format -from subprocess import check_output -from requests.auth import HTTPBasicAuth -import requests import os +from subprocess import check_output import sys import textwrap +import requests +from requests.auth import HTTPBasicAuth + PANDAS_HOME = "." PROJECT_NAME = "pandas" print("PANDAS_HOME = " + PANDAS_HOME) diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 35aaf10458f449..85e5bf239cbfa8 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -2,12 +2,13 @@ import random import string import textwrap -import pytest -import numpy as np -import pandas as pd +import numpy as np +import pytest import validate_docstrings +import pandas as pd + validate_one = validate_docstrings.validate_one diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index bf5d861281a36b..401eaf8ff5ed5c 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -13,20 +13,20 @@ $ ./validate_docstrings.py $ ./validate_docstrings.py pandas.DataFrame.head """ -import os -import sys -import json -import re -import glob -import functools -import collections import argparse -import pydoc -import inspect -import importlib +import ast +import collections import doctest +import functools +import glob +import importlib +import inspect +import json +import os +import pydoc +import re +import sys import tempfile -import ast import textwrap import flake8.main.application @@ -41,20 +41,20 @@ # script. Setting here before matplotlib is loaded. 
# We don't warn for the number of open plots, as none is actually being opened os.environ["MPLBACKEND"] = "Template" -import matplotlib +import matplotlib # noqa: E402 isort:skip matplotlib.rc("figure", max_open_warning=10000) -import numpy +import numpy # noqa: E402 isort:skip BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) sys.path.insert(0, os.path.join(BASE_PATH)) -import pandas +import pandas # noqa: E402 isort:skip sys.path.insert(1, os.path.join(BASE_PATH, "doc", "sphinxext")) -from numpydoc.docscrape import NumpyDocString -from pandas.io.formats.printing import pprint_thing +from numpydoc.docscrape import NumpyDocString # noqa: E402 isort:skip +from pandas.io.formats.printing import pprint_thing # noqa: E402 isort:skip PRIVATE_CLASSES = ["NDFrame", "IndexOpsMixin"] diff --git a/setup.cfg b/setup.cfg index 716ff5d9d8853f..43dbac15f5cfe7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -110,68 +110,25 @@ directory = coverage_html_report # To be kept consistent with "Import Formatting" section in contributing.rst [isort] -known_pre_libs=pandas._config -known_pre_core=pandas._libs,pandas.util._*,pandas.compat,pandas.errors -known_dtypes=pandas.core.dtypes -known_post_core=pandas.tseries,pandas.io,pandas.plotting -sections=FUTURE,STDLIB,THIRDPARTY,PRE_LIBS,PRE_CORE,DTYPES,FIRSTPARTY,POST_CORE,LOCALFOLDER - -known_first_party=pandas -known_third_party=Cython,numpy,dateutil,matplotlib,python-dateutil,pytz,pyarrow,pytest - -multi_line_output=3 -include_trailing_comma=True -force_grid_wrap=0 -combine_as_imports=True -line_length=88 -force_sort_within_sections=True -skip_glob=env, -skip= - pandas/__init__.py - pandas/core/api.py, - pandas/io/msgpack/__init__.py - asv_bench/benchmarks/attrs_caching.py, - asv_bench/benchmarks/binary_ops.py, - asv_bench/benchmarks/categoricals.py, - asv_bench/benchmarks/ctors.py, - asv_bench/benchmarks/eval.py, - asv_bench/benchmarks/frame_ctor.py, - asv_bench/benchmarks/frame_methods.py, - asv_bench/benchmarks/gil.py, - asv_bench/benchmarks/groupby.py, - asv_bench/benchmarks/index_object.py, - asv_bench/benchmarks/indexing.py, - asv_bench/benchmarks/inference.py, - asv_bench/benchmarks/io/csv.py, - asv_bench/benchmarks/io/excel.py, - asv_bench/benchmarks/io/hdf.py, - asv_bench/benchmarks/io/json.py, - asv_bench/benchmarks/io/msgpack.py, - asv_bench/benchmarks/io/pickle.py, - asv_bench/benchmarks/io/sql.py, - asv_bench/benchmarks/io/stata.py, - asv_bench/benchmarks/join_merge.py, - asv_bench/benchmarks/multiindex_object.py, - asv_bench/benchmarks/panel_ctor.py, - asv_bench/benchmarks/panel_methods.py, - asv_bench/benchmarks/plotting.py, - asv_bench/benchmarks/reindex.py, - asv_bench/benchmarks/replace.py, - asv_bench/benchmarks/reshape.py, - asv_bench/benchmarks/rolling.py, - asv_bench/benchmarks/series_methods.py, - asv_bench/benchmarks/sparse.py, - asv_bench/benchmarks/stat_ops.py, - asv_bench/benchmarks/timeseries.py - asv_bench/benchmarks/pandas_vb_common.py - asv_bench/benchmarks/offset.py - asv_bench/benchmarks/dtypes.py - asv_bench/benchmarks/strings.py - asv_bench/benchmarks/period.py +known_pre_libs = pandas._config +known_pre_core = pandas._libs,pandas.util._*,pandas.compat,pandas.errors +known_dtypes = pandas.core.dtypes +known_post_core = pandas.tseries,pandas.io,pandas.plotting +sections = FUTURE,STDLIB,THIRDPARTY,PRE_LIBS,PRE_CORE,DTYPES,FIRSTPARTY,POST_CORE,LOCALFOLDER +known_first_party = pandas +known_third_party = 
_pytest,announce,dateutil,docutils,flake8,git,hypothesis,jinja2,lxml,matplotlib,numpy,numpydoc,pkg_resources,pyarrow,pytest,pytz,requests,scipy,setuptools,sphinx,sqlalchemy,validate_docstrings,yaml +multi_line_output = 3 +include_trailing_comma = True +force_grid_wrap = 0 +combine_as_imports = True +line_length = 88 +force_sort_within_sections = True +skip_glob = env, +skip = pandas/__init__.py,pandas/core/api.py [mypy] ignore_missing_imports=True no_implicit_optional=True [mypy-pandas.conftest,pandas.tests.*] -ignore_errors=True \ No newline at end of file +ignore_errors=True diff --git a/setup.py b/setup.py index d2c6b18b892cda..a86527ace092b3 100755 --- a/setup.py +++ b/setup.py @@ -6,16 +6,16 @@ BSD license. Parts are from lxml (https://github.com/lxml/lxml) """ +from distutils.sysconfig import get_config_vars +from distutils.version import LooseVersion import os from os.path import join as pjoin - -import pkg_resources import platform -from distutils.sysconfig import get_config_vars -import sys import shutil -from distutils.version import LooseVersion -from setuptools import setup, Command, find_packages +import sys + +import pkg_resources +from setuptools import Command, find_packages, setup # versioning import versioneer @@ -58,8 +58,8 @@ def is_platform_mac(): # The import of Extension must be after the import of Cython, otherwise # we do not get the appropriately patched class. # See https://cython.readthedocs.io/en/latest/src/reference/compilation.html -from distutils.extension import Extension # noqa:E402 -from distutils.command.build import build # noqa:E402 +from distutils.extension import Extension # noqa: E402 isort:skip +from distutils.command.build import build # noqa: E402 isort:skip try: if not _CYTHON_INSTALLED: @@ -831,9 +831,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): ] }, entry_points={ - "pandas_plotting_backends": [ - "matplotlib = pandas:plotting._matplotlib", - ], + "pandas_plotting_backends": ["matplotlib = pandas:plotting._matplotlib"] }, **setuptools_kwargs ) From fadb27138a97eb96b619111f906b8921d2290d26 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 30 Aug 2019 18:06:49 +0100 Subject: [PATCH 43/95] REGR: tags for notebook display closes #28204 (#28216) * REGR: tags for notebook display closes #28204 --- doc/source/whatsnew/v0.25.2.rst | 1 + pandas/core/frame.py | 15 + .../html_repr_max_rows_10_min_rows_12.html | 70 +++++ .../html_repr_max_rows_10_min_rows_4.html | 46 +++ .../html_repr_max_rows_12_min_rows_None.html | 78 +++++ .../html_repr_max_rows_None_min_rows_12.html | 269 ++++++++++++++++++ ...l_repr_min_rows_default_no_truncation.html | 105 +++++++ .../html_repr_min_rows_default_truncated.html | 70 +++++ pandas/tests/io/formats/test_to_html.py | 39 +++ 9 files changed, 693 insertions(+) create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html create mode 100644 pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 8d8a39139cf84c..1cdf213d81a74b 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ 
b/doc/source/whatsnew/v0.25.2.rst @@ -62,6 +62,7 @@ Missing I/O ^^^ +- Fix regression in notebook display where ``<th>`` tags not used for :attr:`DataFrame.index` (:issue:`28204`). - Regression in :meth:`~DataFrame.to_csv` where writing a :class:`Series` or :class:`DataFrame` indexed by an :class:`IntervalIndex` would incorrectly raise a ``TypeError`` (:issue:`28210`) - - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3d1a39a86c784e..16fece1c7eb8ba 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -671,10 +671,25 @@ def _repr_html_(self): formatter = fmt.DataFrameFormatter( self, + columns=None, + col_space=None, + na_rep="NaN", + formatters=None, + float_format=None, + sparsify=None, + justify=None, + index_names=True, + header=True, + index=True, + bold_rows=True, + escape=True, max_rows=max_rows, min_rows=min_rows, max_cols=max_cols, show_dimensions=show_dimensions, + decimal=".", + table_id=None, + render_links=False, ) return formatter.to_html(notebook=True) else: diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html new file mode 100644 index 00000000000000..4eb3f5319749d9 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_12.html @@ -0,0 +1,70 @@ +
[new-file contents: notebook HTML repr of a 61-row, one-column DataFrame "a"; rows 0-4 and 56-60 shown around a "..." truncation row, followed by a "61 rows × 1 columns" footer; the table markup was lost in extraction]
diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html new file mode 100644 index 00000000000000..2b1d97aec517c5 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_10_min_rows_4.html @@ -0,0 +1,46 @@ +
[new-file contents: notebook HTML repr of the same 61-row DataFrame "a"; only rows 0-1 and 59-60 shown around a "..." truncation row, followed by a "61 rows × 1 columns" footer; the table markup was lost in extraction]
diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html new file mode 100644 index 00000000000000..a539e5a4884a12 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_12_min_rows_None.html @@ -0,0 +1,78 @@ +
[new-file contents: notebook HTML repr of the 61-row DataFrame "a"; rows 0-5 and 55-60 shown around a "..." truncation row, followed by a "61 rows × 1 columns" footer; the table markup was lost in extraction]
diff --git a/pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html b/pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html new file mode 100644 index 00000000000000..3e680a505c6d68 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_max_rows_None_min_rows_12.html @@ -0,0 +1,269 @@ +
[new-file contents: untruncated notebook HTML repr listing all 61 rows (0-60) of the one-column DataFrame "a", with no truncation row and no dimensions footer; the table markup was lost in extraction]
diff --git a/pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html new file mode 100644 index 00000000000000..10f6247e37deff --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_no_truncation.html @@ -0,0 +1,105 @@ +
[new-file contents: untruncated notebook HTML repr of a 20-row, one-column DataFrame "a" (rows 0-19), with no truncation row and no dimensions footer; the table markup was lost in extraction]
diff --git a/pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html new file mode 100644 index 00000000000000..4eb3f5319749d9 --- /dev/null +++ b/pandas/tests/io/formats/data/html/html_repr_min_rows_default_truncated.html @@ -0,0 +1,70 @@ +
[new-file contents: identical to html_repr_max_rows_10_min_rows_12.html above (same blob hash 4eb3f5319749d9): rows 0-4 and 56-60 shown around a "..." truncation row, followed by the "61 rows × 1 columns" footer; the table markup was lost in extraction]
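For context, the fixtures above pair with the ``display.max_rows``/``display.min_rows`` options exercised by the tests below. The following minimal sketch (not part of the patch) shows how such output is produced; it assumes only the public options and the ``_repr_html_`` hook that the tests themselves use:

    # Minimal sketch of how the fixture HTML is produced; df and the option
    # values mirror the parametrized cases in test_to_html.py below.
    import pandas as pd

    df = pd.DataFrame({"a": range(61)})

    # max_rows=10 with min_rows=4: keep the first and last two rows around a
    # "..." marker, as in html_repr_max_rows_10_min_rows_4.html.
    with pd.option_context("display.max_rows", 10, "display.min_rows", 4):
        truncated = df._repr_html_()

    # max_rows=None: never truncate, so all 61 rows appear and no
    # "61 rows × 1 columns" footer is emitted, as in
    # html_repr_max_rows_None_min_rows_12.html.
    with pd.option_context("display.max_rows", None, "display.min_rows", 12):
        full = df._repr_html_()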
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 448e869df950dd..52c7b89220f06b 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -713,3 +713,42 @@ def test_to_html_with_col_space_units(unit): for h in hdrs: expected = ''.format(unit=unit) assert expected in h + + +def test_html_repr_min_rows_default(datapath): + # gh-27991 + + # default setting no truncation even if above min_rows + df = pd.DataFrame({"a": range(20)}) + result = df._repr_html_() + expected = expected_html(datapath, "html_repr_min_rows_default_no_truncation") + assert result == expected + + # default of max_rows 60 triggers truncation if above + df = pd.DataFrame({"a": range(61)}) + result = df._repr_html_() + expected = expected_html(datapath, "html_repr_min_rows_default_truncated") + assert result == expected + + +@pytest.mark.parametrize( + "max_rows,min_rows,expected", + [ + # truncated after first two rows + (10, 4, "html_repr_max_rows_10_min_rows_4"), + # when set to None, follow value of max_rows + (12, None, "html_repr_max_rows_12_min_rows_None"), + # when set value higher as max_rows, use the minimum + (10, 12, "html_repr_max_rows_10_min_rows_12"), + # max_rows of None -> never truncate + (None, 12, "html_repr_max_rows_None_min_rows_12"), + ], +) +def test_html_repr_min_rows(datapath, max_rows, min_rows, expected): + # gh-27991 + + df = pd.DataFrame({"a": range(61)}) + expected = expected_html(datapath, expected) + with option_context("display.max_rows", max_rows, "display.min_rows", min_rows): + result = df._repr_html_() + assert result == expected From cad39188c64bb844d6e915a97d1b88c6b4337723 Mon Sep 17 00:00:00 2001 From: John G Evans Date: Fri, 30 Aug 2019 13:08:33 -0400 Subject: [PATCH 44/95] Fix read of py27 pytables tz attribute, gh#26443 (#28221) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/io/pytables.py | 7 ++++++- pandas/tests/io/data/legacy_hdf/gh26443.h5 | Bin 0 -> 7168 bytes pandas/tests/io/pytables/test_pytables.py | 13 +++++++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 pandas/tests/io/data/legacy_hdf/gh26443.h5 diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 83beec5607986f..3b6288146bdf2e 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -97,6 +97,7 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) +- Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) - diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fbe413f820c901..1ff3400323e54a 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2902,7 +2902,12 @@ def read_index_node(self, node, start=None, stop=None): kwargs["freq"] = node._v_attrs["freq"] if "tz" in node._v_attrs: - kwargs["tz"] = node._v_attrs["tz"] + if isinstance(node._v_attrs["tz"], bytes): + # created by python2 + kwargs["tz"] = node._v_attrs["tz"].decode("utf-8") + else: + # created by python3 + kwargs["tz"] = node._v_attrs["tz"] if kind in ("date", "datetime"): index = factory( diff --git a/pandas/tests/io/data/legacy_hdf/gh26443.h5 b/pandas/tests/io/data/legacy_hdf/gh26443.h5 new file mode 100644 index 
0000000000000000000000000000000000000000..45aa64324530f943b48fa5c63390392af1110c6b GIT binary patch literal 7168 [base85-encoded payload for gh26443.h5 omitted; the encoded data was garbled in extraction] literal 0 HcmV?d00001 diff --git a/pandas/tests/io/pytables/test_pytables.py b/pandas/tests/io/pytables/test_pytables.py index 7306393a1339ee..77cac00882771f 100644 --- a/pandas/tests/io/pytables/test_pytables.py +++ b/pandas/tests/io/pytables/test_pytables.py @@ -5447,3 +5447,16 @@ def test_read_with_where_tz_aware_index(self): store.append(key, expected, format="table", append=True) result = pd.read_hdf(path, key, where="DATE > 20151130") assert_frame_equal(result, expected) + + def test_py2_created_with_datetimez(self, datapath): + # The test HDF5 file was created in Python 2, but could not be read in + # Python 3. + # + # GH26443 + index = [pd.Timestamp("2019-01-01T18:00").tz_localize("America/New_York")] + expected = DataFrame({"data": 123}, index=index) + with ensure_clean_store( + datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r" + ) as store: + result = store["key"] + assert_frame_equal(result, expected) From 621ad9df37911ea577029d8cac5de0920f07f33e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Fri, 30 Aug 2019 19:09:03 +0200 Subject: [PATCH 45/95] DOC: Document existing functionality of pandas.DataFrame.to_sql() #11886 (#26795) * DOC: add single dtype to NDFrame.to_sql --- pandas/core/generic.py | 15 ++++++++------- pandas/io/sql.py | 23 ++++++++++++----------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6ade69fb4ca9d9..1a5b36b07e93ca 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2594,13 +2594,14 @@ def to_sql( `index` is True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. chunksize : int, optional - Rows will be written in batches of this size at a time. By default, - all rows will be written at once. - dtype : dict, optional - Specifying the datatype for columns. The keys should be the column - names and the values should be the SQLAlchemy types or strings for - the sqlite3 legacy mode. - method : {None, 'multi', callable}, default None + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 legacy mode. If a + scalar is provided, it will be applied to all columns.
+ method : {None, 'multi', callable}, optional Controls the SQL insertion clause used: * None : Uses standard SQL ``INSERT`` clause (one per row). diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 72df00fd4c5a19..44cb399336d62f 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -456,14 +456,14 @@ def to_sql( Parameters ---------- frame : DataFrame, Series - name : string + name : str Name of SQL table. con : SQLAlchemy connectable(engine/connection) or database string URI or sqlite3 DBAPI2 connection Using SQLAlchemy makes it possible to use any DB supported by that library. If a DBAPI2 object, only sqlite3 is supported. - schema : string, default None + schema : str, optional Name of SQL schema in database to write to (if database flavor supports this). If None, use default schema (default). if_exists : {'fail', 'replace', 'append'}, default 'fail' @@ -472,18 +472,19 @@ def to_sql( - append: If table exists, insert data. Create if does not exist. index : boolean, default True Write DataFrame index as a column. - index_label : string or sequence, default None + index_label : str or sequence, optional Column label for index column(s). If None is given (default) and `index` is True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. - chunksize : int, default None - If not None, then rows will be written in batches of this size at a - time. If None, all rows will be written at once. - dtype : single SQLtype or dict of column name to SQL type, default None - Optional specifying the datatype for columns. The SQL type should - be a SQLAlchemy type, or a string for sqlite3 fallback connection. - If all columns are of the same type, one single value can be used. - method : {None, 'multi', callable}, default None + chunksize : int, optional + Specify the number of rows in each batch to be written at a time. + By default, all rows will be written at once. + dtype : dict or scalar, optional + Specifying the datatype for columns. If a dictionary is used, the + keys should be the column names and the values should be the + SQLAlchemy types or strings for the sqlite3 fallback mode. If a + scalar is provided, it will be applied to all columns. + method : {None, 'multi', callable}, optional Controls the SQL insertion clause used: - None : Uses standard SQL ``INSERT`` clause (one per row). 
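To make the reworded ``dtype``/``chunksize``/``method`` behaviour concrete, a hedged usage sketch follows (illustrative only, not part of the patch; the engine URL and table name are made up for the example):

    # Illustrative sketch of the documented behaviour: a scalar dtype is
    # applied to every column, and chunksize batches the INSERT statements.
    import pandas as pd
    import sqlalchemy

    engine = sqlalchemy.create_engine("sqlite://")  # in-memory SQLite
    df = pd.DataFrame({"a": [1.5, 2.5], "b": [3.0, 4.5]})

    # One SQLAlchemy type for all columns; rows written one batch at a time.
    df.to_sql("example", engine, dtype=sqlalchemy.types.Float, chunksize=1)

    # method="multi" instead passes multiple rows per INSERT clause.
    df.to_sql("example", engine, if_exists="replace", method="multi")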
From bfdbebec423d781ebde189de24f5413298ab7c81 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 30 Aug 2019 11:43:48 -0700 Subject: [PATCH 46/95] CLN: catch less inside try/except (#28203) * CLN: catch less inside try/except --- pandas/_libs/reduction.pyx | 4 ---- pandas/core/groupby/generic.py | 17 +++++++++-------- pandas/core/groupby/groupby.py | 3 ++- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index f95685c3379696..c892c1cf1b8a3e 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -296,8 +296,6 @@ cdef class SeriesBinGrouper: islider.advance(group_size) vslider.advance(group_size) - except: - raise finally: # so we don't free the wrong memory islider.reset() @@ -425,8 +423,6 @@ cdef class SeriesGrouper: group_size = 0 - except: - raise finally: # so we don't free the wrong memory islider.reset() diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b0bcd1cc1e27c4..5e463d50d43d6d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -242,15 +242,18 @@ def aggregate(self, func, *args, **kwargs): # grouper specific aggregations if self.grouper.nkeys > 1: return self._python_agg_general(func, *args, **kwargs) + elif args or kwargs: + result = self._aggregate_generic(func, *args, **kwargs) else: # try to treat as if we are passing a list try: - assert not args and not kwargs result = self._aggregate_multiple_funcs( [func], _level=_level, _axis=self.axis ) - + except Exception: + result = self._aggregate_generic(func) + else: result.columns = Index( result.columns.levels[0], name=self._selected_obj.columns.name ) @@ -260,8 +263,6 @@ def aggregate(self, func, *args, **kwargs): # values. concat no longer converts DataFrame[Sparse] # to SparseDataFrame, so we do it here. result = SparseDataFrame(result._data) - except Exception: - result = self._aggregate_generic(func, *args, **kwargs) if not self.as_index: self._insert_inaxis_grouper_inplace(result) @@ -313,10 +314,10 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): cannot_agg = [] errors = None for item in obj: - try: - data = obj[item] - colg = SeriesGroupBy(data, selection=item, grouper=self.grouper) + data = obj[item] + colg = SeriesGroupBy(data, selection=item, grouper=self.grouper) + try: cast = self._transform_should_cast(func) result[item] = colg.aggregate(func, *args, **kwargs) @@ -684,7 +685,7 @@ def _transform_item_by_item(self, obj, wrapper): return DataFrame(output, index=obj.index, columns=columns) - def filter(self, func, dropna=True, *args, **kwargs): # noqa + def filter(self, func, dropna=True, *args, **kwargs): """ Return a copy of a DataFrame excluding elements from groups that do not satisfy the boolean criterion specified by func. diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4d21b5810470a7..6deef16bdec131 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -653,7 +653,8 @@ def curried(x): # mark this column as an error try: return self._aggregate_item_by_item(name, *args, **kwargs) - except (AttributeError): + except AttributeError: + # e.g. 
SparseArray has no flags attr raise ValueError return wrapper From f8a924bcc3191ea7c82482ddf22728e629e808f3 Mon Sep 17 00:00:00 2001 From: Will Holmgren Date: Fri, 30 Aug 2019 13:54:02 -0700 Subject: [PATCH 47/95] DOC: fix DatetimeIndex.tz_localize doc string example (#28237) --- pandas/core/arrays/datetimes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 70df708d36b3bf..732f819e743a47 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1063,6 +1063,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): Be careful with DST changes. When there is sequential data, pandas can infer the DST time: + >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00', ... '2018-10-28 02:00:00', ... '2018-10-28 02:30:00', @@ -1094,6 +1095,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise", errors=None): If the DST transition causes nonexistent times, you can shift these dates forward or backwards with a timedelta object or `'shift_forward'` or `'shift_backwards'`. + >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00', ... '2015-03-29 03:30:00'])) >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward') From 42d6ee7cd1d43dfc2054ec00d82135af87c33574 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:02:09 -0700 Subject: [PATCH 48/95] have Timestamp return NotImplemented (#28157) --- pandas/_libs/tslibs/c_timestamp.pyx | 11 ++--------- pandas/core/arrays/datetimelike.py | 16 ++++++++++++---- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 906dabba09486c..10ed2588deaca5 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -269,15 +269,8 @@ cdef class _Timestamp(datetime): return self + neg_other typ = getattr(other, '_typ', None) - - # a Timestamp-DatetimeIndex -> yields a negative TimedeltaIndex - if typ in ('datetimeindex', 'datetimearray'): - # timezone comparison is performed in DatetimeIndex._sub_datelike - return -other.__sub__(self) - - # a Timestamp-TimedeltaIndex -> yields a negative TimedeltaIndex - elif typ in ('timedeltaindex', 'timedeltaarray'): - return (-other).__add__(self) + if typ is not None: + return NotImplemented elif other is NaT: return NaT diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1988726edc79b9..bda5f8f4326f18 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1300,7 +1300,7 @@ def __sub__(self, other): return result def __rsub__(self, other): - if is_datetime64_any_dtype(other) and is_timedelta64_dtype(self): + if is_datetime64_any_dtype(other) and is_timedelta64_dtype(self.dtype): # ndarray[datetime64] cannot be subtracted from self, so # we need to wrap in DatetimeArray/Index and flip the operation if not isinstance(other, DatetimeLikeArrayMixin): @@ -1310,9 +1310,9 @@ def __rsub__(self, other): other = DatetimeArray(other) return other - self elif ( - is_datetime64_any_dtype(self) + is_datetime64_any_dtype(self.dtype) and hasattr(other, "dtype") - and not is_datetime64_any_dtype(other) + and not is_datetime64_any_dtype(other.dtype) ): # GH#19959 datetime - datetime is well-defined as timedelta, # but any other type - datetime is not well-defined. 
@@ -1321,13 +1321,21 @@ def __rsub__(self, other): cls=type(self).__name__, typ=type(other).__name__ ) ) - elif is_period_dtype(self) and is_timedelta64_dtype(other): + elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other): # TODO: Can we simplify/generalize these cases at all? raise TypeError( "cannot subtract {cls} from {dtype}".format( cls=type(self).__name__, dtype=other.dtype ) ) + elif is_timedelta64_dtype(self.dtype): + if lib.is_integer(other) or is_integer_dtype(other): + # need to subtract before negating, since that flips freq + # -self flips self.freq, messing up results + return -(self - other) + + return (-self) + other + return -(self - other) # FIXME: DTA/TDA/PA inplace methods should actually be inplace, GH#24115 From 05cc95971e56b503d4df9911a44cd60a7b74cc79 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:04:11 -0700 Subject: [PATCH 49/95] BUG: SparseDataFrame op incorrectly casting to float (#28107) --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/sparse/frame.py | 6 +++--- pandas/tests/sparse/frame/test_frame.py | 16 ++++++++++++++++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 3b6288146bdf2e..6834435adb4780 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -189,7 +189,7 @@ Reshaping Sparse ^^^^^^ - +- Bug in :class:`SparseDataFrame` arithmetic operations incorrectly casting inputs to float (:issue:`28107`) - - diff --git a/pandas/core/sparse/frame.py b/pandas/core/sparse/frame.py index 8fe6850c84b8b1..3d6ba0b8d97745 100644 --- a/pandas/core/sparse/frame.py +++ b/pandas/core/sparse/frame.py @@ -576,8 +576,8 @@ def _combine_match_index(self, other, func, level=None): this, other = self.align(other, join="outer", axis=0, level=level, copy=False) new_data = {} - for col, series in this.items(): - new_data[col] = func(series.values, other.values) + for col in this.columns: + new_data[col] = func(this[col], other) fill_value = self._get_op_result_fill_value(other, func) @@ -603,7 +603,7 @@ def _combine_match_columns(self, other, func, level=None): new_data = {} for col in left.columns: - new_data[col] = func(left[col], float(right[col])) + new_data[col] = func(left[col], right[col]) return self._constructor( new_data, diff --git a/pandas/tests/sparse/frame/test_frame.py b/pandas/tests/sparse/frame/test_frame.py index ddb50e0897a869..e372e2563e682c 100644 --- a/pandas/tests/sparse/frame/test_frame.py +++ b/pandas/tests/sparse/frame/test_frame.py @@ -1487,6 +1487,22 @@ def test_comparison_op_scalar(self): assert isinstance(res, pd.SparseDataFrame) tm.assert_frame_equal(res.to_dense(), df != 0) + def test_add_series_retains_dtype(self): + # SparseDataFrame._combine_match_columns used to incorrectly cast + # to float + d = {0: [2j, 3j], 1: [0, 1]} + sdf = SparseDataFrame(data=d, default_fill_value=1) + result = sdf + sdf[0] + + df = sdf.to_dense() + expected = df + df[0] + tm.assert_frame_equal(result.to_dense(), expected) + + # Make it explicit to be on the safe side + edata = {0: [4j, 5j], 1: [3j, 1 + 3j]} + expected = DataFrame(edata) + tm.assert_frame_equal(result.to_dense(), expected) + @pytest.mark.filterwarnings("ignore:Sparse:FutureWarning") @pytest.mark.filterwarnings("ignore:DataFrame.to_sparse:FutureWarning") From 2aeed3fb11434f16fae433480279dea9a495d473 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:06:26 -0700 Subject: [PATCH 50/95] REF: separate bloated test (#28081) --- 
pandas/core/ops/__init__.py | 17 ++- pandas/tests/series/test_operators.py | 170 +++++++++++++++++--------- 2 files changed, 124 insertions(+), 63 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 86cd6e878cde60..dec2722275d6ea 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -791,16 +791,21 @@ def wrapper(self, other): self, other = _align_method_SERIES(self, other, align_asobject=True) res_name = get_op_result_name(self, other) + # TODO: shouldn't we be applying finalize whenever + # not isinstance(other, ABCSeries)? + finalizer = ( + lambda x: x.__finalize__(self) + if not isinstance(other, (ABCSeries, ABCIndexClass)) + else x + ) + if isinstance(other, ABCDataFrame): # Defer to DataFrame implementation; fail early return NotImplemented elif isinstance(other, (ABCSeries, ABCIndexClass)): is_other_int_dtype = is_integer_dtype(other.dtype) - other = fill_int(other) if is_other_int_dtype else fill_bool(other) - - ovalues = other.values - finalizer = lambda x: x + other = other if is_other_int_dtype else fill_bool(other) else: # scalars, list, tuple, np.array @@ -811,8 +816,8 @@ def wrapper(self, other): # thing? e.g. other = [[0, 1], [2, 3], [4, 5]]? other = construct_1d_object_array_from_listlike(other) - ovalues = other - finalizer = lambda x: x.__finalize__(self) + # TODO: use extract_array once we handle EA correctly, see GH#27959 + ovalues = lib.values_from_object(other) # For int vs int `^`, `|`, `&` are bitwise operators and return # integer dtypes. Otherwise these are boolean ops diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 062c07cb6242aa..aa44760dcd9180 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -36,22 +36,14 @@ def test_bool_operators_with_nas(self, bool_op): expected[mask] = False assert_series_equal(result, expected) - def test_operators_bitwise(self): + def test_logical_operators_bool_dtype_with_empty(self): # GH#9016: support bitwise op for integer types index = list("bca") s_tft = Series([True, False, True], index=index) s_fff = Series([False, False, False], index=index) - s_tff = Series([True, False, False], index=index) s_empty = Series([]) - # TODO: unused - # s_0101 = Series([0, 1, 0, 1]) - - s_0123 = Series(range(4), dtype="int64") - s_3333 = Series([3] * 4) - s_4444 = Series([4] * 4) - res = s_tft & s_empty expected = s_fff assert_series_equal(res, expected) @@ -60,6 +52,16 @@ def test_operators_bitwise(self): expected = s_tft assert_series_equal(res, expected) + def test_logical_operators_int_dtype_with_int_dtype(self): + # GH#9016: support bitwise op for integer types + + # TODO: unused + # s_0101 = Series([0, 1, 0, 1]) + + s_0123 = Series(range(4), dtype="int64") + s_3333 = Series([3] * 4) + s_4444 = Series([4] * 4) + res = s_0123 & s_3333 expected = Series(range(4), dtype="int64") assert_series_equal(res, expected) @@ -68,76 +70,129 @@ def test_operators_bitwise(self): expected = Series(range(4, 8), dtype="int64") assert_series_equal(res, expected) - s_a0b1c0 = Series([1], list("b")) - - res = s_tft & s_a0b1c0 - expected = s_tff.reindex(list("abc")) + s_1111 = Series([1] * 4, dtype="int8") + res = s_0123 & s_1111 + expected = Series([0, 1, 0, 1], dtype="int64") assert_series_equal(res, expected) - res = s_tft | s_a0b1c0 - expected = s_tft.reindex(list("abc")) + res = s_0123.astype(np.int16) | s_1111.astype(np.int32) + expected = Series([1, 1, 3, 3], dtype="int32") assert_series_equal(res, 
expected) - n0 = 0 - res = s_tft & n0 - expected = s_fff - assert_series_equal(res, expected) + def test_logical_operators_int_dtype_with_int_scalar(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") - res = s_0123 & n0 + res = s_0123 & 0 expected = Series([0] * 4) assert_series_equal(res, expected) - n1 = 1 - res = s_tft & n1 - expected = s_tft - assert_series_equal(res, expected) - - res = s_0123 & n1 + res = s_0123 & 1 expected = Series([0, 1, 0, 1]) assert_series_equal(res, expected) - s_1111 = Series([1] * 4, dtype="int8") - res = s_0123 & s_1111 - expected = Series([0, 1, 0, 1], dtype="int64") - assert_series_equal(res, expected) - - res = s_0123.astype(np.int16) | s_1111.astype(np.int32) - expected = Series([1, 1, 3, 3], dtype="int32") - assert_series_equal(res, expected) + def test_logical_operators_int_dtype_with_float(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") - with pytest.raises(TypeError): - s_1111 & "a" - with pytest.raises(TypeError): - s_1111 & ["a", "b", "c", "d"] with pytest.raises(TypeError): s_0123 & np.NaN with pytest.raises(TypeError): s_0123 & 3.14 with pytest.raises(TypeError): s_0123 & [0.1, 4, 3.14, 2] + with pytest.raises(TypeError): + s_0123 & np.array([0.1, 4, 3.14, 2]) - # s_0123 will be all false now because of reindexing like s_tft - exp = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) - assert_series_equal(s_tft & s_0123, exp) - - # s_tft will be all false now because of reindexing like s_0123 - exp = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) - assert_series_equal(s_0123 & s_tft, exp) - - assert_series_equal(s_0123 & False, Series([False] * 4)) - assert_series_equal(s_0123 ^ False, Series([False, True, True, True])) - assert_series_equal(s_0123 & [False], Series([False] * 4)) - assert_series_equal(s_0123 & (False), Series([False] * 4)) - assert_series_equal( - s_0123 & Series([False, np.NaN, False, False]), Series([False] * 4) - ) + # FIXME: this should be consistent with the list case above + expected = Series([False, True, False, True]) + result = s_0123 & Series([0.1, 4, -3.14, 2]) + assert_series_equal(result, expected) + + def test_logical_operators_int_dtype_with_str(self): + s_1111 = Series([1] * 4, dtype="int8") + + with pytest.raises(TypeError): + s_1111 & "a" + with pytest.raises(TypeError): + s_1111 & ["a", "b", "c", "d"] + + def test_logical_operators_int_dtype_with_bool(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + expected = Series([False] * 4) + + result = s_0123 & False + assert_series_equal(result, expected) + + result = s_0123 & [False] + assert_series_equal(result, expected) + + result = s_0123 & (False,) + assert_series_equal(result, expected) - s_ftft = Series([False, True, False, True]) - assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft) + result = s_0123 ^ False + expected = Series([False, True, True, True]) + assert_series_equal(result, expected) + + def test_logical_operators_int_dtype_with_object(self): + # GH#9016: support bitwise op for integer types + s_0123 = Series(range(4), dtype="int64") + + result = s_0123 & Series([False, np.NaN, False, False]) + expected = Series([False] * 4) + assert_series_equal(result, expected) s_abNd = Series(["a", "b", np.NaN, "d"]) - res = s_0123 & s_abNd - expected = s_ftft + result = s_0123 & s_abNd + expected = Series([False, True, False, True]) + assert_series_equal(result, expected) + + def 
test_logical_operators_bool_dtype_with_int(self): + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_fff = Series([False, False, False], index=index) + + res = s_tft & 0 + expected = s_fff + assert_series_equal(res, expected) + + res = s_tft & 1 + expected = s_tft + assert_series_equal(res, expected) + + def test_logical_operators_int_dtype_with_bool_dtype_and_reindex(self): + # GH#9016: support bitwise op for integer types + + # with non-matching indexes, logical operators will cast to object + # before operating + index = list("bca") + + s_tft = Series([True, False, True], index=index) + s_tft = Series([True, False, True], index=index) + s_tff = Series([True, False, False], index=index) + + s_0123 = Series(range(4), dtype="int64") + + # s_0123 will be all false now because of reindexing like s_tft + expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) + result = s_tft & s_0123 + assert_series_equal(result, expected) + + expected = Series([False] * 7, index=[0, 1, 2, 3, "a", "b", "c"]) + result = s_0123 & s_tft + assert_series_equal(result, expected) + + s_a0b1c0 = Series([1], list("b")) + + res = s_tft & s_a0b1c0 + expected = s_tff.reindex(list("abc")) + assert_series_equal(res, expected) + + res = s_tft | s_a0b1c0 + expected = s_tft.reindex(list("abc")) assert_series_equal(res, expected) def test_scalar_na_logical_ops_corners(self): @@ -523,6 +578,7 @@ def test_comparison_operators_with_nas(self): assert_series_equal(result, expected) + # FIXME: dont leave commented-out # fffffffuuuuuuuuuuuu # result = f(val, s) # expected = f(val, s.dropna()).reindex(s.index) From 2cd78883c77dc2d2fed10e07f245ba9d9ceb635a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:09:19 -0700 Subject: [PATCH 51/95] REF: do extract_array earlier in series arith/comparison ops (#28066) --- pandas/core/ops/__init__.py | 100 +++++++++++++++++++++++------------- 1 file changed, 63 insertions(+), 37 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index dec2722275d6ea..cc2d4ced1243f1 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -5,7 +5,7 @@ """ import datetime import operator -from typing import Any, Callable, Tuple +from typing import Any, Callable, Tuple, Union import numpy as np @@ -34,10 +34,11 @@ ABCIndexClass, ABCSeries, ABCSparseSeries, + ABCTimedeltaArray, + ABCTimedeltaIndex, ) from pandas.core.dtypes.missing import isna, notna -import pandas as pd from pandas._typing import ArrayLike from pandas.core.construction import array, extract_array from pandas.core.ops.array_ops import comp_method_OBJECT_ARRAY, define_na_arithmetic_op @@ -148,6 +149,8 @@ def maybe_upcast_for_op(obj, shape: Tuple[int, ...]): Be careful to call this *after* determining the `name` attribute to be attached to the result of the arithmetic operation. 
""" + from pandas.core.arrays import TimedeltaArray + if type(obj) is datetime.timedelta: # GH#22390 cast up to Timedelta to rely on Timedelta # implementation; otherwise operation against numeric-dtype @@ -157,12 +160,10 @@ def maybe_upcast_for_op(obj, shape: Tuple[int, ...]): if isna(obj): # wrapping timedelta64("NaT") in Timedelta returns NaT, # which would incorrectly be treated as a datetime-NaT, so - # we broadcast and wrap in a Series + # we broadcast and wrap in a TimedeltaArray + obj = obj.astype("timedelta64[ns]") right = np.broadcast_to(obj, shape) - - # Note: we use Series instead of TimedeltaIndex to avoid having - # to worry about catching NullFrequencyError. - return pd.Series(right) + return TimedeltaArray(right) # In particular non-nanosecond timedelta64 needs to be cast to # nanoseconds, or else we get undesired behavior like @@ -173,7 +174,7 @@ def maybe_upcast_for_op(obj, shape: Tuple[int, ...]): # GH#22390 Unfortunately we need to special-case right-hand # timedelta64 dtypes because numpy casts integer dtypes to # timedelta64 when operating with timedelta64 - return pd.TimedeltaIndex(obj) + return TimedeltaArray._from_sequence(obj) return obj @@ -520,13 +521,34 @@ def column_op(a, b): return result -def dispatch_to_extension_op(op, left, right): +def dispatch_to_extension_op( + op, + left: Union[ABCExtensionArray, np.ndarray], + right: Any, + keep_null_freq: bool = False, +): """ Assume that left or right is a Series backed by an ExtensionArray, apply the operator defined by op. + + Parameters + ---------- + op : binary operator + left : ExtensionArray or np.ndarray + right : object + keep_null_freq : bool, default False + Whether to re-raise a NullFrequencyError unchanged, as opposed to + catching and raising TypeError. + + Returns + ------- + ExtensionArray or np.ndarray + 2-tuple of these if op is divmod or rdivmod """ + # NB: left and right should already be unboxed, so neither should be + # a Series or Index. - if left.dtype.kind in "mM": + if left.dtype.kind in "mM" and isinstance(left, np.ndarray): # We need to cast datetime64 and timedelta64 ndarrays to # DatetimeArray/TimedeltaArray. But we avoid wrapping others in # PandasArray as that behaves poorly with e.g. IntegerArray. @@ -535,15 +557,15 @@ def dispatch_to_extension_op(op, left, right): # The op calls will raise TypeError if the op is not defined # on the ExtensionArray - # unbox Series and Index to arrays - new_left = extract_array(left, extract_numpy=True) - new_right = extract_array(right, extract_numpy=True) - try: - res_values = op(new_left, new_right) + res_values = op(left, right) except NullFrequencyError: # DatetimeIndex and TimedeltaIndex with freq == None raise ValueError # on add/sub of integers (or int-like). We re-raise as a TypeError. 
+ if keep_null_freq: + # TODO: remove keep_null_freq after Timestamp+int deprecation + # GH#22535 is enforced + raise raise TypeError( "incompatible type for a datetime/timedelta " "operation [{name}]".format(name=op.__name__) @@ -615,25 +637,29 @@ def wrapper(left, right): if isinstance(right, ABCDataFrame): return NotImplemented + keep_null_freq = isinstance( + right, + (ABCDatetimeIndex, ABCDatetimeArray, ABCTimedeltaIndex, ABCTimedeltaArray), + ) + left, right = _align_method_SERIES(left, right) res_name = get_op_result_name(left, right) - right = maybe_upcast_for_op(right, left.shape) - if should_extension_dispatch(left, right): - result = dispatch_to_extension_op(op, left, right) + lvalues = extract_array(left, extract_numpy=True) + rvalues = extract_array(right, extract_numpy=True) - elif is_timedelta64_dtype(right) or isinstance( - right, (ABCDatetimeArray, ABCDatetimeIndex) - ): - # We should only get here with td64 right with non-scalar values - # for right upcast by maybe_upcast_for_op - assert not isinstance(right, (np.timedelta64, np.ndarray)) - result = op(left._values, right) + rvalues = maybe_upcast_for_op(rvalues, lvalues.shape) - else: - lvalues = extract_array(left, extract_numpy=True) - rvalues = extract_array(right, extract_numpy=True) + if should_extension_dispatch(lvalues, rvalues): + result = dispatch_to_extension_op(op, lvalues, rvalues, keep_null_freq) + + elif is_timedelta64_dtype(rvalues) or isinstance(rvalues, ABCDatetimeArray): + # We should only get here with td64 rvalues with non-scalar values + # for rvalues upcast by maybe_upcast_for_op + assert not isinstance(rvalues, (np.timedelta64, np.ndarray)) + result = dispatch_to_extension_op(op, lvalues, rvalues, keep_null_freq) + else: with np.errstate(all="ignore"): result = na_op(lvalues, rvalues) @@ -708,25 +734,25 @@ def wrapper(self, other, axis=None): if len(self) != len(other): raise ValueError("Lengths must match to compare") - if should_extension_dispatch(self, other): - res_values = dispatch_to_extension_op(op, self, other) + lvalues = extract_array(self, extract_numpy=True) + rvalues = extract_array(other, extract_numpy=True) - elif is_scalar(other) and isna(other): + if should_extension_dispatch(lvalues, rvalues): + res_values = dispatch_to_extension_op(op, lvalues, rvalues) + + elif is_scalar(rvalues) and isna(rvalues): # numpy does not like comparisons vs None if op is operator.ne: - res_values = np.ones(len(self), dtype=bool) + res_values = np.ones(len(lvalues), dtype=bool) else: - res_values = np.zeros(len(self), dtype=bool) + res_values = np.zeros(len(lvalues), dtype=bool) else: - lvalues = extract_array(self, extract_numpy=True) - rvalues = extract_array(other, extract_numpy=True) - with np.errstate(all="ignore"): res_values = na_op(lvalues, rvalues) if is_scalar(res_values): raise TypeError( - "Could not compare {typ} type with Series".format(typ=type(other)) + "Could not compare {typ} type with Series".format(typ=type(rvalues)) ) result = self._constructor(res_values, index=self.index) From 30fb087095d40230765fc544b3477700e04f0332 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:17:43 -0700 Subject: [PATCH 52/95] BUG: fix+test Timestamp with int array (#28161) --- pandas/_libs/tslibs/c_timestamp.pyx | 16 ++++++ .../tests/scalar/timestamp/test_arithmetic.py | 53 +++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 10ed2588deaca5..41e2ae6b5b59b6 100644 --- 
a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -251,6 +251,14 @@ cdef class _Timestamp(datetime): result = result.normalize() return result + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + maybe_integer_op_deprecated(self) + if self.freq is None: + raise ValueError("Cannot add integer-dtype array " + "to Timestamp without freq.") + return self.freq * other + self + # index/series like elif hasattr(other, '_typ'): return NotImplemented @@ -268,6 +276,14 @@ cdef class _Timestamp(datetime): neg_other = -other return self + neg_other + elif is_array(other): + if other.dtype.kind in ['i', 'u']: + maybe_integer_op_deprecated(self) + if self.freq is None: + raise ValueError("Cannot subtract integer-dtype array " + "from Timestamp without freq.") + return self - self.freq * other + typ = getattr(other, '_typ', None) if typ is not None: return NotImplemented diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 58bd03129f2df0..2ef4fe79eeacf5 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -151,3 +151,56 @@ def test_timestamp_add_timedelta64_unit(self, other, expected_difference): result = ts + other valdiff = result.value - ts.value assert valdiff == expected_difference + + @pytest.mark.parametrize("ts", [Timestamp.now(), Timestamp.now("utc")]) + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + np.array([1, 2], dtype=np.int32), + np.array([3, 4], dtype=np.uint64), + ], + ) + def test_add_int_no_freq_raises(self, ts, other): + with pytest.raises(ValueError, match="without freq"): + ts + other + with pytest.raises(ValueError, match="without freq"): + other + ts + + with pytest.raises(ValueError, match="without freq"): + ts - other + with pytest.raises(TypeError): + other - ts + + @pytest.mark.parametrize( + "ts", + [ + Timestamp("1776-07-04", freq="D"), + Timestamp("1776-07-04", tz="UTC", freq="D"), + ], + ) + @pytest.mark.parametrize( + "other", + [ + 1, + np.int64(1), + np.array([1, 2], dtype=np.int32), + np.array([3, 4], dtype=np.uint64), + ], + ) + def test_add_int_with_freq(self, ts, other): + with tm.assert_produces_warning(FutureWarning): + result1 = ts + other + with tm.assert_produces_warning(FutureWarning): + result2 = other + ts + + assert np.all(result1 == result2) + + with tm.assert_produces_warning(FutureWarning): + result = result1 - other + + assert np.all(result == ts) + + with pytest.raises(TypeError): + other - ts From 562f423fc755c2c59307053bc5afceebb068397f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:23:18 -0700 Subject: [PATCH 53/95] CLN: avoid catching Exception in _choose_path (#28205) --- pandas/core/groupby/generic.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 5e463d50d43d6d..6c95b521110a98 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -649,20 +649,21 @@ def _choose_path(self, fast_path, slow_path, group): # if we make it here, test if we can use the fast path try: res_fast = fast_path(group) - - # verify fast path does not change columns (and names), otherwise - # its results cannot be joined with those of the slow path - if res_fast.columns != group.columns: - return path, res - # verify numerical equality with the slow path - if res.shape == res_fast.shape: - res_r = res.values.ravel() - 
res_fast_r = res_fast.values.ravel() - mask = notna(res_r) - if (res_r[mask] == res_fast_r[mask]).all(): - path = fast_path except Exception: - pass + # Hard to know ex-ante what exceptions `fast_path` might raise + return path, res + + # verify fast path does not change columns (and names), otherwise + # its results cannot be joined with those of the slow path + if not isinstance(res_fast, DataFrame): + return path, res + + if not res_fast.columns.equals(group.columns): + return path, res + + if res_fast.equals(res): + path = fast_path + return path, res def _transform_item_by_item(self, obj, wrapper): From 89e5f8445a7c150c133c5c4db3e852c9947e88b4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:26:53 -0700 Subject: [PATCH 54/95] REF: use dispatch_to_extension_op for bool ops (#27959) --- pandas/core/ops/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index cc2d4ced1243f1..df097d7ad91dc7 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -829,6 +829,11 @@ def wrapper(self, other): # Defer to DataFrame implementation; fail early return NotImplemented + elif should_extension_dispatch(self, other): + # e.g. SparseArray + res_values = dispatch_to_extension_op(op, self, other) + return _construct_result(self, res_values, index=self.index, name=res_name) + elif isinstance(other, (ABCSeries, ABCIndexClass)): is_other_int_dtype = is_integer_dtype(other.dtype) other = other if is_other_int_dtype else fill_bool(other) From 498f3008f583407e996322409a4f9af8dec8d775 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 14:31:29 -0700 Subject: [PATCH 55/95] TST: parametrize and de-duplicate arith tests (#27950) --- pandas/tests/arithmetic/conftest.py | 8 +- pandas/tests/arithmetic/test_datetime64.py | 64 ++++------ pandas/tests/arithmetic/test_numeric.py | 16 ++- pandas/tests/arithmetic/test_object.py | 13 +- pandas/tests/arithmetic/test_timedelta64.py | 131 ++++++++------------ 5 files changed, 96 insertions(+), 136 deletions(-) diff --git a/pandas/tests/arithmetic/conftest.py b/pandas/tests/arithmetic/conftest.py index f047154f2c6362..774ff14398bdb4 100644 --- a/pandas/tests/arithmetic/conftest.py +++ b/pandas/tests/arithmetic/conftest.py @@ -190,7 +190,12 @@ def box(request): @pytest.fixture( - params=[pd.Index, pd.Series, pytest.param(pd.DataFrame, marks=pytest.mark.xfail)], + params=[ + pd.Index, + pd.Series, + pytest.param(pd.DataFrame, marks=pytest.mark.xfail), + tm.to_array, + ], ids=id_func, ) def box_df_fail(request): @@ -206,6 +211,7 @@ def box_df_fail(request): (pd.Series, False), (pd.DataFrame, False), pytest.param((pd.DataFrame, True), marks=pytest.mark.xfail), + (tm.to_array, False), ], ids=id_func, ) diff --git a/pandas/tests/arithmetic/test_datetime64.py b/pandas/tests/arithmetic/test_datetime64.py index 5931cd93cc8c5a..bc7b979d2c7d03 100644 --- a/pandas/tests/arithmetic/test_datetime64.py +++ b/pandas/tests/arithmetic/test_datetime64.py @@ -348,28 +348,6 @@ def test_dt64arr_timestamp_equality(self, box_with_array): expected = tm.box_expected([False, False], xbox) tm.assert_equal(result, expected) - @pytest.mark.parametrize( - "op", - [operator.eq, operator.ne, operator.gt, operator.ge, operator.lt, operator.le], - ) - def test_comparison_tzawareness_compat(self, op): - # GH#18162 - dr = pd.date_range("2016-01-01", periods=6) - dz = dr.tz_localize("US/Pacific") - - # Check that there isn't a problem aware-aware and naive-naive do not - # raise - 
naive_series = Series(dr) - aware_series = Series(dz) - msg = "Cannot compare tz-naive and tz-aware" - with pytest.raises(TypeError, match=msg): - op(dz, naive_series) - with pytest.raises(TypeError, match=msg): - op(dr, aware_series) - - # TODO: implement _assert_tzawareness_compat for the reverse - # comparison with the Series on the left-hand side - class TestDatetimeIndexComparisons: @@ -599,15 +577,18 @@ def test_comparison_tzawareness_compat(self, op, box_df_fail): with pytest.raises(TypeError, match=msg): op(dz, np.array(list(dr), dtype=object)) - # Check that there isn't a problem aware-aware and naive-naive do not - # raise + # The aware==aware and naive==naive comparisons should *not* raise assert_all(dr == dr) - assert_all(dz == dz) + assert_all(dr == list(dr)) + assert_all(list(dr) == dr) + assert_all(np.array(list(dr), dtype=object) == dr) + assert_all(dr == np.array(list(dr), dtype=object)) - # FIXME: DataFrame case fails to raise for == and !=, wrong - # message for inequalities - assert (dr == list(dr)).all() - assert (dz == list(dz)).all() + assert_all(dz == dz) + assert_all(dz == list(dz)) + assert_all(list(dz) == dz) + assert_all(np.array(list(dz), dtype=object) == dz) + assert_all(dz == np.array(list(dz), dtype=object)) @pytest.mark.parametrize( "op", @@ -844,6 +825,7 @@ def test_dt64arr_isub_timedeltalike_scalar( rng -= two_hours tm.assert_equal(rng, expected) + # TODO: redundant with test_dt64arr_add_timedeltalike_scalar def test_dt64arr_add_td64_scalar(self, box_with_array): # scalar timedeltas/np.timedelta64 objects # operate with np.timedelta64 correctly @@ -1709,14 +1691,12 @@ def test_operators_datetimelike(self): dt1 - dt2 dt2 - dt1 - # ## datetime64 with timetimedelta ### + # datetime64 with timetimedelta dt1 + td1 td1 + dt1 dt1 - td1 - # TODO: Decide if this ought to work. 
- # td1 - dt1 - # ## timetimedelta with datetime64 ### + # timetimedelta with datetime64 td1 + dt1 dt1 + td1 @@ -1914,7 +1894,7 @@ def test_dt64_series_add_intlike(self, tz, op): with pytest.raises(TypeError, match=msg): method(other) with pytest.raises(TypeError, match=msg): - method(other.values) + method(np.array(other)) with pytest.raises(TypeError, match=msg): method(pd.Index(other)) @@ -2380,34 +2360,34 @@ def test_ufunc_coercions(self): idx = date_range("2011-01-01", periods=3, freq="2D", name="x") delta = np.timedelta64(1, "D") + exp = date_range("2011-01-02", periods=3, freq="2D", name="x") for result in [idx + delta, np.add(idx, delta)]: assert isinstance(result, DatetimeIndex) - exp = date_range("2011-01-02", periods=3, freq="2D", name="x") tm.assert_index_equal(result, exp) assert result.freq == "2D" + exp = date_range("2010-12-31", periods=3, freq="2D", name="x") for result in [idx - delta, np.subtract(idx, delta)]: assert isinstance(result, DatetimeIndex) - exp = date_range("2010-12-31", periods=3, freq="2D", name="x") tm.assert_index_equal(result, exp) assert result.freq == "2D" delta = np.array( [np.timedelta64(1, "D"), np.timedelta64(2, "D"), np.timedelta64(3, "D")] ) + exp = DatetimeIndex( + ["2011-01-02", "2011-01-05", "2011-01-08"], freq="3D", name="x" + ) for result in [idx + delta, np.add(idx, delta)]: assert isinstance(result, DatetimeIndex) - exp = DatetimeIndex( - ["2011-01-02", "2011-01-05", "2011-01-08"], freq="3D", name="x" - ) tm.assert_index_equal(result, exp) assert result.freq == "3D" + exp = DatetimeIndex( + ["2010-12-31", "2011-01-01", "2011-01-02"], freq="D", name="x" + ) for result in [idx - delta, np.subtract(idx, delta)]: assert isinstance(result, DatetimeIndex) - exp = DatetimeIndex( - ["2010-12-31", "2011-01-01", "2011-01-02"], freq="D", name="x" - ) tm.assert_index_equal(result, exp) assert result.freq == "D" diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index d686d9f90a5a4a..8e7e72fcdc5800 100644 --- a/pandas/tests/arithmetic/test_numeric.py +++ b/pandas/tests/arithmetic/test_numeric.py @@ -561,9 +561,9 @@ def test_div_int(self, numeric_idx): tm.assert_index_equal(result, expected) @pytest.mark.parametrize("op", [operator.mul, ops.rmul, operator.floordiv]) - def test_mul_int_identity(self, op, numeric_idx, box): + def test_mul_int_identity(self, op, numeric_idx, box_with_array): idx = numeric_idx - idx = tm.box_expected(idx, box) + idx = tm.box_expected(idx, box_with_array) result = op(idx, 1) tm.assert_equal(result, idx) @@ -615,8 +615,9 @@ def test_mul_size_mismatch_raises(self, numeric_idx): idx * np.array([1, 2]) @pytest.mark.parametrize("op", [operator.pow, ops.rpow]) - def test_pow_float(self, op, numeric_idx, box): + def test_pow_float(self, op, numeric_idx, box_with_array): # test power calculations both ways, GH#14973 + box = box_with_array idx = numeric_idx expected = pd.Float64Index(op(idx.values, 2.0)) @@ -626,8 +627,9 @@ def test_pow_float(self, op, numeric_idx, box): result = op(idx, 2.0) tm.assert_equal(result, expected) - def test_modulo(self, numeric_idx, box): + def test_modulo(self, numeric_idx, box_with_array): # GH#9244 + box = box_with_array idx = numeric_idx expected = Index(idx.values % 2) @@ -1041,7 +1043,8 @@ class TestObjectDtypeEquivalence: # Tests that arithmetic operations match operations executed elementwise @pytest.mark.parametrize("dtype", [None, object]) - def test_numarr_with_dtype_add_nan(self, dtype, box): + def test_numarr_with_dtype_add_nan(self, dtype, 
box_with_array): + box = box_with_array ser = pd.Series([1, 2, 3], dtype=dtype) expected = pd.Series([np.nan, np.nan, np.nan], dtype=dtype) @@ -1055,7 +1058,8 @@ def test_numarr_with_dtype_add_nan(self, dtype, box): tm.assert_equal(result, expected) @pytest.mark.parametrize("dtype", [None, object]) - def test_numarr_with_dtype_add_int(self, dtype, box): + def test_numarr_with_dtype_add_int(self, dtype, box_with_array): + box = box_with_array ser = pd.Series([1, 2, 3], dtype=dtype) expected = pd.Series([2, 3, 4], dtype=dtype) diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index fd9db806713603..f9c1de115b3a4f 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -89,7 +89,7 @@ def test_pow_ops_object(self): @pytest.mark.parametrize("op", [operator.add, ops.radd]) @pytest.mark.parametrize("other", ["category", "Int64"]) - def test_add_extension_scalar(self, other, box, op): + def test_add_extension_scalar(self, other, box_with_array, op): # GH#22378 # Check that scalars satisfying is_extension_array_dtype(obj) # do not incorrectly try to dispatch to an ExtensionArray operation @@ -97,8 +97,8 @@ def test_add_extension_scalar(self, other, box, op): arr = pd.Series(["a", "b", "c"]) expected = pd.Series([op(x, other) for x in arr]) - arr = tm.box_expected(arr, box) - expected = tm.box_expected(expected, box) + arr = tm.box_expected(arr, box_with_array) + expected = tm.box_expected(expected, box_with_array) result = op(arr, other) tm.assert_equal(result, expected) @@ -133,16 +133,17 @@ def test_objarr_radd_str(self, box): ], ) @pytest.mark.parametrize("dtype", [None, object]) - def test_objarr_radd_str_invalid(self, dtype, data, box): + def test_objarr_radd_str_invalid(self, dtype, data, box_with_array): ser = Series(data, dtype=dtype) - ser = tm.box_expected(ser, box) + ser = tm.box_expected(ser, box_with_array) with pytest.raises(TypeError): "foo_" + ser @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub]) - def test_objarr_add_invalid(self, op, box): + def test_objarr_add_invalid(self, op, box_with_array): # invalid ops + box = box_with_array obj_ser = tm.makeObjectSeries() obj_ser.name = "objects" diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 6d6b85a1e81e1c..ee27ce97f269e9 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -968,71 +968,37 @@ def test_td64arr_add_datetime64_nat(self, box_with_array): # ------------------------------------------------------------------ # Operations with int-like others - def test_td64arr_add_int_series_invalid(self, box): - tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") - tdser = tm.box_expected(tdser, box) - err = TypeError if box is not pd.Index else NullFrequencyError - int_ser = Series([2, 3, 4]) - - with pytest.raises(err): - tdser + int_ser - with pytest.raises(err): - int_ser + tdser - with pytest.raises(err): - tdser - int_ser - with pytest.raises(err): - int_ser - tdser - - def test_td64arr_add_intlike(self, box_with_array): - # GH#19123 - tdi = TimedeltaIndex(["59 days", "59 days", "NaT"]) - ser = tm.box_expected(tdi, box_with_array) - - err = TypeError - if box_with_array in [pd.Index, tm.to_array]: - err = NullFrequencyError - - other = Series([20, 30, 40], dtype="uint8") - - # TODO: separate/parametrize - with pytest.raises(err): - ser + 1 - with pytest.raises(err): - ser - 1 - - with 
pytest.raises(err): - ser + other - with pytest.raises(err): - ser - other - - with pytest.raises(err): - ser + np.array(other) - with pytest.raises(err): - ser - np.array(other) - - with pytest.raises(err): - ser + pd.Index(other) - with pytest.raises(err): - ser - pd.Index(other) - - @pytest.mark.parametrize("scalar", [1, 1.5, np.array(2)]) - def test_td64arr_add_sub_numeric_scalar_invalid(self, box_with_array, scalar): + @pytest.mark.parametrize( + "other", + [ + # GH#19123 + 1, + Series([20, 30, 40], dtype="uint8"), + np.array([20, 30, 40], dtype="uint8"), + pd.UInt64Index([20, 30, 40]), + pd.Int64Index([20, 30, 40]), + Series([2, 3, 4]), + 1.5, + np.array(2), + ], + ) + def test_td64arr_addsub_numeric_invalid(self, box_with_array, other): box = box_with_array - tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") tdser = tm.box_expected(tdser, box) + err = TypeError - if box in [pd.Index, tm.to_array] and not isinstance(scalar, float): + if box in [pd.Index, tm.to_array] and not isinstance(other, float): err = NullFrequencyError with pytest.raises(err): - tdser + scalar + tdser + other with pytest.raises(err): - scalar + tdser + other + tdser with pytest.raises(err): - tdser - scalar + tdser - other with pytest.raises(err): - scalar - tdser + other - tdser @pytest.mark.parametrize( "dtype", @@ -1059,11 +1025,12 @@ def test_td64arr_add_sub_numeric_scalar_invalid(self, box_with_array, scalar): ], ids=lambda x: type(x).__name__, ) - def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype): + def test_td64arr_add_sub_numeric_arr_invalid(self, box_with_array, vec, dtype): + box = box_with_array tdser = pd.Series(["59 Days", "59 Days", "NaT"], dtype="m8[ns]") tdser = tm.box_expected(tdser, box) err = TypeError - if box is pd.Index and not dtype.startswith("float"): + if box in [pd.Index, tm.to_array] and not dtype.startswith("float"): err = NullFrequencyError vector = vec.astype(dtype) @@ -1080,14 +1047,6 @@ def test_td64arr_add_sub_numeric_arr_invalid(self, box, vec, dtype): # Operations with timedelta-like others # TODO: this was taken from tests.series.test_ops; de-duplicate - @pytest.mark.parametrize( - "scalar_td", - [ - timedelta(minutes=5, seconds=4), - Timedelta(minutes=5, seconds=4), - Timedelta("5m4s").to_timedelta64(), - ], - ) def test_operators_timedelta64_with_timedelta(self, scalar_td): # smoke tests td1 = Series([timedelta(minutes=5, seconds=3)] * 3) @@ -1141,7 +1100,8 @@ def test_timedelta64_operations_with_timedeltas(self): # roundtrip tm.assert_series_equal(result + td2, td1) - def test_td64arr_add_td64_array(self, box): + def test_td64arr_add_td64_array(self, box_with_array): + box = box_with_array dti = pd.date_range("2016-01-01", periods=3) tdi = dti - dti.shift(1) tdarr = tdi.values @@ -1155,7 +1115,8 @@ def test_td64arr_add_td64_array(self, box): result = tdarr + tdi tm.assert_equal(result, expected) - def test_td64arr_sub_td64_array(self, box): + def test_td64arr_sub_td64_array(self, box_with_array): + box = box_with_array dti = pd.date_range("2016-01-01", periods=3) tdi = dti - dti.shift(1) tdarr = tdi.values @@ -1229,8 +1190,9 @@ def test_td64arr_add_sub_tdi(self, box, names): else: assert result.dtypes[0] == "timedelta64[ns]" - def test_td64arr_add_sub_td64_nat(self, box): + def test_td64arr_add_sub_td64_nat(self, box_with_array): # GH#23320 special handling for timedelta64("NaT") + box = box_with_array tdi = pd.TimedeltaIndex([NaT, Timedelta("1s")]) other = np.timedelta64("NaT") expected = pd.TimedeltaIndex(["NaT"] * 2) @@ -1247,8 +1209,9 
@@ def test_td64arr_add_sub_td64_nat(self, box): result = other - obj tm.assert_equal(result, expected) - def test_td64arr_sub_NaT(self, box): + def test_td64arr_sub_NaT(self, box_with_array): # GH#18808 + box = box_with_array ser = Series([NaT, Timedelta("1s")]) expected = Series([NaT, NaT], dtype="timedelta64[ns]") @@ -1258,8 +1221,9 @@ def test_td64arr_sub_NaT(self, box): res = ser - pd.NaT tm.assert_equal(res, expected) - def test_td64arr_add_timedeltalike(self, two_hours, box): + def test_td64arr_add_timedeltalike(self, two_hours, box_with_array): # only test adding/sub offsets as + is now numeric + box = box_with_array rng = timedelta_range("1 days", "10 days") expected = timedelta_range("1 days 02:00:00", "10 days 02:00:00", freq="D") rng = tm.box_expected(rng, box) @@ -1268,8 +1232,9 @@ def test_td64arr_add_timedeltalike(self, two_hours, box): result = rng + two_hours tm.assert_equal(result, expected) - def test_td64arr_sub_timedeltalike(self, two_hours, box): + def test_td64arr_sub_timedeltalike(self, two_hours, box_with_array): # only test adding/sub offsets as - is now numeric + box = box_with_array rng = timedelta_range("1 days", "10 days") expected = timedelta_range("0 days 22:00:00", "9 days 22:00:00") @@ -1352,8 +1317,9 @@ def test_td64arr_add_offset_index(self, names, box): # TODO: combine with test_td64arr_add_offset_index by parametrizing # over second box? - def test_td64arr_add_offset_array(self, box): + def test_td64arr_add_offset_array(self, box_with_array): # GH#18849 + box = box_with_array tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"]) other = np.array([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)]) @@ -1433,13 +1399,12 @@ def test_td64arr_with_offset_series(self, names, box_df_fail): # GH#18849 box = box_df_fail box2 = Series if box in [pd.Index, tm.to_array] else box + exname = names[2] if box is not tm.to_array else names[1] tdi = TimedeltaIndex(["1 days 00:00:00", "3 days 04:00:00"], name=names[0]) other = Series([pd.offsets.Hour(n=1), pd.offsets.Minute(n=-2)], name=names[1]) - expected_add = Series( - [tdi[n] + other[n] for n in range(len(tdi))], name=names[2] - ) + expected_add = Series([tdi[n] + other[n] for n in range(len(tdi))], name=exname) tdi = tm.box_expected(tdi, box) expected_add = tm.box_expected(expected_add, box2) @@ -1452,9 +1417,7 @@ def test_td64arr_with_offset_series(self, names, box_df_fail): tm.assert_equal(res2, expected_add) # TODO: separate/parametrize add/sub test? 
- expected_sub = Series( - [tdi[n] - other[n] for n in range(len(tdi))], name=names[2] - ) + expected_sub = Series([tdi[n] - other[n] for n in range(len(tdi))], name=exname) expected_sub = tm.box_expected(expected_sub, box2) with tm.assert_produces_warning(PerformanceWarning): @@ -2055,6 +2018,8 @@ def test_td64arr_div_numeric_array(self, box_with_array, vector, dtype): def test_td64arr_mul_int_series(self, box_df_fail, names): # GH#19042 test for correct name attachment box = box_df_fail # broadcasts along wrong axis, but doesn't raise + exname = names[2] if box is not tm.to_array else names[1] + tdi = TimedeltaIndex( ["0days", "1day", "2days", "3days", "4days"], name=names[0] ) @@ -2064,11 +2029,11 @@ def test_td64arr_mul_int_series(self, box_df_fail, names): expected = Series( ["0days", "1day", "4days", "9days", "16days"], dtype="timedelta64[ns]", - name=names[2], + name=exname, ) tdi = tm.box_expected(tdi, box) - box = Series if (box is pd.Index and type(ser) is Series) else box + box = Series if (box is pd.Index or box is tm.to_array) else box expected = tm.box_expected(expected, box) result = ser * tdi @@ -2119,7 +2084,11 @@ def test_float_series_rdiv_td64arr(self, box_with_array, names): tm.assert_equal(result, expected) -class TestTimedeltaArraylikeInvalidArithmeticOps: +class TestTimedelta64ArrayLikeArithmetic: + # Arithmetic tests for timedelta64[ns] vectors fully parametrized over + # DataFrame/Series/TimedeltaIndex/TimedeltaArray. Ideally all arithmetic + # tests will eventually end up here. + def test_td64arr_pow_invalid(self, scalar_td, box_with_array): td1 = Series([timedelta(minutes=5, seconds=3)] * 3) td1.iloc[2] = np.nan From 15eb9cad864b6794c4f7e7c08c2933a0a1169859 Mon Sep 17 00:00:00 2001 From: Sergei Ivko Date: Tue, 3 Sep 2019 02:52:10 +0300 Subject: [PATCH 56/95] ENH: Enable read_csv interpret 'Infinity' as floating point value #10065 (#28181) --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/_libs/parsers.pyx | 18 ++++++++++++++---- pandas/_libs/src/parse_helper.h | 19 ++++++++++++++++++- pandas/tests/io/parser/test_common.py | 17 +++++++++++++++++ 4 files changed, 50 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 6834435adb4780..cd0714838a3f15 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -162,7 +162,7 @@ I/O - :meth:`read_csv` now accepts binary mode file buffers when using the Python csv engine (:issue:`23779`) - Bug in :meth:`DataFrame.to_json` where using a Tuple as a column or index value and using ``orient="columns"`` or ``orient="index"`` would produce invalid JSON (:issue:`20500`) -- +- Improve infinity parsing. 
:meth:`read_csv` now interprets ``Infinity``, ``+Infinity``, ``-Infinity`` as floating point values (:issue:`10065`) Plotting ^^^^^^^^ diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 6cc9dd22ce7c92..62a3568932def4 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -1693,6 +1693,10 @@ cdef: char* cposinf = b'+inf' char* cneginf = b'-inf' + char* cinfty = b'Infinity' + char* cposinfty = b'+Infinity' + char* cneginfty = b'-Infinity' + cdef _try_double(parser_t *parser, int64_t col, int64_t line_start, int64_t line_end, @@ -1772,9 +1776,12 @@ cdef inline int _try_double_nogil(parser_t *parser, if error != 0 or p_end == word or p_end[0]: error = 0 if (strcasecmp(word, cinf) == 0 or - strcasecmp(word, cposinf) == 0): + strcasecmp(word, cposinf) == 0 or + strcasecmp(word, cinfty) == 0 or + strcasecmp(word, cposinfty) == 0): data[0] = INF - elif strcasecmp(word, cneginf) == 0: + elif (strcasecmp(word, cneginf) == 0 or + strcasecmp(word, cneginfty) == 0 ): data[0] = NEGINF else: return 1 @@ -1793,9 +1800,12 @@ cdef inline int _try_double_nogil(parser_t *parser, if error != 0 or p_end == word or p_end[0]: error = 0 if (strcasecmp(word, cinf) == 0 or - strcasecmp(word, cposinf) == 0): + strcasecmp(word, cposinf) == 0 or + strcasecmp(word, cinfty) == 0 or + strcasecmp(word, cposinfty) == 0): data[0] = INF - elif strcasecmp(word, cneginf) == 0: + elif (strcasecmp(word, cneginf) == 0 or + strcasecmp(word, cneginfty) == 0): data[0] = NEGINF else: return 1 diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index 1db1878a8a773f..1db4c813bb4930 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -50,7 +50,7 @@ int floatify(PyObject *str, double *result, int *maybe_int) { status = to_double(data, result, sci, dec, maybe_int); if (!status) { - /* handle inf/-inf */ + /* handle inf/-inf infinity/-infinity */ if (strlen(data) == 3) { if (0 == strcasecmp(data, "inf")) { *result = HUGE_VAL; @@ -68,6 +68,23 @@ int floatify(PyObject *str, double *result, int *maybe_int) { } else { goto parsingerror; } + } else if (strlen(data) == 8) { + if (0 == strcasecmp(data, "infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } + } else if (strlen(data) == 9) { + if (0 == strcasecmp(data, "-infinity")) { + *result = -HUGE_VAL; + *maybe_int = 0; + } else if (0 == strcasecmp(data, "+infinity")) { + *result = HUGE_VAL; + *maybe_int = 0; + } else { + goto parsingerror; + } } else { goto parsingerror; } diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index e04535df56663c..0586593c87cc54 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -1865,6 +1865,23 @@ def test_inf_parsing(all_parsers, na_filter): tm.assert_frame_equal(result, expected) +@pytest.mark.parametrize("na_filter", [True, False]) +def test_infinity_parsing(all_parsers, na_filter): + parser = all_parsers + data = """\ +,A +a,Infinity +b,-Infinity +c,+Infinity +""" + expected = DataFrame( + {"A": [float("infinity"), float("-infinity"), float("+infinity")]}, + index=["a", "b", "c"], + ) + result = parser.read_csv(StringIO(data), index_col=0, na_filter=na_filter) + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("nrows", [0, 1, 2, 3, 4, 5]) def test_raise_on_no_columns(all_parsers, nrows): parser = all_parsers From 91e5b85aeaa4e06c18ec1c8a59e3fce3f2545f10 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Sep 2019 
17:07:26 -0700 Subject: [PATCH 57/95] Revert #27959 (#28258) --- pandas/core/ops/__init__.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index df097d7ad91dc7..cc2d4ced1243f1 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -829,11 +829,6 @@ def wrapper(self, other): # Defer to DataFrame implementation; fail early return NotImplemented - elif should_extension_dispatch(self, other): - # e.g. SparseArray - res_values = dispatch_to_extension_op(op, self, other) - return _construct_result(self, res_values, index=self.index, name=res_name) - elif isinstance(other, (ABCSeries, ABCIndexClass)): is_other_int_dtype = is_integer_dtype(other.dtype) other = other if is_other_int_dtype else fill_bool(other) From ae93c2302dd687ea8f5bcfdb6e9591fb9bfdb19a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 2 Sep 2019 22:04:45 -0700 Subject: [PATCH 58/95] Add peakmem benchmarks for rolling (#28255) --- asv_bench/benchmarks/rolling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/asv_bench/benchmarks/rolling.py b/asv_bench/benchmarks/rolling.py index 3640513d31be26..b42fa553b495ce 100644 --- a/asv_bench/benchmarks/rolling.py +++ b/asv_bench/benchmarks/rolling.py @@ -21,6 +21,9 @@ def setup(self, constructor, window, dtype, method): def time_rolling(self, constructor, window, dtype, method): getattr(self.roll, method)() + def peakmem_rolling(self, constructor, window, dtype, method): + getattr(self.roll, method)() + class ExpandingMethods: From 9cb5de04bc61f23047eb7f34bef2bb14ef58da8e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Sep 2019 04:39:20 -0700 Subject: [PATCH 59/95] CLN: Catch more specific exceptions in groupby (#27909) * catch stricter --- pandas/_libs/index.pyx | 1 + pandas/_libs/index_class_helper.pxi.in | 11 ++++++++++- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 3 +-- pandas/core/groupby/grouper.py | 6 ++++-- pandas/core/groupby/ops.py | 4 ++-- 6 files changed, 19 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 7424c4ddc3d924..979dad6db0838f 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -1,4 +1,5 @@ from datetime import datetime, timedelta, date +import warnings import cython diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in index 3c9a096e7ecc0c..4db048eeb03831 100644 --- a/pandas/_libs/index_class_helper.pxi.in +++ b/pandas/_libs/index_class_helper.pxi.in @@ -60,7 +60,16 @@ cdef class {{name}}Engine(IndexEngine): # A view is needed for some subclasses, such as PeriodEngine: values = self._get_index_values().view('{{dtype}}') - indexer = values == val + try: + with warnings.catch_warnings(): + # e.g. 
if values is float64 and `val` is a str, suppress warning + warnings.filterwarnings("ignore", category=FutureWarning) + indexer = values == val + except TypeError: + # if the equality above returns a bool, cython will raise TypeError + # when trying to cast it to ndarray + raise KeyError(val) + found = np.where(indexer)[0] count = len(found) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 6c95b521110a98..c0436e93890782 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -349,7 +349,7 @@ def _decide_output_index(self, output, labels): output_keys = sorted(output) try: output_keys.sort() - except Exception: # pragma: no cover + except TypeError: pass if isinstance(labels, MultiIndex): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6deef16bdec131..55def024cb1d46 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -727,8 +727,7 @@ def f(g): with option_context("mode.chained_assignment", None): try: result = self._python_apply_general(f) - except Exception: - + except TypeError: # gh-20949 # try again, with .apply acting as a filtering # operation, by excluding the grouping column diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 31623171e9e631..d079a1c4ef4f7b 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -592,9 +592,11 @@ def is_in_axis(key): # if the grouper is obj[name] def is_in_obj(gpr): + if not hasattr(gpr, "name"): + return False try: - return id(gpr) == id(obj[gpr.name]) - except Exception: + return gpr is obj[gpr.name] + except (KeyError, IndexError): return False for i, (gpr, level) in enumerate(zip(keys, levels)): diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 7afb0a28f943ee..6263973fb0d2fe 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -212,8 +212,8 @@ def apply(self, f, data, axis=0): # This Exception is also raised if `f` triggers an exception # but it is preferable to raise the exception in Python. pass - except Exception: - # raise this error to the caller + except TypeError: + # occurs if we have any EAs pass for key, (i, group) in zip(group_keys, splitter): From 45668500c0b48bd4b534b57f84e7cfc374b9da80 Mon Sep 17 00:00:00 2001 From: Marc Garcia Date: Tue, 3 Sep 2019 12:47:35 +0100 Subject: [PATCH 60/95] DOC: Add missing public plotting functions to the docs (#28179) * DOC: Add missing public plotting functions to the docs --- doc/source/reference/plotting.rst | 4 ++++ pandas/plotting/_misc.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/reference/plotting.rst b/doc/source/reference/plotting.rst index 7615e1d20f5e27..95657dfa5fde5b 100644 --- a/doc/source/reference/plotting.rst +++ b/doc/source/reference/plotting.rst @@ -13,10 +13,14 @@ The following functions are contained in the `pandas.plotting` module. 
:toctree: api/ andrews_curves + autocorrelation_plot bootstrap_plot + boxplot deregister_matplotlib_converters lag_plot parallel_coordinates + plot_params radviz register_matplotlib_converters scatter_matrix + table diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 7ed0ffc6d0115e..a8e86d9dfa997d 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -417,8 +417,8 @@ def autocorrelation_plot(series, ax=None, **kwds): Parameters ---------- - series: Time series - ax: Matplotlib axis object, optional + series : Time series + ax : Matplotlib axis object, optional kwds : keywords Options to pass to matplotlib plotting method From afe0cc360950302be41b4c7e8fd3c5272b537297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?O=C4=9Fuzhan=20=C3=96=C4=9Freden?= Date: Tue, 3 Sep 2019 20:36:06 +0200 Subject: [PATCH 61/95] DOC: Add docstring to the insertion method & add empty result note (#26872) * Add docstring to the insertion method & fix #21364 Credit for empty result documentation goes to MagnetarAlex --- doc/source/user_guide/io.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 338c890ce317c5..f6b0c55d39f65d 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -5047,6 +5047,17 @@ Example of a callable using PostgreSQL `COPY clause from io import StringIO def psql_insert_copy(table, conn, keys, data_iter): + """ + Execute SQL statement inserting data + + Parameters + ---------- + table : pandas.io.sql.SQLTable + conn : sqlalchemy.engine.Engine or sqlalchemy.engine.Connection + keys : list of str + Column names + data_iter : Iterable that iterates the values to be inserted + """ # gets a DBAPI connection that can provide a cursor dbapi_conn = conn.connection with dbapi_conn.cursor() as cur: @@ -5080,6 +5091,18 @@ table name and optionally a subset of columns to read. pd.read_sql_table('data', engine) +.. note:: + + Note that pandas infers column dtypes from query outputs, and not by looking + up data types in the physical database schema. For example, assume ``userid`` + is an integer column in a table. Then, intuitively, ``select userid ...`` will + return integer-valued series, while ``select cast(userid as text) ...`` will + return object-valued (str) series. Accordingly, if the query output is empty, + then all resulting columns will be returned as object-valued (since they are + most general). If you foresee that your query will sometimes generate an empty + result, you may want to explicitly typecast afterwards to ensure dtype + integrity. + You can also specify the name of the column as the ``DataFrame`` index, and specify a subset of columns to be read. From 9777e8402cf353ce9c33375e1ed885202264a34d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Sep 2019 11:53:52 -0700 Subject: [PATCH 62/95] REF: use dispatch_to_extension_op for bool ops (#28260) re-implement #27959, which was previously merged and reverted. 
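With this in place, Series logical ops check ``should_extension_dispatch``
first: both operands are unboxed with ``extract_array`` and, when either side
is backed by an ExtensionArray, the op is handed to the array's own
implementation instead of the bool/object fallback paths. Condensed from the
diff below:

    elif should_extension_dispatch(self, other):
        lvalues = extract_array(self, extract_numpy=True)
        rvalues = extract_array(other, extract_numpy=True)
        res_values = dispatch_to_extension_op(op, lvalues, rvalues)

So an op like ``&`` between two Sparse[bool] Series should, for example, stay
within the sparse code path rather than densifying first (assuming the
extension array implements the operator, as the ``# e.g. SparseArray`` comment
in the first version of this patch suggests).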
---
 pandas/core/ops/__init__.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py
index cc2d4ced1243f1..9fd6efe32de291 100644
--- a/pandas/core/ops/__init__.py
+++ b/pandas/core/ops/__init__.py
@@ -829,6 +829,13 @@ def wrapper(self, other):
             # Defer to DataFrame implementation; fail early
             return NotImplemented
 
+        elif should_extension_dispatch(self, other):
+            lvalues = extract_array(self, extract_numpy=True)
+            rvalues = extract_array(other, extract_numpy=True)
+            res_values = dispatch_to_extension_op(op, lvalues, rvalues)
+            result = self._constructor(res_values, index=self.index, name=res_name)
+            return finalizer(result)
+
         elif isinstance(other, (ABCSeries, ABCIndexClass)):
             is_other_int_dtype = is_integer_dtype(other.dtype)
             other = other if is_other_int_dtype else fill_bool(other)

From efa177d4eedf03ce0dd33063b09b7bd5580c5a98 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Tue, 3 Sep 2019 14:01:08 -0500
Subject: [PATCH 63/95] VIS: Validate plot backend when setting. (#28164)

* Validate plot backend when setting.

Closes https://github.com/pandas-dev/pandas/issues/28163
---
 doc/source/whatsnew/v1.0.0.rst        |  1 +
 pandas/core/config_init.py            | 29 +++----------
 pandas/plotting/_core.py              | 24 +++++++---
 pandas/tests/plotting/test_backend.py | 63 +++++++++++++--------------
 pandas/tests/plotting/test_misc.py    |  2 +-
 5 files changed, 58 insertions(+), 61 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index cd0714838a3f15..91e8c9efba693d 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -172,6 +172,7 @@ Plotting
 - Bug in :meth:`DataFrame.plot` producing incorrect legend markers when plotting multiple series on the same axis (:issue:`18222`)
 - Bug in :meth:`DataFrame.plot` when ``kind='box'`` and data contains datetime or timedelta data. These types are now automatically dropped (:issue:`22799`)
 - Bug in :meth:`DataFrame.plot.line` and :meth:`DataFrame.plot.area` produce wrong xlim in x-axis (:issue:`27686`, :issue:`25160`, :issue:`24784`)
+- :func:`set_option` now validates that the plot backend provided to ``'plotting.backend'`` implements the backend when the option is set, rather than when a plot is created (:issue:`28163`)
 
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index 08dce6aca6e6d1..dfc80140433f8e 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -9,8 +9,6 @@
 module is imported, register them here rather then in the module.
 """
 
-import importlib
-
 import pandas._config.config as cf
 from pandas._config.config import (
     is_bool,
@@ -581,26 +579,12 @@ def use_inf_as_na_cb(key):
 
 
 def register_plotting_backend_cb(key):
-    backend_str = cf.get_option(key)
-    if backend_str == "matplotlib":
-        try:
-            import pandas.plotting._matplotlib  # noqa
-        except ImportError:
-            raise ImportError(
-                "matplotlib is required for plotting when the "
-                'default backend "matplotlib" is selected.'
-            )
-        else:
-            return
+    if key == "matplotlib":
+        # We defer matplotlib validation, since it's the default
+        return
+    from pandas.plotting._core import _get_plot_backend
 
-    try:
-        importlib.import_module(backend_str)
-    except ImportError:
-        raise ValueError(
-            '"{}" does not seem to be an installed module. 
' - "A pandas plotting backend must be a module that " - "can be imported".format(backend_str) - ) + _get_plot_backend(key) with cf.config_prefix("plotting"): @@ -608,8 +592,7 @@ def register_plotting_backend_cb(key): "backend", defval="matplotlib", doc=plotting_backend_doc, - validator=str, - cb=register_plotting_backend_cb, + validator=register_plotting_backend_cb, ) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 2e6a401b49efc4..d3c9e8ccfa51ca 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1576,10 +1576,18 @@ def _find_backend(backend: str): # We re-raise later on. pass else: - _backends[backend] = module - return module - - raise ValueError("No backend {}".format(backend)) + if hasattr(module, "plot"): + # Validate that the interface is implemented when the option + # is set, rather than at plot time. + _backends[backend] = module + return module + + msg = ( + "Could not find plotting backend '{name}'. Ensure that you've installed the " + "package providing the '{name}' entrypoint, or that the package has a" + "top-level `.plot` method." + ) + raise ValueError(msg.format(name=backend)) def _get_plot_backend(backend=None): @@ -1600,7 +1608,13 @@ def _get_plot_backend(backend=None): if backend == "matplotlib": # Because matplotlib is an optional dependency and first-party backend, # we need to attempt an import here to raise an ImportError if needed. - import pandas.plotting._matplotlib as module + try: + import pandas.plotting._matplotlib as module + except ImportError: + raise ImportError( + "matplotlib is required for plotting when the " + 'default backend "matplotlib" is selected.' + ) from None _backends["matplotlib"] = module diff --git a/pandas/tests/plotting/test_backend.py b/pandas/tests/plotting/test_backend.py index d126407cfd823e..6511d94aa4c094 100644 --- a/pandas/tests/plotting/test_backend.py +++ b/pandas/tests/plotting/test_backend.py @@ -8,44 +8,38 @@ import pandas +dummy_backend = types.ModuleType("pandas_dummy_backend") +dummy_backend.plot = lambda *args, **kwargs: None -def test_matplotlib_backend_error(): - msg = ( - "matplotlib is required for plotting when the default backend " - '"matplotlib" is selected.' - ) - try: - import matplotlib # noqa - except ImportError: - with pytest.raises(ImportError, match=msg): - pandas.set_option("plotting.backend", "matplotlib") + +@pytest.fixture +def restore_backend(): + """Restore the plotting backend to matplotlib""" + pandas.set_option("plotting.backend", "matplotlib") + yield + pandas.set_option("plotting.backend", "matplotlib") def test_backend_is_not_module(): - msg = ( - '"not_an_existing_module" does not seem to be an installed module. ' - "A pandas plotting backend must be a module that can be imported" - ) + msg = "Could not find plotting backend 'not_an_existing_module'." 
with pytest.raises(ValueError, match=msg): pandas.set_option("plotting.backend", "not_an_existing_module") + assert pandas.options.plotting.backend == "matplotlib" -def test_backend_is_correct(monkeypatch): - monkeypatch.setattr( - "pandas.core.config_init.importlib.import_module", lambda name: None - ) - pandas.set_option("plotting.backend", "correct_backend") - assert pandas.get_option("plotting.backend") == "correct_backend" - # Restore backend for other tests (matplotlib can be not installed) - try: - pandas.set_option("plotting.backend", "matplotlib") - except ImportError: - pass +def test_backend_is_correct(monkeypatch, restore_backend): + monkeypatch.setitem(sys.modules, "pandas_dummy_backend", dummy_backend) + + pandas.set_option("plotting.backend", "pandas_dummy_backend") + assert pandas.get_option("plotting.backend") == "pandas_dummy_backend" + assert ( + pandas.plotting._core._get_plot_backend("pandas_dummy_backend") is dummy_backend + ) @td.skip_if_no_mpl -def test_register_entrypoint(): +def test_register_entrypoint(restore_backend): dist = pkg_resources.get_distribution("pandas") if dist.module_path not in pandas.__file__: @@ -74,13 +68,18 @@ def test_register_entrypoint(): assert result is mod -def test_register_import(): - mod = types.ModuleType("my_backend2") - mod.plot = lambda *args, **kwargs: 1 - sys.modules["my_backend2"] = mod +def test_setting_backend_without_plot_raises(): + # GH-28163 + module = types.ModuleType("pandas_plot_backend") + sys.modules["pandas_plot_backend"] = module - result = pandas.plotting._core._get_plot_backend("my_backend2") - assert result is mod + assert pandas.options.plotting.backend == "matplotlib" + with pytest.raises( + ValueError, match="Could not find plotting backend 'pandas_plot_backend'." + ): + pandas.set_option("plotting.backend", "pandas_plot_backend") + + assert pandas.options.plotting.backend == "matplotlib" @td.skip_if_mpl diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 6cb6f818d40fdd..940cfef4058e03 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -21,7 +21,7 @@ def test_import_error_message(): # GH-19810 df = DataFrame({"A": [1, 2]}) - with pytest.raises(ImportError, match="No module named 'matplotlib'"): + with pytest.raises(ImportError, match="matplotlib is required for plotting"): df.plot() From bfff080275b4456b28d71f0c7b4ec9e678d4270c Mon Sep 17 00:00:00 2001 From: jeschwar <36767735+jeschwar@users.noreply.github.com> Date: Tue, 3 Sep 2019 13:26:01 -0600 Subject: [PATCH 64/95] ENH: added optional caption and label arguments to DataFrame.to_latex() (#25437) * ENH: added optional caption and label support to DataFrame.to_latex() (#25436) --- doc/source/whatsnew/v1.0.0.rst | 3 +- pandas/core/generic.py | 29 ++++- pandas/io/formats/format.py | 4 + pandas/io/formats/latex.py | 128 +++++++++++++++++++-- pandas/tests/io/formats/test_to_latex.py | 138 ++++++++++++++++++++++- 5 files changed, 283 insertions(+), 19 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 91e8c9efba693d..0d2b81eca6789c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -20,8 +20,7 @@ including other versions of pandas. Enhancements ~~~~~~~~~~~~ - -- +- :meth:`DataFrame.to_latex` now accepts ``caption`` and ``label`` arguments (:issue:`25436`) - .. 
_whatsnew_1000.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1a5b36b07e93ca..b427b1f0ac8580 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2925,15 +2925,21 @@ def to_latex( multicolumn=None, multicolumn_format=None, multirow=None, + caption=None, + label=None, ): r""" - Render an object to a LaTeX tabular environment table. + Render object to a LaTeX tabular, longtable, or nested table/tabular. - Render an object to a tabular environment table. You can splice - this into a LaTeX document. Requires \usepackage{booktabs}. + Requires ``\usepackage{booktabs}``. The output can be copy/pasted + into a main LaTeX document or read from an external file + with ``\input{table.tex}``. .. versionchanged:: 0.20.2 - Added to Series + Added to Series. + + .. versionchanged:: 1.0.0 + Added caption and label arguments. Parameters ---------- @@ -3002,6 +3008,17 @@ def to_latex( from the pandas config module. .. versionadded:: 0.20.0 + + caption : str, optional + The LaTeX caption to be placed inside ``\caption{}`` in the output. + + .. versionadded:: 1.0.0 + + label : str, optional + The LaTeX label to be placed inside ``\label{}`` in the output. + This is used with ``\ref{}`` in the main ``.tex`` file. + + .. versionadded:: 1.0.0 %(returns)s See Also -------- @@ -3014,7 +3031,7 @@ def to_latex( >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'], ... 'mask': ['red', 'purple'], ... 'weapon': ['sai', 'bo staff']}) - >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE + >>> print(df.to_latex(index=False)) # doctest: +NORMALIZE_WHITESPACE \begin{tabular}{lll} \toprule name & mask & weapon \\ @@ -3061,6 +3078,8 @@ def to_latex( multicolumn=multicolumn, multicolumn_format=multicolumn_format, multirow=multirow, + caption=caption, + label=label, ) def to_csv( diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 8ff4b9bda0430a..f8db1b19dadfa8 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -888,6 +888,8 @@ def to_latex( multicolumn: bool = False, multicolumn_format: Optional[str] = None, multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, ) -> Optional[str]: """ Render a DataFrame to a LaTeX tabular/longtable environment output. @@ -902,6 +904,8 @@ def to_latex( multicolumn=multicolumn, multicolumn_format=multicolumn_format, multirow=multirow, + caption=caption, + label=label, ).get_result(buf=buf, encoding=encoding) def _format_col(self, i: int) -> List[str]: diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 4c4d5ec73269a5..ca9db88ae7be46 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -36,6 +36,8 @@ def __init__( multicolumn: bool = False, multicolumn_format: Optional[str] = None, multirow: bool = False, + caption: Optional[str] = None, + label: Optional[str] = None, ): self.fmt = formatter self.frame = self.fmt.frame @@ -45,11 +47,14 @@ def __init__( self.multicolumn = multicolumn self.multicolumn_format = multicolumn_format self.multirow = multirow + self.caption = caption + self.label = label self.escape = self.fmt.escape def write_result(self, buf: IO[str]) -> None: """ - Render a DataFrame to a LaTeX tabular/longtable environment output. + Render a DataFrame to a LaTeX tabular, longtable, or table/tabular + environment output. 
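        A hedged usage sketch of the new ``caption``/``label`` keywords
        (mirroring the tests added later in this patch; assumes a pandas
        build that already contains this change):

            >>> import pandas as pd
            >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
            >>> tex = df.to_latex(caption="A caption", label="tab:example")
            >>> "\\caption{A caption}" in tex and "\\label{tab:example}" in tex
            True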
""" # string representation of the columns @@ -114,12 +119,12 @@ def pad_empties(x): "not {typ}".format(typ=type(column_format)) ) - if not self.longtable: - buf.write("\\begin{{tabular}}{{{fmt}}}\n".format(fmt=column_format)) - buf.write("\\toprule\n") + if self.longtable: + self._write_longtable_begin(buf, column_format) else: - buf.write("\\begin{{longtable}}{{{fmt}}}\n".format(fmt=column_format)) - buf.write("\\toprule\n") + self._write_tabular_begin(buf, column_format) + + buf.write("\\toprule\n") ilevels = self.frame.index.nlevels clevels = self.frame.columns.nlevels @@ -183,11 +188,10 @@ def pad_empties(x): if self.multirow and i < len(strrows) - 1: self._print_cline(buf, i, len(strcols)) - if not self.longtable: - buf.write("\\bottomrule\n") - buf.write("\\end{tabular}\n") + if self.longtable: + self._write_longtable_end(buf) else: - buf.write("\\end{longtable}\n") + self._write_tabular_end(buf) def _format_multicolumn(self, row: List[str], ilevels: int) -> List[str]: r""" @@ -268,3 +272,107 @@ def _print_cline(self, buf: IO[str], i: int, icol: int) -> None: buf.write("\\cline{{{cl:d}-{icol:d}}}\n".format(cl=cl[1], icol=icol)) # remove entries that have been written to buffer self.clinebuf = [x for x in self.clinebuf if x[0] != i] + + def _write_tabular_begin(self, buf, column_format): + """ + Write the beginning of a tabular environment or + nested table/tabular environments including caption and label. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' + for 3 columns + + """ + if self.caption is not None or self.label is not None: + # then write output in a nested table/tabular environment + if self.caption is None: + caption_ = "" + else: + caption_ = "\n\\caption{{{}}}".format(self.caption) + + if self.label is None: + label_ = "" + else: + label_ = "\n\\label{{{}}}".format(self.label) + + buf.write("\\begin{{table}}\n\\centering{}{}\n".format(caption_, label_)) + else: + # then write output only in a tabular environment + pass + + buf.write("\\begin{{tabular}}{{{fmt}}}\n".format(fmt=column_format)) + + def _write_tabular_end(self, buf): + """ + Write the end of a tabular environment or nested table/tabular + environment. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + + """ + buf.write("\\bottomrule\n") + buf.write("\\end{tabular}\n") + if self.caption is not None or self.label is not None: + buf.write("\\end{table}\n") + else: + pass + + def _write_longtable_begin(self, buf, column_format): + """ + Write the beginning of a longtable environment including caption and + label if provided by user. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. 
+ column_format : str, default None + The columns format as specified in `LaTeX table format + `__ e.g 'rcl' + for 3 columns + + """ + buf.write("\\begin{{longtable}}{{{fmt}}}\n".format(fmt=column_format)) + + if self.caption is not None or self.label is not None: + if self.caption is None: + pass + else: + buf.write("\\caption{{{}}}".format(self.caption)) + + if self.label is None: + pass + else: + buf.write("\\label{{{}}}".format(self.label)) + + # a double-backslash is required at the end of the line + # as discussed here: + # https://tex.stackexchange.com/questions/219138 + buf.write("\\\\\n") + else: + pass + + @staticmethod + def _write_longtable_end(buf): + """ + Write the end of a longtable environment. + + Parameters + ---------- + buf : string or file handle + File path or object. If not specified, the result is returned as + a string. + + """ + buf.write("\\end{longtable}\n") diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 924b2a19e85046..9ffb54d23e37e3 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -388,8 +388,7 @@ def test_to_latex_special_escape(self): """ assert escaped_result == escaped_expected - def test_to_latex_longtable(self, float_frame): - float_frame.to_latex(longtable=True) + def test_to_latex_longtable(self): df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) withindex_result = df.to_latex(longtable=True) @@ -439,6 +438,141 @@ def test_to_latex_longtable(self, float_frame): with3columns_result = df.to_latex(index=False, longtable=True) assert r"\multicolumn{3}" in with3columns_result + def test_to_latex_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{table/tabular} environment" + the_label = "tab:table_tabular" + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(caption=the_caption) + + expected_c = r"""\begin{table} +\centering +\caption{a table in a \texttt{table/tabular} environment} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_c == expected_c + + # test when only the label is provided + result_l = df.to_latex(label=the_label) + + expected_l = r"""\begin{table} +\centering +\label{tab:table_tabular} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_l == expected_l + + # test when the caption and the label are provided + result_cl = df.to_latex(caption=the_caption, label=the_label) + + expected_cl = r"""\begin{table} +\centering +\caption{a table in a \texttt{table/tabular} environment} +\label{tab:table_tabular} +\begin{tabular}{lrl} +\toprule +{} & a & b \\ +\midrule +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\bottomrule +\end{tabular} +\end{table} +""" + assert result_cl == expected_cl + + def test_to_latex_longtable_caption_label(self): + # GH 25436 + the_caption = "a table in a \\texttt{longtable} environment" + the_label = "tab:longtable" + + df = DataFrame({"a": [1, 2], "b": ["b1", "b2"]}) + + # test when only the caption is provided + result_c = df.to_latex(longtable=True, caption=the_caption) + + expected_c = r"""\begin{longtable}{lrl} +\caption{a table in a \texttt{longtable} environment}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 
& b2 \\ +\end{longtable} +""" + assert result_c == expected_c + + # test when only the label is provided + result_l = df.to_latex(longtable=True, label=the_label) + + expected_l = r"""\begin{longtable}{lrl} +\label{tab:longtable}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_l == expected_l + + # test when the caption and the label are provided + result_cl = df.to_latex(longtable=True, caption=the_caption, label=the_label) + + expected_cl = r"""\begin{longtable}{lrl} +\caption{a table in a \texttt{longtable} environment}\label{tab:longtable}\\ +\toprule +{} & a & b \\ +\midrule +\endhead +\midrule +\multicolumn{3}{r}{{Continued on next page}} \\ +\midrule +\endfoot + +\bottomrule +\endlastfoot +0 & 1 & b1 \\ +1 & 2 & b2 \\ +\end{longtable} +""" + assert result_cl == expected_cl + def test_to_latex_escape_special_chars(self): special_characters = ["&", "%", "$", "#", "_", "{", "}", "~", "^", "\\"] df = DataFrame(data=special_characters) From 60ff4e1dad69af4585644ed8e6b7b6b10fb6a98b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Sep 2019 04:23:11 -0700 Subject: [PATCH 65/95] CLN: catch Exception in fewer places, assorted cleanups (#28276) --- ci/code_checks.sh | 2 +- pandas/_libs/lib.pyx | 8 ++++---- pandas/core/common.py | 2 +- pandas/core/groupby/grouper.py | 6 ++++-- pandas/core/groupby/ops.py | 30 +++++------------------------- pandas/core/ops/__init__.py | 7 ++----- pandas/tests/test_downstream.py | 1 + 7 files changed, 18 insertions(+), 38 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 333136ddfddd95..d9369b916fe4dc 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -203,7 +203,7 @@ if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then import sys import pandas -blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2' 'hypothesis', +blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2', 'hypothesis', 'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', 'tables', 'xlrd', 'xlsxwriter', 'xlwt'} mods = blacklist & set(m.split('.')[0] for m in sys.modules) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 47d1e98f214a11..4ef17b116a1d94 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -235,7 +235,7 @@ def fast_unique_multiple(list arrays, sort: bool=True): if sort is None: try: uniques.sort() - except Exception: + except TypeError: # TODO: RuntimeWarning? 
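            # (aside: ``list.sort`` raises TypeError on unorderable mixed
            #  keys under Python 3, e.g. ``sorted([1, "a"])``; narrowing the
            #  bare ``except Exception`` to TypeError keeps genuine bugs
            #  from being silently swallowed here)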
pass @@ -264,7 +264,7 @@ def fast_unique_multiple_list(lists: list, sort: bool=True) -> list: if sort: try: uniques.sort() - except Exception: + except TypeError: pass return uniques @@ -304,7 +304,7 @@ def fast_unique_multiple_list_gen(object gen, bint sort=True): if sort: try: uniques.sort() - except Exception: + except TypeError: pass return uniques @@ -1410,7 +1410,7 @@ def infer_datetimelike_array(arr: object) -> object: try: array_to_datetime(objs, errors='raise') return 'datetime' - except: + except (ValueError, TypeError): pass # we are *not* going to infer from strings diff --git a/pandas/core/common.py b/pandas/core/common.py index a507625ccfa01f..cf113c8aecbfe5 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -211,7 +211,7 @@ def try_sort(iterable): listed = list(iterable) try: return sorted(listed) - except Exception: + except TypeError: return listed diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d079a1c4ef4f7b..2ebfbed0b132a2 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -583,9 +583,11 @@ def _get_grouper( # if the actual grouper should be obj[key] def is_in_axis(key): if not _is_label_like(key): + items = obj._data.items try: - obj._data.items.get_loc(key) - except Exception: + items.get_loc(key) + except (KeyError, TypeError): + # TypeError shows up here if we pass e.g. Int64Index return False return True diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 6263973fb0d2fe..bcda25bf3ce394 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -615,14 +615,9 @@ def _aggregate( is_datetimelike, min_count=-1, ): - if values.ndim > 3: + if values.ndim > 2: # punting for now - raise NotImplementedError("number of dimensions is currently limited to 3") - elif values.ndim > 2: - for i, chunk in enumerate(values.transpose(2, 0, 1)): - - chunk = chunk.squeeze() - agg_func(result[:, :, i], counts, chunk, comp_ids, min_count) + raise NotImplementedError("number of dimensions is currently limited to 2") else: agg_func(result, counts, values, comp_ids, min_count) @@ -640,20 +635,9 @@ def _transform( ): comp_ids, _, ngroups = self.group_info - if values.ndim > 3: + if values.ndim > 2: # punting for now - raise NotImplementedError("number of dimensions is currently limited to 3") - elif values.ndim > 2: - for i, chunk in enumerate(values.transpose(2, 0, 1)): - - transform_func( - result[:, :, i], - values, - comp_ids, - ngroups, - is_datetimelike, - **kwargs - ) + raise NotImplementedError("number of dimensions is currently limited to 2") else: transform_func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs) @@ -932,11 +916,7 @@ def _chop(self, sdata, slice_obj): class FrameSplitter(DataSplitter): def fast_apply(self, f, names): # must return keys::list, values::list, mutated::bool - try: - starts, ends = lib.generate_slices(self.slabels, self.ngroups) - except Exception: - # fails when all -1 - return [], True + starts, ends = lib.generate_slices(self.slabels, self.ngroups) sdata = self._get_sorted_data() return libreduction.apply_frame_axis0(sdata, f, names, starts, ends) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 9fd6efe32de291..a94a4ccff0efe5 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -698,10 +698,7 @@ def na_op(x, y): return result - def wrapper(self, other, axis=None): - # Validate the axis parameter - if axis is not None: - self._get_axis_number(axis) + def wrapper(self, 
other): res_name = get_op_result_name(self, other) other = lib.item_from_zerodim(other) @@ -1104,7 +1101,7 @@ def f(self, other): # straight boolean comparisons we want to allow all columns # (regardless of dtype to pass thru) See #4537 for discussion. res = self._combine_const(other, func) - return res.fillna(True).astype(bool) + return res f.__name__ = op_name diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 93baafddedeb48..3a24736c57c011 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -145,6 +145,7 @@ def _getitem_tuple(self, tup): # Cython import warning @pytest.mark.filterwarnings("ignore:can't resolve:ImportWarning") +@pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning") def test_pyarrow(df): pyarrow = import_module("pyarrow") # noqa From 243c1bcfd09342efeae50f5b8104d92e2f9f06bd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 4 Sep 2019 06:26:11 -0500 Subject: [PATCH 66/95] DEV: Remove seed-isort-config hook (#28272) This was causing issues for me locally. Anyone else? It took a while to run, and didn't seem to give the same output as others (depends on something peculiar to my environment) which doesn't seem to be great for a pre-commit hook. Closes https://github.com/pandas-dev/pandas/issues/28236 --- .pre-commit-config.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5cc22c638c9b13..b79f0f71dac23d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,3 @@ repos: hooks: - id: isort language: python_venv -- repo: https://github.com/asottile/seed-isort-config - rev: v1.9.2 - hooks: - - id: seed-isort-config From 4c778a1eb73da3a2935357dbbfcbe46f3be52f31 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Wed, 4 Sep 2019 04:56:25 -0700 Subject: [PATCH 67/95] Fix to_json Memory Tests (#28259) --- asv_bench/benchmarks/io/json.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index b249c92b53e93e..5c1d39776b91c9 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -118,7 +118,7 @@ def setup(self, orient, frame): def time_to_json(self, orient, frame): getattr(self, frame).to_json(self.fname, orient=orient) - def mem_to_json(self, orient, frame): + def peakmem_to_json(self, orient, frame): getattr(self, frame).to_json(self.fname, orient=orient) def time_to_json_wide(self, orient, frame): @@ -126,7 +126,7 @@ def time_to_json_wide(self, orient, frame): df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) df.to_json(self.fname, orient=orient) - def mem_to_json_wide(self, orient, frame): + def peakmem_to_json_wide(self, orient, frame): base_df = getattr(self, frame).copy() df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) df.to_json(self.fname, orient=orient) From 4252ab7718b06820ce485b8136294616e34ab168 Mon Sep 17 00:00:00 2001 From: tobycheese Date: Wed, 4 Sep 2019 18:13:11 +0200 Subject: [PATCH 68/95] fix typo in example (#28281) --- doc/source/user_guide/options.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index f32a8adfd4d335..1f1dff417e68f3 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -163,7 +163,7 @@ determines how many rows are shown in the truncated repr. .. 
ipython:: python pd.set_option('max_rows', 8) - pd.set_option('max_rows', 4) + pd.set_option('min_rows', 4) # below max_rows -> all rows shown df = pd.DataFrame(np.random.randn(7, 2)) df From 0bde7cedf46209a9fd4fa8c7f9fbce8b49aa78cd Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Wed, 4 Sep 2019 18:07:16 +0100 Subject: [PATCH 69/95] BUG: Make sure correct values are passed to Rolling._on when axis=1 (#28267) * Make sure correct values are passed to Rolling._on when axis=1 * Update rolling.py * Capitalise 'd' as in documentation * Parametrize over tz_naive_fixture * autoformat --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/window/rolling.py | 5 ++++- pandas/tests/window/test_rolling.py | 27 +++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 0d2b81eca6789c..58892b316c9408 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -177,7 +177,7 @@ Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - -- +- Bug in :meth:`DataFrame.rolling` not allowing for rolling over datetimes when ``axis=1`` (:issue: `28192`) - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) - Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a7e122fa3528ff..29ef2e917ae57f 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1653,7 +1653,10 @@ def is_datetimelike(self): def _on(self): if self.on is None: - return self.obj.index + if self.axis == 0: + return self.obj.index + elif self.axis == 1: + return self.obj.columns elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns: return Index(self.obj[self.on]) else: diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b4787bf25e3bb6..70ba85120af3c6 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -334,3 +334,30 @@ def test_readonly_array(self): result = pd.Series(arr).rolling(2).mean() expected = pd.Series([np.nan, 2, np.nan, np.nan, 4]) tm.assert_series_equal(result, expected) + + def test_rolling_datetime(self, axis_frame, tz_naive_fixture): + # GH-28192 + tz = tz_naive_fixture + df = pd.DataFrame( + { + i: [1] * 2 + for i in pd.date_range("2019-8-01", "2019-08-03", freq="D", tz=tz) + } + ) + if axis_frame in [0, "index"]: + result = df.T.rolling("2D", axis=axis_frame).sum().T + else: + result = df.rolling("2D", axis=axis_frame).sum() + expected = pd.DataFrame( + { + **{ + i: [1.0] * 2 + for i in pd.date_range("2019-8-01", periods=1, freq="D", tz=tz) + }, + **{ + i: [2.0] * 2 + for i in pd.date_range("2019-8-02", "2019-8-03", freq="D", tz=tz) + }, + } + ) + tm.assert_frame_equal(result, expected) From 6a7ba96de03b1f6c3ee534bfa76afba03596a9f4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Sep 2019 08:34:44 -0700 Subject: [PATCH 70/95] PERF: asv for import (#28239) --- asv_bench/benchmarks/package.py | 25 +++++++++++++++++++++++++ pandas/core/dtypes/dtypes.py | 2 +- pandas/util/_test_decorators.py | 2 +- 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 asv_bench/benchmarks/package.py diff --git a/asv_bench/benchmarks/package.py b/asv_bench/benchmarks/package.py new file mode 100644 index 00000000000000..8ca33db361fa07 --- /dev/null +++ 
b/asv_bench/benchmarks/package.py @@ -0,0 +1,25 @@ +""" +Benchmarks for pandas at the package-level. +""" +import subprocess +import sys + +from pandas.compat import PY37 + + +class TimeImport: + def time_import(self): + if PY37: + # on py37+ we the "-X importtime" usage gives us a more precise + # measurement of the import time we actually care about, + # without the subprocess or interpreter overhead + cmd = [sys.executable, "-X", "importtime", "-c", "import pandas as pd"] + p = subprocess.run(cmd, stderr=subprocess.PIPE) + + line = p.stderr.splitlines()[-1] + field = line.split(b"|")[-2].strip() + total = int(field) # microseconds + return total + + cmd = [sys.executable, "-c", "import pandas as pd"] + subprocess.run(cmd, stderr=subprocess.PIPE) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index ee1866e60644b8..aa7e6801ba431c 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -23,7 +23,7 @@ ordered_sentinel = object() # type: object -def register_extension_dtype(cls: Type[ExtensionDtype],) -> Type[ExtensionDtype]: +def register_extension_dtype(cls: Type[ExtensionDtype]) -> Type[ExtensionDtype]: """ Register an ExtensionType with pandas as class decorator. diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 627757aaa37412..0e07b9f5fe9f76 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -102,7 +102,7 @@ def _skip_if_no_scipy(): ) -def skip_if_installed(package: str,) -> Callable: +def skip_if_installed(package: str) -> Callable: """ Skip a test if a package is installed. From 04e67c46e5c9f93f26d41d9e970dc7554e80916c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Sep 2019 08:52:21 -0700 Subject: [PATCH 71/95] PERF: trim import time ~5% (#28227) * PERF: trim import time ~5% with lazy imports --- ci/code_checks.sh | 10 +++++++--- pandas/io/common.py | 16 +++++++++++++--- pandas/io/excel/_base.py | 2 +- pandas/tests/io/excel/test_readers.py | 3 +-- pandas/tests/io/parser/test_common.py | 2 +- pandas/tests/io/test_html.py | 3 ++- pandas/util/testing.py | 22 +++++++++++++++++----- 7 files changed, 42 insertions(+), 16 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index d9369b916fe4dc..f839d86318e2ec 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -203,10 +203,14 @@ if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then import sys import pandas -blacklist = {'bs4', 'gcsfs', 'html5lib', 'ipython', 'jinja2', 'hypothesis', +blacklist = {'bs4', 'gcsfs', 'html5lib', 'http', 'ipython', 'jinja2', 'hypothesis', 'lxml', 'numexpr', 'openpyxl', 'py', 'pytest', 's3fs', 'scipy', - 'tables', 'xlrd', 'xlsxwriter', 'xlwt'} -mods = blacklist & set(m.split('.')[0] for m in sys.modules) + 'tables', 'urllib.request', 'xlrd', 'xlsxwriter', 'xlwt'} + +# GH#28227 for some of these check for top-level modules, while others are +# more specific (e.g. 
urllib.request) +import_mods = set(m.split('.')[0] for m in sys.modules) | set(sys.modules) +mods = blacklist & import_mods if mods: sys.stderr.write('err: pandas should not import: {}\n'.format(', '.join(mods))) sys.exit(len(mods)) diff --git a/pandas/io/common.py b/pandas/io/common.py index 30228d660e8167..ac8dee8467370d 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -4,7 +4,6 @@ import codecs import csv import gzip -from http.client import HTTPException # noqa from io import BufferedIOBase, BytesIO import mmap import os @@ -22,7 +21,6 @@ Type, Union, ) -from urllib.error import URLError # noqa from urllib.parse import ( # noqa urlencode, urljoin, @@ -31,7 +29,6 @@ uses_params, uses_relative, ) -from urllib.request import pathname2url, urlopen import zipfile from pandas.compat import _get_lzma_file, _import_lzma @@ -188,6 +185,16 @@ def is_gcs_url(url) -> bool: return False +def urlopen(*args, **kwargs): + """ + Lazy-import wrapper for stdlib urlopen, as that imports a big chunk of + the stdlib. + """ + import urllib.request + + return urllib.request.urlopen(*args, **kwargs) + + def get_filepath_or_buffer( filepath_or_buffer: FilePathOrBuffer, encoding: Optional[str] = None, @@ -261,6 +268,9 @@ def file_path_to_url(path: str) -> str: ------- a valid FILE URL """ + # lazify expensive import (~30ms) + from urllib.request import pathname2url + return urljoin("file:", pathname2url(path)) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 997edf49d9e8fc..949eff45c0e92c 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -4,7 +4,6 @@ from io import BytesIO import os from textwrap import fill -from urllib.request import urlopen from pandas._config import config @@ -21,6 +20,7 @@ _stringify_path, _validate_header_arg, get_filepath_or_buffer, + urlopen, ) from pandas.io.excel._util import ( _fill_mi_header, diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index a39cface0e0157..5326f2df68972f 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -3,6 +3,7 @@ from datetime import datetime, time from functools import partial import os +from urllib.error import URLError import warnings import numpy as np @@ -14,8 +15,6 @@ from pandas import DataFrame, Index, MultiIndex, Series import pandas.util.testing as tm -from pandas.io.common import URLError - @contextlib.contextmanager def ignore_xlrd_time_clock_warning(): diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 0586593c87cc54..756463e9d8d335 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -11,6 +11,7 @@ import os import platform from tempfile import TemporaryFile +from urllib.error import URLError import numpy as np import pytest @@ -21,7 +22,6 @@ from pandas import DataFrame, Index, MultiIndex, Series, compat, concat import pandas.util.testing as tm -from pandas.io.common import URLError from pandas.io.parsers import CParserWrapper, TextFileReader, TextParser diff --git a/pandas/tests/io/test_html.py b/pandas/tests/io/test_html.py index 615e2735cd288f..183d217eb09d61 100644 --- a/pandas/tests/io/test_html.py +++ b/pandas/tests/io/test_html.py @@ -4,6 +4,7 @@ import os import re import threading +from urllib.error import URLError import numpy as np from numpy.random import rand @@ -17,7 +18,7 @@ import pandas.util.testing as tm from pandas.util.testing import makeCustomDataframe as mkdf, network -from 
pandas.io.common import URLError, file_path_to_url +from pandas.io.common import file_path_to_url import pandas.io.html from pandas.io.html import read_html diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0d543f891a5f63..c54dab046f57e7 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -4,7 +4,6 @@ from datetime import datetime from functools import wraps import gzip -import http.client import os import re from shutil import rmtree @@ -2275,11 +2274,17 @@ def dec(f): # But some tests (test_data yahoo) contact incredibly flakey # servers. -# and conditionally raise on these exception types -_network_error_classes = (IOError, http.client.HTTPException, TimeoutError) +# and conditionally raise on exception types in _get_default_network_errors -def can_connect(url, error_classes=_network_error_classes): +def _get_default_network_errors(): + # Lazy import for http.client because it imports many things from the stdlib + import http.client + + return (IOError, http.client.HTTPException, TimeoutError) + + +def can_connect(url, error_classes=None): """Try to connect to the given url. True if succeeds, False if IOError raised @@ -2294,6 +2299,10 @@ def can_connect(url, error_classes=_network_error_classes): Return True if no IOError (unable to connect) or URLError (bad url) was raised """ + + if error_classes is None: + error_classes = _get_default_network_errors() + try: with urlopen(url): pass @@ -2309,7 +2318,7 @@ def network( url="http://www.google.com", raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, check_before_test=False, - error_classes=_network_error_classes, + error_classes=None, skip_errnos=_network_errno_vals, _skip_on_messages=_network_error_messages, ): @@ -2397,6 +2406,9 @@ def network( """ from pytest import skip + if error_classes is None: + error_classes = _get_default_network_errors() + t.network = True @wraps(t) From 2915223e8c6866149e78f5bdab184881fa39354c Mon Sep 17 00:00:00 2001 From: Igor Filippov Date: Thu, 5 Sep 2019 19:10:53 +0200 Subject: [PATCH 72/95] Improved benchmark coverage for reading spreadsheets (#28230) * Improved benchmark coverage for reading spreadsheets * Added blank lines * More blank lines * Updated whatsnew * - Removed whatsnew entry - Added comment in environment.yml - Added conda-forge to asv config - Refactored reader benchmark * Updated requirements-dev.txt * Fixed imports order * Fixed imports again * Run black * Changed conda channels order in ASV config * Used setup_cache to speed up read benchmark --- asv_bench/asv.conf.json | 3 +- asv_bench/benchmarks/io/excel.py | 76 +++++++++++++++++++++++--------- environment.yml | 1 + requirements-dev.txt | 1 + 4 files changed, 58 insertions(+), 23 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 571ede1a211340..c04bbf53a86a6f 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -50,12 +50,13 @@ "xlsxwriter": [], "xlrd": [], "xlwt": [], + "odfpy": [], "pytest": [], // If using Windows with python 2.7 and want to build using the // mingw toolchain (rather than MSVC), uncomment the following line. // "libpython": [], }, - + "conda_channels": ["defaults", "conda-forge"], // Combinations of libraries/python versions can be excluded/included // from the set to test. Each entry is a dictionary containing additional // key-value pairs to include/exclude. 
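A note on the asv pattern used by the read benchmark below: ``setup_cache``
runs once per benchmark class and its side effects (here, the spreadsheet
files written to disk) are shared across all parameters and repeats, whereas
``setup`` re-runs for each one. A minimal sketch under those assumptions
(the class and file names are hypothetical, not part of this patch):

    import numpy as np
    import pandas as pd

    class ReadCSVSketch:
        fname = "bench_data.csv"

        def setup_cache(self):
            # expensive one-time fixture: write the file a single time
            pd.DataFrame(np.random.randn(1000, 5)).to_csv(self.fname)

        def time_read(self):
            # only the repeated re-read is timed
            pd.read_csv(self.fname)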
diff --git a/asv_bench/benchmarks/io/excel.py b/asv_bench/benchmarks/io/excel.py index 9aa5cbd5b6f7c3..c97cf768e27d97 100644 --- a/asv_bench/benchmarks/io/excel.py +++ b/asv_bench/benchmarks/io/excel.py @@ -1,40 +1,72 @@ from io import BytesIO import numpy as np +from odf.opendocument import OpenDocumentSpreadsheet +from odf.table import Table, TableCell, TableRow +from odf.text import P from pandas import DataFrame, ExcelWriter, date_range, read_excel import pandas.util.testing as tm -class Excel: +def _generate_dataframe(): + N = 2000 + C = 5 + df = DataFrame( + np.random.randn(N, C), + columns=["float{}".format(i) for i in range(C)], + index=date_range("20000101", periods=N, freq="H"), + ) + df["object"] = tm.makeStringIndex(N) + return df + + +class WriteExcel: params = ["openpyxl", "xlsxwriter", "xlwt"] param_names = ["engine"] def setup(self, engine): - N = 2000 - C = 5 - self.df = DataFrame( - np.random.randn(N, C), - columns=["float{}".format(i) for i in range(C)], - index=date_range("20000101", periods=N, freq="H"), - ) - self.df["object"] = tm.makeStringIndex(N) - self.bio_read = BytesIO() - self.writer_read = ExcelWriter(self.bio_read, engine=engine) - self.df.to_excel(self.writer_read, sheet_name="Sheet1") - self.writer_read.save() - self.bio_read.seek(0) - - def time_read_excel(self, engine): - read_excel(self.bio_read) + self.df = _generate_dataframe() def time_write_excel(self, engine): - bio_write = BytesIO() - bio_write.seek(0) - writer_write = ExcelWriter(bio_write, engine=engine) - self.df.to_excel(writer_write, sheet_name="Sheet1") - writer_write.save() + bio = BytesIO() + bio.seek(0) + writer = ExcelWriter(bio, engine=engine) + self.df.to_excel(writer, sheet_name="Sheet1") + writer.save() + + +class ReadExcel: + + params = ["xlrd", "openpyxl", "odf"] + param_names = ["engine"] + fname_excel = "spreadsheet.xlsx" + fname_odf = "spreadsheet.ods" + + def _create_odf(self): + doc = OpenDocumentSpreadsheet() + table = Table(name="Table1") + for row in self.df.values: + tr = TableRow() + for val in row: + tc = TableCell(valuetype="string") + tc.addElement(P(text=val)) + tr.addElement(tc) + table.addElement(tr) + + doc.spreadsheet.addElement(table) + doc.save(self.fname_odf) + + def setup_cache(self): + self.df = _generate_dataframe() + + self.df.to_excel(self.fname_excel, sheet_name="Sheet1") + self._create_odf() + + def time_read_excel(self, engine): + fname = self.fname_odf if engine == "odf" else self.fname_excel + read_excel(fname, engine=engine) from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/environment.yml b/environment.yml index 6d2cd701c38540..d72972ffc4da48 100644 --- a/environment.yml +++ b/environment.yml @@ -80,4 +80,5 @@ dependencies: - xlrd # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile - xlsxwriter # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile - xlwt # pandas.read_excel, DataFrame.to_excel, pandas.ExcelWriter, pandas.ExcelFile + - odfpy # pandas.read_excel - pyreadstat # pandas.read_spss diff --git a/requirements-dev.txt b/requirements-dev.txt index cf11a3ee282584..c0fb9ee331b11a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -54,4 +54,5 @@ xarray xlrd xlsxwriter xlwt +odfpy pyreadstat \ No newline at end of file From 813123b2d67860b7104f12ad2f6469aa64833fb2 Mon Sep 17 00:00:00 2001 From: zys5945 Date: Thu, 5 Sep 2019 11:03:56 -0700 Subject: [PATCH 73/95] DOC: fix read_excel and ExcelFile engine parameter description (#28231) (#28245) --- 
pandas/io/excel/_base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 949eff45c0e92c..6dba5e042562b7 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -112,7 +112,7 @@ engine : str, default None If io is not a buffer or path, this must be set to identify io. - Acceptable values are None or xlrd. + Acceptable values are None, "xlrd", "openpyxl" or "odf". converters : dict, default None Dict of functions for converting values in certain columns. Keys can either be integers or column labels, values are functions that take one @@ -783,11 +783,12 @@ class ExcelFile: Parameters ---------- io : string, path object (pathlib.Path or py._path.local.LocalPath), - file-like object or xlrd workbook - If a string or path object, expected to be a path to xls or xlsx file. + a file-like object, xlrd workbook or openpypl workbook. + If a string or path object, expected to be a path to xls, xlsx or odf file. engine : string, default None If io is not a buffer or path, this must be set to identify io. - Acceptable values are None or ``xlrd``. + Acceptable values are None, ``xlrd``, ``openpyxl`` or ``odf``. + Note that ``odf`` reads tables out of OpenDocument formatted files. """ from pandas.io.excel._odfreader import _ODFReader From 2d65e38f5c245a8410c7cb37ec17424def00fa78 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 5 Sep 2019 11:14:17 -0700 Subject: [PATCH 74/95] Fix inconsistent casting to bool (#28290) --- pandas/core/ops/__init__.py | 10 ++++++++-- pandas/tests/series/test_operators.py | 12 ++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index a94a4ccff0efe5..60fa1bef01f3dc 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -806,7 +806,13 @@ def na_op(x, y): return result fill_int = lambda x: x.fillna(0) - fill_bool = lambda x: x.fillna(False).astype(bool) + + def fill_bool(x, left=None): + # if `left` is specifically not-boolean, we do not cast to bool + x = x.fillna(False) + if left is None or is_bool_dtype(left.dtype): + x = x.astype(bool) + return x def wrapper(self, other): is_self_int_dtype = is_integer_dtype(self.dtype) @@ -835,7 +841,7 @@ def wrapper(self, other): elif isinstance(other, (ABCSeries, ABCIndexClass)): is_other_int_dtype = is_integer_dtype(other.dtype) - other = other if is_other_int_dtype else fill_bool(other) + other = other if is_other_int_dtype else fill_bool(other, self) else: # scalars, list, tuple, np.array diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index aa44760dcd9180..bf725a04de0589 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -103,11 +103,8 @@ def test_logical_operators_int_dtype_with_float(self): s_0123 & [0.1, 4, 3.14, 2] with pytest.raises(TypeError): s_0123 & np.array([0.1, 4, 3.14, 2]) - - # FIXME: this should be consistent with the list case above - expected = Series([False, True, False, True]) - result = s_0123 & Series([0.1, 4, -3.14, 2]) - assert_series_equal(result, expected) + with pytest.raises(TypeError): + s_0123 & Series([0.1, 4, -3.14, 2]) def test_logical_operators_int_dtype_with_str(self): s_1111 = Series([1] * 4, dtype="int8") @@ -145,9 +142,8 @@ def test_logical_operators_int_dtype_with_object(self): assert_series_equal(result, expected) s_abNd = Series(["a", "b", np.NaN, "d"]) - result = s_0123 & s_abNd - expected = 
Series([False, True, False, True]) - assert_series_equal(result, expected) + with pytest.raises(TypeError, match="unsupported.* 'int' and 'str'"): + s_0123 & s_abNd def test_logical_operators_bool_dtype_with_int(self): index = list("bca") From 820072a0f9bccdfbfb11fe82caf60adb9fad1323 Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 7 Sep 2019 12:29:40 +0100 Subject: [PATCH 75/95] BUG: Remove null values before sorting during groupby nunique calculation (#27951) Closes #27904 --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/groupby/generic.py | 4 +++ pandas/tests/groupby/test_function.py | 48 ++++++++++++++++++++++++++- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 58892b316c9408..2f72de25c579ba 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -97,7 +97,7 @@ Datetimelike - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) - Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) -- +- Bug in :meth:`pandas.core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) Timedelta diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c0436e93890782..e514162f84c374 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1147,6 +1147,10 @@ def nunique(self, dropna=True): val = self.obj._internal_get_values() + # GH 27951 + # temporary fix while we wait for NumPy bug 12629 to be fixed + val[isna(val)] = np.datetime64("NaT") + try: sorter = np.lexsort((val, ids)) except TypeError: # catches object dtypes diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index d89233f2fd603c..afb22a732691cd 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1,4 +1,5 @@ import builtins +import datetime as dt from io import StringIO from itertools import product from string import ascii_lowercase @@ -9,7 +10,16 @@ from pandas.errors import UnsupportedFunctionCall import pandas as pd -from pandas import DataFrame, Index, MultiIndex, Series, Timestamp, date_range, isna +from pandas import ( + DataFrame, + Index, + MultiIndex, + NaT, + Series, + Timestamp, + date_range, + isna, +) import pandas.core.nanops as nanops from pandas.util import _test_decorators as td, testing as tm @@ -1015,6 +1025,42 @@ def test_nunique_with_timegrouper(): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize( + "key, data, dropna, expected", + [ + ( + ["x", "x", "x"], + [Timestamp("2019-01-01"), NaT, Timestamp("2019-01-01")], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + True, + Series([1], index=pd.Index(["x"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "y", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ( + ["x", "x", "x", "x", "y"], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + False, + Series([2, 
1], index=pd.Index(["x", "y"], name="key"), name="data"), + ), + ], +) +def test_nunique_with_NaT(key, data, dropna, expected): + # GH 27951 + df = pd.DataFrame({"key": key, "data": data}) + result = df.groupby(["key"])["data"].nunique(dropna=dropna) + tm.assert_series_equal(result, expected) + + def test_nunique_preserves_column_level_names(): # GH 23222 test = pd.DataFrame([1, 2, 2], columns=pd.Index(["A"], name="level_0")) From c3b1252f9cada5f6f2696e34783e9dbeadb7beba Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Sep 2019 10:17:31 -0700 Subject: [PATCH 76/95] CLN: split_and_operate (#28327) --- pandas/core/internals/blocks.py | 56 ++++++++++++++++----------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 33698d245e9ffc..2a44177d445df8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -416,15 +416,16 @@ def fillna(self, value, limit=None, inplace=False, downcast=None): return self if inplace else self.copy() # operate column-by-column - def f(m, v, i): + def f(mask, val, idx): block = self.coerce_to_target_dtype(value) # slice out our block - if i is not None: - block = block.getitem_block(slice(i, i + 1)) + if idx is not None: + # i.e. self.ndim == 2 + block = block.getitem_block(slice(idx, idx + 1)) return block.fillna(value, limit=limit, inplace=inplace, downcast=None) - return self.split_and_operate(mask, f, inplace) + return self.split_and_operate(None, f, inplace) def split_and_operate(self, mask, f, inplace: bool): """ @@ -444,7 +445,8 @@ def split_and_operate(self, mask, f, inplace: bool): """ if mask is None: - mask = np.ones(self.shape, dtype=bool) + mask = np.broadcast_to(True, shape=self.shape) + new_values = self.values def make_a_block(nv, ref_loc): @@ -523,19 +525,14 @@ def downcast(self, dtypes=None): raise ValueError( "downcast must have a dictionary or 'infer' as its argument" ) + elif dtypes != "infer": + raise AssertionError("dtypes as dict is not supported yet") # operate column-by-column # this is expensive as it splits the blocks items-by-item - def f(m, v, i): - - if dtypes == "infer": - dtype = "infer" - else: - raise AssertionError("dtypes as dict is not supported yet") - - if dtype is not None: - v = maybe_downcast_to_dtype(v, dtype) - return v + def f(mask, val, idx): + val = maybe_downcast_to_dtype(val, dtype="infer") + return val return self.split_and_operate(None, f, False) @@ -1002,15 +999,15 @@ def putmask(self, mask, new, align=True, inplace=False, axis=0, transpose=False) new = new.reshape(tuple(new_shape)) # operate column-by-column - def f(m, v, i): + def f(mask, val, idx): - if i is None: + if idx is None: # ndim==1 case. 
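                # (aside: ``split_and_operate`` calls ``f`` once per block
                #  column; when ``idx`` is None the block is 1-D and ``new``
                #  applies directly, otherwise row ``idx`` selects the
                #  matching slice of ``new`` below)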
n = new else: if isinstance(new, np.ndarray): - n = np.squeeze(new[i % new.shape[0]]) + n = np.squeeze(new[idx % new.shape[0]]) else: n = np.array(new) @@ -1020,7 +1017,7 @@ def f(m, v, i): # we need to explicitly astype here to make a copy n = n.astype(dtype) - nv = _putmask_smart(v, m, n) + nv = _putmask_smart(val, mask, n) return nv new_blocks = self.split_and_operate(mask, f, inplace) @@ -2627,10 +2624,10 @@ def convert( """ # operate column-by-column - def f(m, v, i): - shape = v.shape + def f(mask, val, idx): + shape = val.shape values = soft_convert_objects( - v.ravel(), + val.ravel(), datetime=datetime, numeric=numeric, timedelta=timedelta, @@ -3172,14 +3169,15 @@ def _safe_reshape(arr, new_shape): return arr -def _putmask_smart(v, m, n): +def _putmask_smart(v, mask, n): """ Return a new ndarray, try to preserve dtype if possible. Parameters ---------- v : `values`, updated in-place (array like) - m : `mask`, applies to both sides (array like) + mask : np.ndarray + Applies to both sides (array like). n : `new values` either scalar or an array like aligned with `values` Returns @@ -3197,12 +3195,12 @@ def _putmask_smart(v, m, n): # n should be the length of the mask or a scalar here if not is_list_like(n): - n = np.repeat(n, len(m)) + n = np.repeat(n, len(mask)) # see if we are only masking values that if putted # will work in the current dtype try: - nn = n[m] + nn = n[mask] except TypeError: # TypeError: only integer scalar arrays can be converted to a scalar index pass @@ -3227,16 +3225,16 @@ def _putmask_smart(v, m, n): comp = nn == nn_at if is_list_like(comp) and comp.all(): nv = v.copy() - nv[m] = nn_at + nv[mask] = nn_at return nv n = np.asarray(n) def _putmask_preserve(nv, n): try: - nv[m] = n[m] + nv[mask] = n[mask] except (IndexError, ValueError): - nv[m] = n + nv[mask] = n return nv # preserves dtype if possible From 0a00ebe61849f2ad9b7ee6a65b27f92a6491969d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Sep 2019 10:17:47 -0700 Subject: [PATCH 77/95] CLN: eval_kwargs (#28328) --- pandas/core/computation/expressions.py | 12 ++++++------ pandas/core/ops/__init__.py | 24 +++++------------------- 2 files changed, 11 insertions(+), 25 deletions(-) diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 29c8239fa518fc..90bb12b4cd727f 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -62,8 +62,9 @@ def set_numexpr_threads(n=None): ne.set_num_threads(n) -def _evaluate_standard(op, op_str, a, b, **eval_kwargs): +def _evaluate_standard(op, op_str, a, b, reversed=False): """ standard evaluation """ + # `reversed` kwarg is included for compatibility with _evaluate_numexpr if _TEST_MODE: _store_test_result(False) with np.errstate(all="ignore"): @@ -96,7 +97,7 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check): return False -def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwargs): +def _evaluate_numexpr(op, op_str, a, b, reversed=False): result = None if _can_use_numexpr(op, op_str, a, b, "evaluate"): @@ -111,8 +112,6 @@ def _evaluate_numexpr(op, op_str, a, b, truediv=True, reversed=False, **eval_kwa "a_value {op} b_value".format(op=op_str), local_dict={"a_value": a_value, "b_value": b_value}, casting="safe", - truediv=truediv, - **eval_kwargs ) except ValueError as detail: if "unknown type object" in str(detail): @@ -201,7 +200,7 @@ def _bool_arith_check( return True -def evaluate(op, op_str, a, b, use_numexpr=True, **eval_kwargs): +def evaluate(op, 
op_str, a, b, use_numexpr=True, reversed=False): """ Evaluate and return the expression of the op on a and b. @@ -214,11 +213,12 @@ def evaluate(op, op_str, a, b, use_numexpr=True, **eval_kwargs): b : right operand use_numexpr : bool, default True Whether to try to use numexpr. + reversed : bool, default False """ use_numexpr = use_numexpr and _bool_arith_check(op_str, a, b) if use_numexpr: - return _evaluate(op, op_str, a, b, **eval_kwargs) + return _evaluate(op, op_str, a, b, reversed=reversed) return _evaluate_standard(op, op_str, a, b) diff --git a/pandas/core/ops/__init__.py b/pandas/core/ops/__init__.py index 60fa1bef01f3dc..f1f4777cedbc57 100644 --- a/pandas/core/ops/__init__.py +++ b/pandas/core/ops/__init__.py @@ -213,12 +213,6 @@ def _gen_eval_kwargs(name): # Exclude commutative operations kwargs["reversed"] = True - if name in ["truediv", "rtruediv"]: - kwargs["truediv"] = True - - if name in ["ne"]: - kwargs["masker"] = True - return kwargs @@ -247,7 +241,7 @@ def _get_frame_op_default_axis(name): return "columns" -def _get_opstr(op, cls): +def _get_opstr(op): """ Find the operation string, if any, to pass to numexpr for this operation. @@ -255,19 +249,11 @@ def _get_opstr(op, cls): Parameters ---------- op : binary operator - cls : class Returns ------- op_str : string or None """ - # numexpr is available for non-sparse classes - subtyp = getattr(cls, "_subtyp", "") - use_numexpr = "sparse" not in subtyp - - if not use_numexpr: - # if we're not using numexpr, then don't pass a str_rep - return None return { operator.add: "+", @@ -624,7 +610,7 @@ def _arith_method_SERIES(cls, op, special): Wrapper function for Series arithmetic operations, to avoid code duplication. """ - str_rep = _get_opstr(op, cls) + str_rep = _get_opstr(op) op_name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(op_name) construct_result = ( @@ -999,7 +985,7 @@ def to_series(right): def _arith_method_FRAME(cls, op, special): - str_rep = _get_opstr(op, cls) + str_rep = _get_opstr(op) op_name = _get_op_name(op, special) eval_kwargs = _gen_eval_kwargs(op_name) default_axis = _get_frame_op_default_axis(op_name) @@ -1041,7 +1027,7 @@ def f(self, other, axis=default_axis, level=None, fill_value=None): def _flex_comp_method_FRAME(cls, op, special): - str_rep = _get_opstr(op, cls) + str_rep = _get_opstr(op) op_name = _get_op_name(op, special) default_axis = _get_frame_op_default_axis(op_name) @@ -1082,7 +1068,7 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(cls, func, special): - str_rep = _get_opstr(func, cls) + str_rep = _get_opstr(func) op_name = _get_op_name(func, special) @Appender("Wrapper for comparison method {name}".format(name=op_name)) From 6b23fb8d006309b3c050c1ccde280349328c2aae Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 7 Sep 2019 12:18:53 -0500 Subject: [PATCH 78/95] Clean groupby error message (#28324) --- pandas/core/groupby/ops.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index bcda25bf3ce394..1a3f0da3cf92bf 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -463,9 +463,7 @@ def _cython_operation(self, kind, values, how, axis, min_count=-1, **kwargs): # categoricals are only 1d, so we # are not setup for dim transforming if is_categorical_dtype(values) or is_sparse(values): - raise NotImplementedError( - "{} are not support in cython ops".format(values.dtype) - ) + raise NotImplementedError("{} 
dtype not supported".format(values.dtype)) elif is_datetime64_any_dtype(values): if how in ["add", "prod", "cumsum", "cumprod"]: raise NotImplementedError( From e24d9e51b5f7e84d08d9b6b246ebdfa2d3eab6fa Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 7 Sep 2019 10:20:02 -0700 Subject: [PATCH 79/95] Removed PyString refs from extension modules (#28322) * Removed PyString refs from extension modules * Reverted macro --- pandas/_libs/src/parse_helper.h | 5 ----- pandas/_libs/src/ujson/python/objToJSON.c | 4 ++-- pandas/_libs/tslibs/util.pxd | 5 ----- pandas/_libs/writers.pyx | 9 +-------- 4 files changed, 3 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/src/parse_helper.h b/pandas/_libs/src/parse_helper.h index 1db4c813bb4930..0a767dd27b6580 100644 --- a/pandas/_libs/src/parse_helper.h +++ b/pandas/_libs/src/parse_helper.h @@ -25,11 +25,6 @@ int to_double(char *item, double *p_value, char sci, char decimal, return (error == 0) && (!*p_end); } -#if PY_VERSION_HEX < 0x02060000 -#define PyBytes_Check PyString_Check -#define PyBytes_AS_STRING PyString_AS_STRING -#endif // PY_VERSION_HEX - int floatify(PyObject *str, double *result, int *maybe_int) { int status; char *data; diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 4b612bb033761d..dc9b906c8d76c4 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -435,7 +435,7 @@ static void *PyFloatToDOUBLE(JSOBJ _obj, JSONTypeContext *tc, void *outValue, return NULL; } -static void *PyStringToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, +static void *PyBytesToUTF8(JSOBJ _obj, JSONTypeContext *tc, void *outValue, size_t *_outLen) { PyObject *obj = (PyObject *)_obj; *_outLen = PyBytes_GET_SIZE(obj); @@ -1869,7 +1869,7 @@ void Object_beginTypeContext(JSOBJ _obj, JSONTypeContext *tc) { return; } else if (PyBytes_Check(obj)) { PRINTMARK(); - pc->PyTypeToJSON = PyStringToUTF8; + pc->PyTypeToJSON = PyBytesToUTF8; tc->type = JT_UTF8; return; } else if (PyUnicode_Check(obj)) { diff --git a/pandas/_libs/tslibs/util.pxd b/pandas/_libs/tslibs/util.pxd index 07c2805dd0ef61..65f4e98708f47e 100644 --- a/pandas/_libs/tslibs/util.pxd +++ b/pandas/_libs/tslibs/util.pxd @@ -4,11 +4,7 @@ from cpython cimport PyTypeObject cdef extern from *: """ PyObject* char_to_string(const char* data) { - #if PY_VERSION_HEX >= 0x03000000 return PyUnicode_FromString(data); - #else - return PyString_FromString(data); - #endif } """ object char_to_string(const char* data) @@ -18,7 +14,6 @@ cdef extern from "Python.h": # Note: importing extern-style allows us to declare these as nogil # functions, whereas `from cpython cimport` does not. 
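    # (aside: on Python 3, ``str`` is unicode and ``bytes`` replaces the old
    #  ``str``, so the PyUnicode_Check/PyBytes_Check declarations below
    #  cover everything the removed PyString_* checks used to handle)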
bint PyUnicode_Check(object obj) nogil - bint PyString_Check(object obj) nogil bint PyBool_Check(object obj) nogil bint PyFloat_Check(object obj) nogil bint PyComplex_Check(object obj) nogil diff --git a/pandas/_libs/writers.pyx b/pandas/_libs/writers.pyx index d1aecf0a9d2947..e5d78dae9c0233 100644 --- a/pandas/_libs/writers.pyx +++ b/pandas/_libs/writers.pyx @@ -3,11 +3,6 @@ from cython import Py_ssize_t from cpython cimport PyBytes_GET_SIZE, PyUnicode_GET_SIZE -try: - from cpython cimport PyString_GET_SIZE -except ImportError: - from cpython cimport PyUnicode_GET_SIZE as PyString_GET_SIZE - import numpy as np from numpy cimport ndarray, uint8_t @@ -126,11 +121,9 @@ def max_len_string_array(pandas_string[:] arr) -> Py_ssize_t: for i in range(length): val = arr[i] if isinstance(val, str): - l = PyString_GET_SIZE(val) + l = PyUnicode_GET_SIZE(val) elif isinstance(val, bytes): l = PyBytes_GET_SIZE(val) - elif isinstance(val, unicode): - l = PyUnicode_GET_SIZE(val) if l > m: m = l From 53ad571d86449fba1b854dfede6de76657930282 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Sep 2019 10:23:07 -0700 Subject: [PATCH 80/95] CLN: catch Exception less (#28309) --- pandas/core/apply.py | 14 ++++++++------ pandas/core/arrays/datetimes.py | 3 ++- pandas/core/dtypes/concat.py | 5 ++--- pandas/core/indexes/accessors.py | 2 +- pandas/core/series.py | 3 --- pandas/plotting/_core.py | 14 ++++++-------- pandas/plotting/_matplotlib/converter.py | 4 +++- pandas/plotting/_matplotlib/core.py | 2 +- setup.py | 4 ++-- 9 files changed, 25 insertions(+), 26 deletions(-) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index b96b3c75720315..e6766a33a613b2 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -199,20 +199,21 @@ def apply_empty_result(self): return self.obj.copy() # we may need to infer - reduce = self.result_type == "reduce" + should_reduce = self.result_type == "reduce" from pandas import Series - if not reduce: + if not should_reduce: EMPTY_SERIES = Series([]) try: r = self.f(EMPTY_SERIES, *self.args, **self.kwds) - reduce = not isinstance(r, Series) except Exception: pass + else: + should_reduce = not isinstance(r, Series) - if reduce: + if should_reduce: return self.obj._constructor_sliced(np.nan, index=self.agg_axis) else: return self.obj.copy() @@ -306,10 +307,11 @@ def apply_series_generator(self): for i, v in enumerate(series_gen): try: results[i] = self.f(v) - keys.append(v.name) - successes.append(i) except Exception: pass + else: + keys.append(v.name) + successes.append(i) # so will work with MultiIndex if len(successes) < len(res_index): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 732f819e743a47..5dff1f93264c3e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -2284,7 +2284,8 @@ def _infer_tz_from_endpoints(start, end, tz): """ try: inferred_tz = timezones.infer_tzinfo(start, end) - except Exception: + except AssertionError: + # infer_tzinfo raises AssertionError if passed mismatched timezones raise TypeError( "Start and end cannot both be tz-aware with different timezones" ) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 12f3fd2c75dc8a..1094ab22238e97 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -89,10 +89,9 @@ def concat_compat(to_concat, axis=0): # filter empty arrays # 1-d dtypes always are included here def is_nonempty(x): - try: - return x.shape[axis] > 0 - except Exception: + if x.ndim <= axis: return True + return 
x.shape[axis] > 0 # If all arrays are empty, there's nothing to convert, just short-cut to # the concatenation, #3121. diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 2036728e702f30..11b6cb2ca3ed4b 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -316,7 +316,7 @@ def __new__(cls, data): # do all the validation here. from pandas import Series - if not isinstance(data, Series): + if not isinstance(data, ABCSeries): raise TypeError( "cannot convert an object of type {0} to a " "datetimelike index".format(type(data)) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6fb39c422de932..10d50e89ca92eb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1114,9 +1114,6 @@ def __getitem__(self, key): return self.__getitem__(new_key) raise - except Exception: - raise - if is_iterator(key): key = list(key) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index d3c9e8ccfa51ca..837b01974be930 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1,22 +1,20 @@ import importlib -from typing import List, Type # noqa import warnings +from pandas._config import get_option + +from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import Appender from pandas.core.dtypes.common import is_integer, is_list_like from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -import pandas from pandas.core.base import PandasObject # Trigger matplotlib import, which implicitly registers our # converts. Implicit registration is deprecated, and when enforced # we can lazily import matplotlib. -try: - import pandas.plotting._matplotlib # noqa -except ImportError: - pass +import_optional_dependency("pandas.plotting._matplotlib", raise_on_missing=False) def hist_series( @@ -732,7 +730,7 @@ def __call__(self, *args, **kwargs): # `x` parameter, and return a Series with the parameter `y` as values. data = self._parent.copy() - if isinstance(data, pandas.core.dtypes.generic.ABCSeries): + if isinstance(data, ABCSeries): kwargs["reuse_plot"] = True if kind in self._dataframe_kinds: @@ -1603,7 +1601,7 @@ def _get_plot_backend(backend=None): The backend is imported lazily, as matplotlib is a soft dependency, and pandas can be used without it being installed. """ - backend = backend or pandas.get_option("plotting.backend") + backend = backend or get_option("plotting.backend") if backend == "matplotlib": # Because matplotlib is an optional dependency and first-party backend, diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 893854ab26e37d..446350cb5d9152 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -329,7 +329,7 @@ def __init__(self, locator, tz=None, defaultfmt="%Y-%m-%d"): class PandasAutoDateLocator(dates.AutoDateLocator): def get_locator(self, dmin, dmax): - "Pick the best locator based on a distance." + """Pick the best locator based on a distance.""" _check_implicitly_registered() delta = relativedelta(dmax, dmin) @@ -382,6 +382,7 @@ def __call__(self): dmax, dmin = dmin, dmax # We need to cap at the endpoints of valid datetime + # FIXME: dont leave commented-out # TODO(wesm) unused? # delta = relativedelta(dmax, dmin) # try: @@ -448,6 +449,7 @@ def autoscale(self): # We need to cap at the endpoints of valid datetime + # FIXME: dont leave commented-out # TODO(wesm): unused? 
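The refactoring shape this patch applies throughout (seen in ``apply.py`` earlier in this diff), sketched standalone with illustrative names, not pandas code: moving the success-path bookkeeping into an ``else`` clause means the broad ``except Exception`` guards only the user-supplied call itself.

.. code-block:: python

   def f(x):
       return 1 / x

   items = [1, 0, 2]
   results = {}
   successes = []

   for i, item in enumerate(items):
       try:
           r = f(item)  # only the risky call sits inside the try block
       except Exception:
           pass  # a failing call is skipped, as in apply.py above
       else:
           results[i] = r  # bookkeeping runs unguarded, so a bug here
           successes.append(i)  # raises instead of being silently swallowed

   print(results, successes)  # {0: 1.0, 2: 0.5} [0, 2]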
# delta = relativedelta(dmax, dmin) diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 6ff3f284403039..346949cb82c4d0 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -1,5 +1,5 @@ import re -from typing import Optional # noqa +from typing import Optional import warnings import numpy as np diff --git a/setup.py b/setup.py index a86527ace092b3..76db96870c36a1 100755 --- a/setup.py +++ b/setup.py @@ -300,12 +300,12 @@ def run(self): for clean_me in self._clean_me: try: os.unlink(clean_me) - except Exception: + except OSError: pass for clean_tree in self._clean_trees: try: shutil.rmtree(clean_tree) - except Exception: + except OSError: pass From a72b24059dd647f1d8357e6241f267bc58fc8bc7 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 7 Sep 2019 10:43:01 -0700 Subject: [PATCH 81/95] Added cpp files to build clean (#28320) --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 76db96870c36a1..05e5f5250e2506 100755 --- a/setup.py +++ b/setup.py @@ -277,6 +277,7 @@ def initialize_options(self): ".pyo", ".pyd", ".c", + ".cpp", ".orig", ): self._clean_me.append(filepath) From 71119275b93b0be2fef6304cc42fef685ae6cef9 Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 7 Sep 2019 12:59:25 -0500 Subject: [PATCH 82/95] PERF: Speed up Spearman calculation (#28151) --- asv_bench/benchmarks/stat_ops.py | 11 +++++++++++ doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/_libs/algos.pyx | 20 ++++++++++++++++---- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 6032bee41958e1..ed5ebfa61594ec 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -113,12 +113,23 @@ def setup(self, method, use_bottleneck): nanops._USE_BOTTLENECK = use_bottleneck self.df = pd.DataFrame(np.random.randn(1000, 30)) self.df2 = pd.DataFrame(np.random.randn(1000, 30)) + self.df_wide = pd.DataFrame(np.random.randn(1000, 200)) + self.df_wide_nans = self.df_wide.where(np.random.random((1000, 200)) < 0.9) self.s = pd.Series(np.random.randn(1000)) self.s2 = pd.Series(np.random.randn(1000)) def time_corr(self, method, use_bottleneck): self.df.corr(method=method) + def time_corr_wide(self, method, use_bottleneck): + self.df_wide.corr(method=method) + + def time_corr_wide_nans(self, method, use_bottleneck): + self.df_wide_nans.corr(method=method) + + def peakmem_corr_wide(self, method, use_bottleneck): + self.df_wide.corr(method=method) + def time_corr_series(self, method, use_bottleneck): self.s.corr(self.s2, method=method) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 2f72de25c579ba..628e2e708e4f12 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -75,9 +75,9 @@ Performance improvements - Performance improvement in indexing with a non-unique :class:`IntervalIndex` (:issue:`27489`) - Performance improvement in `MultiIndex.is_monotonic` (:issue:`27495`) - Performance improvement in :func:`cut` when ``bins`` is an :class:`IntervalIndex` (:issue:`27668`) +- Performance improvement in :meth:`DataFrame.corr` when ``method`` is ``"spearman"`` (:issue:`28139`) - Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`) - .. 
_whatsnew_1000.bug_fixes: Bug fixes diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index 038447ad252fe2..0f91f612994c7b 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -296,6 +296,7 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1): cdef: Py_ssize_t i, j, xi, yi, N, K ndarray[float64_t, ndim=2] result + ndarray[float64_t, ndim=2] ranked_mat ndarray[float64_t, ndim=1] maskedx ndarray[float64_t, ndim=1] maskedy ndarray[uint8_t, ndim=2] mask @@ -307,10 +308,18 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1): result = np.empty((K, K), dtype=np.float64) mask = np.isfinite(mat).view(np.uint8) + ranked_mat = np.empty((N, K), dtype=np.float64) + + for i in range(K): + ranked_mat[:, i] = rank_1d_float64(mat[:, i]) + for xi in range(K): for yi in range(xi + 1): nobs = 0 + # Keep track of whether we need to recompute ranks + all_ranks = True for i in range(N): + all_ranks &= not (mask[i, xi] ^ mask[i, yi]) if mask[i, xi] and mask[i, yi]: nobs += 1 @@ -320,13 +329,16 @@ def nancorr_spearman(const float64_t[:, :] mat, Py_ssize_t minp=1): maskedx = np.empty(nobs, dtype=np.float64) maskedy = np.empty(nobs, dtype=np.float64) j = 0 + for i in range(N): if mask[i, xi] and mask[i, yi]: - maskedx[j] = mat[i, xi] - maskedy[j] = mat[i, yi] + maskedx[j] = ranked_mat[i, xi] + maskedy[j] = ranked_mat[i, yi] j += 1 - maskedx = rank_1d_float64(maskedx) - maskedy = rank_1d_float64(maskedy) + + if not all_ranks: + maskedx = rank_1d_float64(maskedx) + maskedy = rank_1d_float64(maskedy) mean = (nobs + 1) / 2. From 1cd7ae6a4b65366773124fc115292687db27397a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Sep 2019 12:21:08 -0700 Subject: [PATCH 83/95] BUG: datetime64 - Timestamp incorrectly raising TypeError (#28286) --- doc/source/whatsnew/v1.0.0.rst | 2 ++ pandas/_libs/tslibs/c_timestamp.pyx | 5 +++++ pandas/tests/scalar/timestamp/test_arithmetic.py | 14 ++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 628e2e708e4f12..161ebf9783e1bb 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -98,6 +98,8 @@ Datetimelike - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) - Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) +- Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) +- Timedelta diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index 41e2ae6b5b59b6..e3456edbf7e627 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -312,6 +312,11 @@ cdef class _Timestamp(datetime): except (OverflowError, OutOfBoundsDatetime): pass + elif is_datetime64_object(self): + # GH#28286 cython semantics for __rsub__, `other` is actually + # the Timestamp + return type(other)(self) - other + # scalar Timestamp/datetime - Timedelta -> yields a Timestamp (with # same timezone if specified) return datetime.__sub__(self, other) diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 2ef4fe79eeacf5..7b00f00fc9ec49 100644 --- 
a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -66,6 +66,20 @@ def test_delta_preserve_nanos(self): result = val + timedelta(1) assert result.nanosecond == val.nanosecond + def test_rsub_dtscalars(self, tz_naive_fixture): + # In particular, check that datetime64 - Timestamp works GH#28286 + td = Timedelta(1235345642000) + ts = Timestamp.now(tz_naive_fixture) + other = ts + td + + assert other - ts == td + assert other.to_pydatetime() - ts == td + if tz_naive_fixture is None: + assert other.to_datetime64() - ts == td + else: + with pytest.raises(TypeError, match="subtraction must have"): + other.to_datetime64() - ts + def test_timestamp_sub_datetime(self): dt = datetime(2013, 10, 12) ts = Timestamp(datetime(2013, 10, 13)) From 7161b907748b46e535b3a3444d2ab47c37a95612 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Sep 2019 12:39:07 -0700 Subject: [PATCH 84/95] CLN: catch specific Exceptions in _config (#28310) --- pandas/_config/display.py | 5 ++++- pandas/_config/localization.py | 12 ++++-------- pandas/tests/io/formats/test_console.py | 8 +++++--- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pandas/_config/display.py b/pandas/_config/display.py index 6e5fabe2706e5e..067b7c503baabf 100644 --- a/pandas/_config/display.py +++ b/pandas/_config/display.py @@ -28,7 +28,10 @@ def detect_console_encoding(): if not encoding or "ascii" in encoding.lower(): try: encoding = locale.getpreferredencoding() - except Exception: + except locale.Error: + # can be raised by locale.setlocale(), which is + # called by getpreferredencoding + # (on some systems, see stdlib locale docs) pass # when all else fails. this will usually be "ascii" diff --git a/pandas/_config/localization.py b/pandas/_config/localization.py index 46802c64609594..9f750d8447c6ab 100644 --- a/pandas/_config/localization.py +++ b/pandas/_config/localization.py @@ -98,13 +98,7 @@ def _valid_locales(locales, normalize): def _default_locale_getter(): - try: - raw_locales = subprocess.check_output(["locale -a"], shell=True) - except subprocess.CalledProcessError as e: - raise type(e)( - "{exception}, the 'locale -a' command cannot be found " - "on your system".format(exception=e) - ) + raw_locales = subprocess.check_output(["locale -a"], shell=True) return raw_locales @@ -139,7 +133,9 @@ def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_gette """ try: raw_locales = locale_getter() - except Exception: + except subprocess.CalledProcessError: + # Raised on (some? all?) 
Windows platforms because Note: "locale -a" + # is not defined return None try: diff --git a/pandas/tests/io/formats/test_console.py b/pandas/tests/io/formats/test_console.py index f4bee99296a834..e56d14885f11e3 100644 --- a/pandas/tests/io/formats/test_console.py +++ b/pandas/tests/io/formats/test_console.py @@ -1,3 +1,5 @@ +import locale + import pytest from pandas._config import detect_console_encoding @@ -50,11 +52,11 @@ def test_detect_console_encoding_fallback_to_locale(monkeypatch, encoding): "std,locale", [ ["ascii", "ascii"], - ["ascii", Exception], + ["ascii", locale.Error], [AttributeError, "ascii"], - [AttributeError, Exception], + [AttributeError, locale.Error], [IOError, "ascii"], - [IOError, Exception], + [IOError, locale.Error], ], ) def test_detect_console_encoding_fallback_to_default(monkeypatch, std, locale): From 9aa9db9b85ee0285e11fc950f570a886233bc5b1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 7 Sep 2019 12:43:42 -0700 Subject: [PATCH 85/95] catch more specific (#28198) --- pandas/core/groupby/ops.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 1a3f0da3cf92bf..40517eefe4d5db 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -212,9 +212,12 @@ def apply(self, f, data, axis=0): # This Exception is also raised if `f` triggers an exception # but it is preferable to raise the exception in Python. pass - except TypeError: - # occurs if we have any EAs - pass + except TypeError as err: + if "Cannot convert" in str(err): + # via apply_frame_axis0 if we pass a non-ndarray + pass + else: + raise for key, (i, group) in zip(group_keys, splitter): object.__setattr__(group, "name", key) From f04c4db6ad7da21752705808063780572baf5172 Mon Sep 17 00:00:00 2001 From: Mohamed Amine ZGHAL Date: Sun, 8 Sep 2019 19:08:18 +0200 Subject: [PATCH 86/95] Pandas.series.astype docstring PR02 (#28340) --- pandas/core/generic.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b427b1f0ac8580..831543ee660392 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5780,11 +5780,11 @@ def astype(self, dtype, copy=True, errors="raise", **kwargs): Control raising of exceptions on invalid data for provided dtype. - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object + - ``ignore`` : suppress exceptions. On error return original object. .. versionadded:: 0.20.0 - kwargs : keyword arguments to pass on to the constructor + **kwargs : keyword arguments to pass on to the constructor Returns ------- @@ -5845,7 +5845,7 @@ def astype(self, dtype, copy=True, errors="raise", **kwargs): Convert to ordered categorical type with custom ordering: >>> cat_dtype = pd.api.types.CategoricalDtype( - ... categories=[2, 1], ordered=True) + ... 
categories=[2, 1], ordered=True) >>> ser.astype(cat_dtype) 0 1 1 2 @@ -5855,7 +5855,7 @@ def astype(self, dtype, copy=True, errors="raise", **kwargs): Note that using ``copy=False`` and changing data on a new pandas object may propagate changes: - >>> s1 = pd.Series([1,2]) + >>> s1 = pd.Series([1, 2]) >>> s2 = s1.astype('int64', copy=False) >>> s2[0] = 10 >>> s1 # note that s1[0] has changed too From 5c57e7bc066d86564084b23e832f645f35e06c0e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Sep 2019 10:09:54 -0700 Subject: [PATCH 87/95] BUG: Timestamp+int should raise NullFrequencyError, not ValueError (#28268) --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/_libs/tslibs/__init__.py | 3 ++ pandas/_libs/tslibs/c_timestamp.pyx | 34 +++++++++++-------- pandas/errors/__init__.py | 10 +----- pandas/tests/arithmetic/test_timedelta64.py | 5 +-- .../tests/scalar/timestamp/test_arithmetic.py | 8 +++-- pandas/tests/tslibs/test_api.py | 1 + 7 files changed, 32 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 161ebf9783e1bb..e1fe2f7fe77e2c 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -99,6 +99,7 @@ Datetimelike - Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) +- Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`) - diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index 67a323782a836a..8d3b00e4a44b91 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -7,3 +7,6 @@ from .timedeltas import Timedelta, delta_to_nanoseconds, ints_to_pytimedelta from .timestamps import Timestamp from .tzconversion import tz_convert_single + +# import fails if we do this before np_datetime +from .c_timestamp import NullFrequencyError # isort:skip diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index e3456edbf7e627..a45b8c9b35dfab 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -42,6 +42,15 @@ from pandas._libs.tslibs.timezones import UTC from pandas._libs.tslibs.tzconversion cimport tz_convert_single +class NullFrequencyError(ValueError): + """ + Error raised when a null `freq` attribute is used in an operation + that needs a non-null frequency, particularly `DatetimeIndex.shift`, + `TimedeltaIndex.shift`, `PeriodIndex.shift`. 
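The user-visible effect of the new exception class, sketched below: adding an integer to a ``Timestamp`` that has no ``freq`` now raises ``NullFrequencyError``, and because it subclasses ``ValueError``, callers that previously caught ``ValueError`` keep working (assumes a pandas build that includes this patch).

.. code-block:: python

   import pandas as pd
   from pandas.errors import NullFrequencyError

   ts = pd.Timestamp("2019-01-01")  # no freq attached

   try:
       ts + 1
   except NullFrequencyError as err:  # also caught by `except ValueError`
       print(err)  # Cannot add integral value to Timestamp without freq.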
+ """ + pass + + def maybe_integer_op_deprecated(obj): # GH#22535 add/sub of integers and int-arrays is deprecated if obj.freq is not None: @@ -227,8 +236,8 @@ cdef class _Timestamp(datetime): # to be compat with Period return NaT elif self.freq is None: - raise ValueError("Cannot add integral value to Timestamp " - "without freq.") + raise NullFrequencyError( + "Cannot add integral value to Timestamp without freq.") return self.__class__((self.freq * other).apply(self), freq=self.freq) @@ -246,17 +255,15 @@ cdef class _Timestamp(datetime): result = self.__class__(self.value + nanos, tz=self.tzinfo, freq=self.freq) - if getattr(other, 'normalize', False): - # DateOffset - result = result.normalize() return result elif is_array(other): if other.dtype.kind in ['i', 'u']: maybe_integer_op_deprecated(self) if self.freq is None: - raise ValueError("Cannot add integer-dtype array " - "to Timestamp without freq.") + raise NullFrequencyError( + "Cannot add integer-dtype array " + "to Timestamp without freq.") return self.freq * other + self # index/series like @@ -270,6 +277,7 @@ cdef class _Timestamp(datetime): return result def __sub__(self, other): + if (is_timedelta64_object(other) or is_integer_object(other) or PyDelta_Check(other) or hasattr(other, 'delta')): # `delta` attribute is for offsets.Tick or offsets.Week obj @@ -280,15 +288,16 @@ cdef class _Timestamp(datetime): if other.dtype.kind in ['i', 'u']: maybe_integer_op_deprecated(self) if self.freq is None: - raise ValueError("Cannot subtract integer-dtype array " - "from Timestamp without freq.") + raise NullFrequencyError( + "Cannot subtract integer-dtype array " + "from Timestamp without freq.") return self - self.freq * other typ = getattr(other, '_typ', None) if typ is not None: return NotImplemented - elif other is NaT: + if other is NaT: return NaT # coerce if necessary if we are a Timestamp-like @@ -311,15 +320,12 @@ cdef class _Timestamp(datetime): return Timedelta(self.value - other.value) except (OverflowError, OutOfBoundsDatetime): pass - elif is_datetime64_object(self): # GH#28286 cython semantics for __rsub__, `other` is actually # the Timestamp return type(other)(self) - other - # scalar Timestamp/datetime - Timedelta -> yields a Timestamp (with - # same timezone if specified) - return datetime.__sub__(self, other) + return NotImplemented cdef int64_t _maybe_convert_value_to_local(self): """Convert UTC i8 value to local i8 value if tz exists""" diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 3177937ac4ba19..a85fc8bfb14142 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -4,7 +4,7 @@ Expose public exceptions & warnings """ -from pandas._libs.tslibs import OutOfBoundsDatetime +from pandas._libs.tslibs import NullFrequencyError, OutOfBoundsDatetime class PerformanceWarning(Warning): @@ -157,14 +157,6 @@ class MergeError(ValueError): """ -class NullFrequencyError(ValueError): - """ - Error raised when a null `freq` attribute is used in an operation - that needs a non-null frequency, particularly `DatetimeIndex.shift`, - `TimedeltaIndex.shift`, `PeriodIndex.shift`. 
- """ - - class AccessorRegistrationWarning(Warning): """Warning for attribute conflicts in accessor registration.""" diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index ee27ce97f269e9..d480b26e30fff6 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -241,10 +241,7 @@ def test_subtraction_ops(self): with pytest.raises(TypeError, match=msg): tdi - dti - msg = ( - r"descriptor '__sub__' requires a 'datetime\.datetime' object" - " but received a 'Timedelta'" - ) + msg = r"unsupported operand type\(s\) for -" with pytest.raises(TypeError, match=msg): td - dt diff --git a/pandas/tests/scalar/timestamp/test_arithmetic.py b/pandas/tests/scalar/timestamp/test_arithmetic.py index 7b00f00fc9ec49..9634c6d8222368 100644 --- a/pandas/tests/scalar/timestamp/test_arithmetic.py +++ b/pandas/tests/scalar/timestamp/test_arithmetic.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas.errors import NullFrequencyError + from pandas import Timedelta, Timestamp import pandas.util.testing as tm @@ -177,12 +179,12 @@ def test_timestamp_add_timedelta64_unit(self, other, expected_difference): ], ) def test_add_int_no_freq_raises(self, ts, other): - with pytest.raises(ValueError, match="without freq"): + with pytest.raises(NullFrequencyError, match="without freq"): ts + other - with pytest.raises(ValueError, match="without freq"): + with pytest.raises(NullFrequencyError, match="without freq"): other + ts - with pytest.raises(ValueError, match="without freq"): + with pytest.raises(NullFrequencyError, match="without freq"): ts - other with pytest.raises(TypeError): other - ts diff --git a/pandas/tests/tslibs/test_api.py b/pandas/tests/tslibs/test_api.py index 47e398dfe3d167..7a8a6d511aa69a 100644 --- a/pandas/tests/tslibs/test_api.py +++ b/pandas/tests/tslibs/test_api.py @@ -29,6 +29,7 @@ def test_namespace(): "NaTType", "iNaT", "is_null_datetimelike", + "NullFrequencyError", "OutOfBoundsDatetime", "Period", "IncompatibleFrequency", From df3d9b2cdfe2ebab3a5b22e6f5359a393e519af4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 8 Sep 2019 12:36:55 -0700 Subject: [PATCH 88/95] CLN: handle bare exceptions im timedeltas, timestamps, reduction (#28346) --- pandas/_libs/reduction.pyx | 4 +++- pandas/_libs/tslibs/c_timestamp.pyx | 3 ++- pandas/_libs/tslibs/timedeltas.pyx | 11 ++++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index c892c1cf1b8a3e..bf940eb03e06f4 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -528,7 +528,8 @@ def apply_frame_axis0(object frame, object f, object names, try: piece = f(chunk) - except: + except Exception: + # We can't be more specific without knowing something about `f` raise InvalidApply('Let this error raise above us') # Need to infer if low level index slider will cause segfaults @@ -539,6 +540,7 @@ def apply_frame_axis0(object frame, object f, object names, else: mutated = True except AttributeError: + # `piece` might not have an index, could be e.g. an int pass results.append(piece) diff --git a/pandas/_libs/tslibs/c_timestamp.pyx b/pandas/_libs/tslibs/c_timestamp.pyx index a45b8c9b35dfab..dfa66d7e2d8626 100644 --- a/pandas/_libs/tslibs/c_timestamp.pyx +++ b/pandas/_libs/tslibs/c_timestamp.pyx @@ -140,7 +140,8 @@ cdef class _Timestamp(datetime): try: stamp += zone.strftime(' %%Z') - except: + except AttributeError: + # e.g. 
tzlocal has no `strftime` pass tz = ", tz='{0}'".format(zone) if zone is not None else "" diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index d24aafae0967df..ad7c32ca319405 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -228,8 +228,13 @@ def array_to_timedelta64(object[:] values, unit='ns', errors='raise'): # this is where all of the error handling will take place. try: for i in range(n): - result[i] = parse_timedelta_string(values[i]) - except: + if values[i] is NaT: + # we allow this check in the fast-path because NaT is a C-object + # so this is an inexpensive check + iresult[i] = NPY_NAT + else: + result[i] = parse_timedelta_string(values[i]) + except (TypeError, ValueError): unit = parse_timedelta_unit(unit) for i in range(n): try: @@ -309,7 +314,7 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1: return (base * m) + (frac * m) -cdef inline parse_timedelta_string(object ts): +cdef inline int64_t parse_timedelta_string(str ts) except? -1: """ Parse a regular format timedelta string. Return an int64_t (in ns) or raise a ValueError on an invalid parse. From 7d5425fdf1e0b010edc3d06bb79d9ff74fcc4f31 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Sep 2019 04:53:37 -0700 Subject: [PATCH 89/95] PERF: lazify type-check import (#28342) --- pandas/io/formats/format.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f8db1b19dadfa8..4a66ad48d13185 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -5,6 +5,7 @@ import codecs from contextlib import contextmanager +from datetime import tzinfo import decimal from functools import partial from io import StringIO @@ -27,8 +28,6 @@ ) from unicodedata import east_asian_width -from dateutil.tz.tz import tzutc -from dateutil.zoneinfo import tzfile import numpy as np from pandas._config.config import get_option, set_option @@ -1552,9 +1551,7 @@ def _is_dates_only( def _format_datetime64( - x: Union[NaTType, Timestamp], - tz: Optional[Union[tzfile, tzutc]] = None, - nat_rep: str = "NaT", + x: Union[NaTType, Timestamp], tz: Optional[tzinfo] = None, nat_rep: str = "NaT" ) -> str: if x is None or (is_scalar(x) and isna(x)): return nat_rep From 17f73aaac1071a4b861c96f7957b1dd88e4c466c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Sep 2019 04:54:41 -0700 Subject: [PATCH 90/95] CLN: avoid bare except in libfrequencies (#28344) --- pandas/_libs/tslibs/frequencies.pyx | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index f2dcd37b191edf..b29c8418960720 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -138,6 +138,10 @@ cpdef get_freq_code(freqstr): ------- return : tuple of base frequency code and stride (mult) + Raises + ------ + TypeError : if passed a tuple witth incorrect types + Examples -------- >>> get_freq_code('3D') @@ -156,16 +160,16 @@ cpdef get_freq_code(freqstr): if is_integer_object(freqstr[0]) and is_integer_object(freqstr[1]): # e.g., freqstr = (2000, 1) return freqstr + elif is_integer_object(freqstr[0]): + # Note: passing freqstr[1] below will raise TypeError if that + # is not a str + code = _period_str_to_code(freqstr[1]) + stride = freqstr[0] + return code, stride else: # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - 
stride = freqstr[1] - except: - if is_integer_object(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] + code = _period_str_to_code(freqstr[0]) + stride = freqstr[1] return code, stride if is_integer_object(freqstr): @@ -177,7 +181,7 @@ cpdef get_freq_code(freqstr): return code, stride -cpdef _base_and_stride(freqstr): +cpdef _base_and_stride(str freqstr): """ Return base freq and stride info from string representation @@ -207,7 +211,7 @@ cpdef _base_and_stride(freqstr): return base, stride -cpdef _period_str_to_code(freqstr): +cpdef _period_str_to_code(str freqstr): freqstr = _lite_rule_alias.get(freqstr, freqstr) if freqstr not in _dont_uppercase: From 5d1440e8d1a3cbb24b5c43ac4a1bb981e5fd3d24 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Sep 2019 04:55:05 -0700 Subject: [PATCH 91/95] CLN: avoid bare except in tslib and tslibs.parsing (#28345) --- pandas/_libs/tslib.pyx | 11 +++++------ pandas/_libs/tslibs/parsing.pyx | 27 ++++++++++----------------- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 01e500a80dcc41..dc06a30004d19d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -344,14 +344,13 @@ def array_with_unit_to_datetime(ndarray values, object unit, # try a quick conversion to i8 # if we have nulls that are not type-compat # then need to iterate - try: + if values.dtype.kind == "i": + # Note: this condition makes the casting="same_kind" redundant iresult = values.astype('i8', casting='same_kind', copy=False) mask = iresult == NPY_NAT iresult[mask] = 0 fvalues = iresult.astype('f8') * m need_to_iterate = False - except: - pass # check the bounds if not need_to_iterate: @@ -406,7 +405,7 @@ def array_with_unit_to_datetime(ndarray values, object unit, elif is_ignore: raise AssertionError iresult[i] = NPY_NAT - except: + except OverflowError: if is_raise: raise OutOfBoundsDatetime( "cannot convert input {val} with the unit " @@ -447,7 +446,7 @@ def array_with_unit_to_datetime(ndarray values, object unit, else: try: oresult[i] = Timestamp(cast_from_unit(val, unit)) - except: + except OverflowError: oresult[i] = val elif isinstance(val, str): @@ -574,7 +573,7 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', # datetimes/strings, then we must coerce) try: iresult[i] = cast_from_unit(val, 'ns') - except: + except OverflowError: iresult[i] = NPY_NAT elif isinstance(val, str): diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index eb99f090e85657..3da3d1e4b1b414 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -587,15 +587,11 @@ def try_parse_dates(object[:] values, parser=None, else: parse_date = parser - try: - for i in range(n): - if values[i] == '': - result[i] = np.nan - else: - result[i] = parse_date(values[i]) - except Exception: - # raise if passed parser and it failed - raise + for i in range(n): + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) return result.base # .base to access underlying ndarray @@ -814,7 +810,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse, if dt_str_parse is None or dt_str_split is None: return None - if not isinstance(dt_str, (str, unicode)): + if not isinstance(dt_str, str): return None day_attribute_and_format = (('day',), '%d', 2) @@ -840,19 +836,16 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse, try: parsed_datetime = dt_str_parse(dt_str, 
dayfirst=dayfirst) - except: + except (ValueError, OverflowError): # In case the datetime can't be parsed, its format cannot be guessed return None if parsed_datetime is None: return None - try: - tokens = dt_str_split(dt_str) - except: - # In case the datetime string can't be split, its format cannot - # be guessed - return None + # the default dt_str_split from dateutil will never raise here; we assume + # that any user-provided function will not either. + tokens = dt_str_split(dt_str) format_guess = [None] * len(tokens) found_attrs = set() From e6bafb5eadcbb85a051525c0af1b992e4df172ff Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Sep 2019 04:56:15 -0700 Subject: [PATCH 92/95] CLN: Exception catching in io (#28349) * stop catching exception * CLN: catching Exception --- pandas/core/indexes/accessors.py | 23 ++++++++++------------- pandas/core/indexes/frozen.py | 5 ----- pandas/io/common.py | 18 ++++++++---------- pandas/io/parsers.py | 1 - pandas/io/pickle.py | 4 ++-- 5 files changed, 20 insertions(+), 31 deletions(-) diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 11b6cb2ca3ed4b..cc8ecc0e64684f 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -326,18 +326,15 @@ def __new__(cls, data): if orig is not None: data = Series(orig.values.categories, name=orig.name, copy=False) - try: - if is_datetime64_dtype(data.dtype): - return DatetimeProperties(data, orig) - elif is_datetime64tz_dtype(data.dtype): - return DatetimeProperties(data, orig) - elif is_timedelta64_dtype(data.dtype): - return TimedeltaProperties(data, orig) - elif is_period_arraylike(data): - return PeriodProperties(data, orig) - elif is_datetime_arraylike(data): - return DatetimeProperties(data, orig) - except Exception: - pass # we raise an attribute error anyway + if is_datetime64_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_datetime64tz_dtype(data.dtype): + return DatetimeProperties(data, orig) + elif is_timedelta64_dtype(data.dtype): + return TimedeltaProperties(data, orig) + elif is_period_arraylike(data): + return PeriodProperties(data, orig) + elif is_datetime_arraylike(data): + return DatetimeProperties(data, orig) raise AttributeError("Can only use .dt accessor with datetimelike values") diff --git a/pandas/core/indexes/frozen.py b/pandas/core/indexes/frozen.py index 329456e25bdedc..a6c39d049c50cf 100644 --- a/pandas/core/indexes/frozen.py +++ b/pandas/core/indexes/frozen.py @@ -70,12 +70,7 @@ def difference(self, other): # TODO: Consider deprecating these in favor of `union` (xref gh-15506) __add__ = __iadd__ = union - # Python 2 compat - def __getslice__(self, i, j): - return self.__class__(super().__getslice__(i, j)) - def __getitem__(self, n): - # Python 3 compat if isinstance(n, slice): return self.__class__(super().__getitem__(n)) return super().__getitem__(n) diff --git a/pandas/io/common.py b/pandas/io/common.py index ac8dee8467370d..0bbac8a8b7c1cf 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -90,7 +90,8 @@ def __next__(self): def _is_url(url) -> bool: - """Check to see if a URL has a valid protocol. + """ + Check to see if a URL has a valid protocol. Parameters ---------- @@ -101,10 +102,9 @@ def _is_url(url) -> bool: isurl : bool If `url` has a valid protocol return True otherwise False. 
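The guard pattern used for all three URL checks in this file, in standalone form (``urllib.parse.urlparse`` stands in here for the ``parse_url`` alias used above; the explicit ``isinstance`` check replaces the old blanket ``except Exception``):

.. code-block:: python

   from urllib.parse import urlparse

   def is_s3_url(url) -> bool:
       # non-strings cannot be URLs; checking up front avoids the error
       # urlparse raises for inputs like None
       if not isinstance(url, str):
           return False
       return urlparse(url).scheme in ["s3", "s3n", "s3a"]

   print(is_s3_url("s3://bucket/key.parquet"))  # True
   print(is_s3_url(None))                       # False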
""" - try: - return parse_url(url).scheme in _VALID_URLS - except Exception: + if not isinstance(url, str): return False + return parse_url(url).scheme in _VALID_URLS def _expand_user( @@ -171,18 +171,16 @@ def _stringify_path( def is_s3_url(url) -> bool: """Check for an s3, s3n, or s3a url""" - try: - return parse_url(url).scheme in ["s3", "s3n", "s3a"] - except Exception: + if not isinstance(url, str): return False + return parse_url(url).scheme in ["s3", "s3n", "s3a"] def is_gcs_url(url) -> bool: """Check for a gcs url""" - try: - return parse_url(url).scheme in ["gcs", "gs"] - except Exception: + if not isinstance(url, str): return False + return parse_url(url).scheme in ["gcs", "gs"] def urlopen(*args, **kwargs): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a3ff837bc7f52c..72f1adf0aad3dc 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1064,7 +1064,6 @@ def _clean_options(self, options, engine): ) if result.get(arg, depr_default) != depr_default: - # raise Exception(result.get(arg, depr_default), depr_default) depr_warning += msg + "\n\n" else: result[arg] = parser_default diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 4e390de87fc607..4b9a52a1fb8f33 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -153,10 +153,10 @@ def read_pickle(path, compression="infer"): # We want to silence any warnings about, e.g. moved modules. warnings.simplefilter("ignore", Warning) return pickle.load(f) - except Exception: # noqa: E722 + except Exception: try: return pc.load(f, encoding=None) - except Exception: # noqa: E722 + except Exception: return pc.load(f, encoding="latin1") finally: f.close() From 3f5b5c45f481fe0cbb704f6463578675318bb1f6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 9 Sep 2019 04:57:20 -0700 Subject: [PATCH 93/95] CLN: raise ValueError instead of Exception (#28352) --- pandas/core/groupby/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e514162f84c374..e731cffea0671a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -324,7 +324,11 @@ def _aggregate_item_by_item(self, func, *args, **kwargs): if cast: result[item] = self._try_cast(result[item], data) - except ValueError: + except ValueError as err: + if "Must produce aggregated value" in str(err): + # raised in _aggregate_named, handle at higher level + # see test_apply_with_mutated_index + raise cannot_agg.append(item) continue except TypeError as e: @@ -1009,7 +1013,7 @@ def _aggregate_named(self, func, *args, **kwargs): group.name = name output = func(group, *args, **kwargs) if isinstance(output, (Series, Index, np.ndarray)): - raise Exception("Must produce aggregated value") + raise ValueError("Must produce aggregated value") result[name] = self._try_cast(output, group) return result From e0c63b4cfaa821dfe310f4a8a1f84929ced5f5bd Mon Sep 17 00:00:00 2001 From: Noritada Kobayashi Date: Mon, 9 Sep 2019 21:06:00 +0900 Subject: [PATCH 94/95] BUG/TST: fix and test for timezone drop in GroupBy.shift/bfill/ffill (#27992) --- doc/source/whatsnew/v0.25.2.rst | 1 + pandas/core/groupby/groupby.py | 12 ++--- pandas/tests/groupby/test_groupby.py | 66 ++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.25.2.rst b/doc/source/whatsnew/v0.25.2.rst index 1cdf213d81a74b..69f324211e5b28 100644 --- a/doc/source/whatsnew/v0.25.2.rst +++ b/doc/source/whatsnew/v0.25.2.rst @@ -78,6 +78,7 
@@ Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ - Bug incorrectly raising an ``IndexError`` when passing a list of quantiles to :meth:`pandas.core.groupby.DataFrameGroupBy.quantile` (:issue:`28113`). +- Bug in :meth:`pandas.core.groupby.GroupBy.shift`, :meth:`pandas.core.groupby.GroupBy.bfill` and :meth:`pandas.core.groupby.GroupBy.ffill` where timezone information would be dropped (:issue:`19995`, :issue:`27992`) - - - diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 55def024cb1d46..e010e615e176e6 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2263,26 +2263,28 @@ def _get_cythonized_result( base_func = getattr(libgroupby, how) for name, obj in self._iterate_slices(): + values = obj._data._values + if aggregate: result_sz = ngroups else: - result_sz = len(obj.values) + result_sz = len(values) if not cython_dtype: - cython_dtype = obj.values.dtype + cython_dtype = values.dtype result = np.zeros(result_sz, dtype=cython_dtype) func = partial(base_func, result, labels) inferences = None if needs_values: - vals = obj.values + vals = values if pre_processing: vals, inferences = pre_processing(vals) func = partial(func, vals) if needs_mask: - mask = isna(obj.values).view(np.uint8) + mask = isna(values).view(np.uint8) func = partial(func, mask) if needs_ngroups: @@ -2291,7 +2293,7 @@ def _get_cythonized_result( func(**kwargs) # Call func to modify indexer values in place if result_is_index: - result = algorithms.take_nd(obj.values, result) + result = algorithms.take_nd(values, result) if post_processing: result = post_processing(result, inferences) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4556b22b572797..bec5cbc5fecb8b 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1882,3 +1882,69 @@ def test_groupby_axis_1(group_name): results = df.groupby(group_name, axis=1).sum() expected = df.T.groupby(group_name).sum().T assert_frame_equal(results, expected) + + +@pytest.mark.parametrize( + "op, expected", + [ + ( + "shift", + { + "time": [ + None, + None, + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + None, + None, + ] + }, + ), + ( + "bfill", + { + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ] + }, + ), + ( + "ffill", + { + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ] + }, + ), + ], +) +def test_shift_bfill_ffill_tz(tz_naive_fixture, op, expected): + # GH19995, GH27992: Check that timezone does not drop in shift, bfill, and ffill + tz = tz_naive_fixture + data = { + "id": ["A", "B", "A", "B", "A", "B"], + "time": [ + Timestamp("2019-01-01 12:00:00"), + Timestamp("2019-01-01 12:30:00"), + None, + None, + Timestamp("2019-01-01 14:00:00"), + Timestamp("2019-01-01 14:30:00"), + ], + } + df = DataFrame(data).assign(time=lambda x: x.time.dt.tz_localize(tz)) + + grouped = df.groupby("id") + result = getattr(grouped, op)() + expected = DataFrame(expected).assign(time=lambda x: x.time.dt.tz_localize(tz)) + assert_frame_equal(result, expected) From 96bf66108ef7a37e7b68414c4e72182e1ecdd5b4 Mon Sep 17 00:00:00 2001 From: Mak Sze Chun Date: Tue, 10 Sep 
2019 10:28:47 +0800
Subject: [PATCH 95/95] [ENH] Use default EA repr for IntervalArray (#26316)

---
 doc/source/whatsnew/v1.0.0.rst                |  20 +-
 pandas/core/arrays/interval.py                | 210 ++++++++++++------
 pandas/core/indexes/interval.py               | 107 ++++++++-
 pandas/tests/arrays/interval/test_interval.py |  15 +-
 4 files changed, 267 insertions(+), 85 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index e1fe2f7fe77e2c..329018bdf4bfb4 100644
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -37,7 +37,25 @@ Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

 - :class:`pandas.core.groupby.GroupBy.transform` now raises on invalid operation names (:issue:`27489`).
--
+- :class:`pandas.core.arrays.IntervalArray` adopts a new ``__repr__`` in accordance with other array classes (:issue:`25022`)
+
+*pandas 0.25.x*
+
+.. code-block:: ipython
+
+   In [1]: pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)])
+   Out[2]:
+   IntervalArray([(0, 1], (2, 3]],
+   closed='right',
+   dtype='interval[int64]')
+
+
+*pandas 1.0.0*
+
+.. ipython:: python
+
+   pd.arrays.IntervalArray.from_tuples([(0, 1), (2, 3)])
+

 .. _whatsnew_1000.api.other:

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
index 7a14d6f1b619aa..1f4b76a259f00c 100644
--- a/pandas/core/arrays/interval.py
+++ b/pandas/core/arrays/interval.py
@@ -129,9 +129,9 @@
 ``Interval`` objects:

     >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
-    IntervalArray([(0, 1], (1, 5]],
-                  closed='right',
-                  dtype='interval[int64]')
+    <IntervalArray>
+    [(0, 1], (1, 5]]
+    Length: 2, closed: right, dtype: interval[int64]

 It may also be constructed using one of the constructor
 methods: :meth:`IntervalArray.from_arrays`,
@@ -248,9 +248,8 @@ def _from_factorized(cls, values, original):
             values = values.astype(original.dtype.subtype)
         return cls(values, closed=original.closed)

-    _interval_shared_docs[
-        "from_breaks"
-    ] = """
+    _interval_shared_docs["from_breaks"] = textwrap.dedent(
+        """
         Construct an %(klass)s from an array of splits.

         Parameters
@@ -277,24 +276,34 @@ def _from_factorized(cls, values, original):
         %(klass)s.from_arrays : Construct from a left and right array.
         %(klass)s.from_tuples : Construct from a sequence of tuples.

-        Examples
-        --------
-        >>> pd.%(qualname)s.from_breaks([0, 1, 2, 3])
-        %(klass)s([(0, 1], (1, 2], (2, 3]],
-                  closed='right',
-                  dtype='interval[int64]')
+        %(examples)s\
         """
+    )

     @classmethod
-    @Appender(_interval_shared_docs["from_breaks"] % _shared_docs_kwargs)
+    @Appender(
+        _interval_shared_docs["from_breaks"]
+        % dict(
+            klass="IntervalArray",
+            examples=textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
+        <IntervalArray>
+        [(0, 1], (1, 2], (2, 3]]
+        Length: 3, closed: right, dtype: interval[int64]
+        """
+            ),
+        )
+    )
     def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
         breaks = maybe_convert_platform_interval(breaks)

         return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype)

-    _interval_shared_docs[
-        "from_arrays"
-    ] = """
+    _interval_shared_docs["from_arrays"] = textwrap.dedent(
+        """
         Construct from two arrays defining the left and right bounds.

         Parameters
@@ -340,16 +349,25 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
         using an unsupported type for `left` or `right`. At the moment,
         'category', 'object', and 'string' subtypes are not supported.
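A quick illustration of the ``Raises`` contract documented just above (the exact message wording is pandas' own and may vary between versions):

.. code-block:: python

   import pandas as pd

   # each left bound must be <= the corresponding right bound
   try:
       pd.arrays.IntervalArray.from_arrays([0, 2], [1, 1])
   except ValueError as err:
       print(err)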
-        Examples
-        --------
-        >>> %(klass)s.from_arrays([0, 1, 2], [1, 2, 3])
-        %(klass)s([(0, 1], (1, 2], (2, 3]],
-                  closed='right',
-                  dtype='interval[int64]')
+        %(examples)s\
        """
+    )

    @classmethod
-    @Appender(_interval_shared_docs["from_arrays"] % _shared_docs_kwargs)
+    @Appender(
+        _interval_shared_docs["from_arrays"]
+        % dict(
+            klass="IntervalArray",
+            examples=textwrap.dedent(
+                """\
+        >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
+        <IntervalArray>
+        [(0, 1], (1, 2], (2, 3]]
+        Length: 3, closed: right, dtype: interval[int64]
+        """
+            ),
+        )
+    )
     def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
         left = maybe_convert_platform_interval(left)
         right = maybe_convert_platform_interval(right)
@@ -358,9 +376,8 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
             left, right, closed, copy=copy, dtype=dtype, verify_integrity=True
         )

-    _interval_shared_docs[
-        "from_tuples"
-    ] = """
+    _interval_shared_docs["from_tuples"] = textwrap.dedent(
+        """
         Construct an %(klass)s from an array-like of tuples.

         Parameters
@@ -389,15 +406,27 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
         %(klass)s.from_breaks : Construct an %(klass)s from an array of
             splits.

-        Examples
-        --------
-        >>> pd.%(qualname)s.from_tuples([(0, 1), (1, 2)])
-        %(klass)s([(0, 1], (1, 2]],
-                  closed='right', dtype='interval[int64]')
+        %(examples)s\
        """
+    )

    @classmethod
-    @Appender(_interval_shared_docs["from_tuples"] % _shared_docs_kwargs)
+    @Appender(
+        _interval_shared_docs["from_tuples"]
+        % dict(
+            klass="IntervalArray",
+            examples=textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
+        <IntervalArray>
+        [(0, 1], (1, 2]]
+        Length: 2, closed: right, dtype: interval[int64]
+        """
+            ),
+        )
+    )
     def from_tuples(cls, data, closed="right", copy=False, dtype=None):
         if len(data):
             left, right = [], []
@@ -832,16 +861,20 @@ def _format_data(self):
         return summary

     def __repr__(self):
-        tpl = textwrap.dedent(
-            """\
-        {cls}({data},
-        {lead}closed='{closed}',
-        {lead}dtype='{dtype}')"""
+        template = (
+            "{class_name}"
+            "{data}\n"
+            "Length: {length}, closed: {closed}, dtype: {dtype}"
         )
-        return tpl.format(
-            cls=self.__class__.__name__,
-            data=self._format_data(),
-            lead=" " * len(self.__class__.__name__) + " ",
+        # the short repr has no trailing newline, while the truncated
+        # repr does. So we include a newline in our template, and strip
+        # any trailing newlines from format_object_summary
+        data = self._format_data()
+        class_name = "<{}>\n".format(self.__class__.__name__)
+        return template.format(
+            class_name=class_name,
+            data=data,
+            length=len(self),
             closed=self.closed,
             dtype=self.dtype,
         )
@@ -874,9 +907,8 @@ def closed(self):
         """
         return self._closed

-    _interval_shared_docs[
-        "set_closed"
-    ] = """
+    _interval_shared_docs["set_closed"] = textwrap.dedent(
+        """
         Return an %(klass)s identical to the current one, but closed on the
         specified side.
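The mechanics being applied throughout this file: each shared docstring becomes a template, and each class substitutes its own ``examples`` block at decoration time, so ``IntervalArray`` and ``IntervalIndex`` can each show their own repr. A stripped-down sketch of the same pattern (toy class, not pandas code; ``Appender`` is an internal pandas helper):

.. code-block:: python

   import textwrap

   from pandas.util._decorators import Appender

   _shared_doc = textwrap.dedent(
       """
       Construct a %(klass)s from an array of splits.

       %(examples)s
       """
   )

   class Toy:
       @classmethod
       @Appender(_shared_doc % dict(klass="Toy", examples=">>> Toy.from_breaks([0, 1])"))
       def from_breaks(cls, breaks):
           return cls()

   print(Toy.from_breaks.__doc__)  # shared text with the Toy-specific example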
@@ -892,20 +924,31 @@ def closed(self):
         -------
         new_index : %(klass)s

+        %(examples)s\
+        """
+    )
+
+    @Appender(
+        _interval_shared_docs["set_closed"]
+        % dict(
+            klass="IntervalArray",
+            examples=textwrap.dedent(
+                """\
         Examples
         --------
-        >>> index = pd.interval_range(0, 3)
+        >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
         >>> index
-        IntervalIndex([(0, 1], (1, 2], (2, 3]],
-                      closed='right',
-                      dtype='interval[int64]')
+        <IntervalArray>
+        [(0, 1], (1, 2], (2, 3]]
+        Length: 3, closed: right, dtype: interval[int64]
         >>> index.set_closed('both')
-        IntervalIndex([[0, 1], [1, 2], [2, 3]],
-                      closed='both',
-                      dtype='interval[int64]')
+        <IntervalArray>
+        [[0, 1], [1, 2], [2, 3]]
+        Length: 3, closed: both, dtype: interval[int64]
         """
-
-    @Appender(_interval_shared_docs["set_closed"] % _shared_docs_kwargs)
+            ),
+        )
+    )
     def set_closed(self, closed):
         if closed not in _VALID_CLOSED:
             msg = "invalid option for 'closed': {closed}"
@@ -1028,9 +1071,8 @@ def repeat(self, repeats, axis=None):
         right_repeat = self.right.repeat(repeats)
         return self._shallow_copy(left=left_repeat, right=right_repeat)

-    _interval_shared_docs[
-        "contains"
-    ] = """
+    _interval_shared_docs["contains"] = textwrap.dedent(
+        """
         Check elementwise if the Intervals contain the value.

         Return a boolean mask whether the value is contained in the Intervals
@@ -1055,16 +1097,27 @@ def repeat(self, repeats, axis=None):

         Examples
         --------
-        >>> intervals = pd.%(qualname)s.from_tuples([(0, 1), (1, 3), (2, 4)])
-        >>> intervals
-        %(klass)s([(0, 1], (1, 3], (2, 4]],
-                  closed='right',
-                  dtype='interval[int64]')
+        %(examples)s
         >>> intervals.contains(0.5)
         array([ True, False, False])
         """
+    )

-    @Appender(_interval_shared_docs["contains"] % _shared_docs_kwargs)
+    @Appender(
+        _interval_shared_docs["contains"]
+        % dict(
+            klass="IntervalArray",
+            examples=textwrap.dedent(
+                """\
+        >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
+        >>> intervals
+        <IntervalArray>
+        [(0, 1], (1, 3], (2, 4]]
+        Length: 3, closed: right, dtype: interval[int64]
+        """
+            ),
+        )
+    )
     def contains(self, other):
         if isinstance(other, Interval):
             raise NotImplementedError("contains not implemented for two intervals")
@@ -1073,9 +1126,8 @@ def contains(self, other):
             other < self.right if self.open_right else other <= self.right
         )

-    _interval_shared_docs[
-        "overlaps"
-    ] = """
+    _interval_shared_docs["overlaps"] = textwrap.dedent(
+        """
         Check elementwise if an Interval overlaps the values in the %(klass)s.

         Two intervals overlap if they share a common point, including closed
         endpoints. Intervals that only have an open endpoint in common do not
         overlap.

         Parameters
         ----------
-        other : Interval
+        other : %(klass)s
             Interval to check against for an overlap.
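To make the endpoint rule above concrete: two intervals that share only a boundary point overlap exactly when that point is closed on both sides. A sketch with scalar ``Interval`` objects (the array method applies the same rule elementwise):

.. code-block:: python

   import pandas as pd

   a = pd.Interval(0, 1, closed="right")  # (0, 1]
   b = pd.Interval(1, 2, closed="right")  # (1, 2], endpoint 1 is open
   c = pd.Interval(1, 2, closed="left")   # [1, 2), endpoint 1 is closed

   print(a.overlaps(b))  # False: the shared point 1 is open in b
   print(a.overlaps(c))  # True: 1 is closed in both a and c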
         Returns
@@ -1100,11 +1152,7 @@ def contains(self, other):

         Examples
         --------
-        >>> intervals = pd.%(qualname)s.from_tuples([(0, 1), (1, 3), (2, 4)])
-        >>> intervals
-        %(klass)s([(0, 1], (1, 3], (2, 4]],
-                  closed='right',
-                  dtype='interval[int64]')
+        %(examples)s
         >>> intervals.overlaps(pd.Interval(0.5, 1.5))
         array([ True,  True, False])

@@ -1117,9 +1165,25 @@ def contains(self, other):

         >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
         array([False,  True, False])
-        """
+        """
+    )

-    @Appender(_interval_shared_docs["overlaps"] % _shared_docs_kwargs)
+    @Appender(
+        _interval_shared_docs["overlaps"]
+        % dict(
+            klass="IntervalArray",
+            examples=textwrap.dedent(
+                """\
+        >>> data = [(0, 1), (1, 3), (2, 4)]
+        >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
+        >>> intervals
+        <IntervalArray>
+        [(0, 1], (1, 3], (2, 4]]
+        Length: 3, closed: right, dtype: interval[int64]
+        """
+            ),
+        )
+    )
     def overlaps(self, other):
         if isinstance(other, (IntervalArray, ABCIntervalIndex)):
             raise NotImplementedError

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index 7c581a12764b1e..29e297cb28a3b8 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -250,7 +250,22 @@ def _simple_new(cls, array, name, closed=None):
         return result

     @classmethod
-    @Appender(_interval_shared_docs["from_breaks"] % _index_doc_kwargs)
+    @Appender(
+        _interval_shared_docs["from_breaks"]
+        % dict(
+            klass="IntervalIndex",
+            examples=textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
+        IntervalIndex([(0, 1], (1, 2], (2, 3]],
+                      closed='right',
+                      dtype='interval[int64]')
+        """
+            ),
+        )
+    )
     def from_breaks(cls, breaks, closed="right", name=None, copy=False, dtype=None):
         with rewrite_exception("IntervalArray", cls.__name__):
             array = IntervalArray.from_breaks(
@@ -259,7 +274,22 @@ def from_breaks(cls, breaks, closed="right", name=None, copy=False, dtype=None):
         return cls._simple_new(array, name=name)

     @classmethod
-    @Appender(_interval_shared_docs["from_arrays"] % _index_doc_kwargs)
+    @Appender(
+        _interval_shared_docs["from_arrays"]
+        % dict(
+            klass="IntervalIndex",
+            examples=textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
+        IntervalIndex([(0, 1], (1, 2], (2, 3]],
+                      closed='right',
+                      dtype='interval[int64]')
+        """
+            ),
+        )
+    )
     def from_arrays(
         cls, left, right, closed="right", name=None, copy=False, dtype=None
     ):
@@ -270,7 +300,22 @@ def from_arrays(
         return cls._simple_new(array, name=name)

     @classmethod
-    @Appender(_interval_shared_docs["from_tuples"] % _index_doc_kwargs)
+    @Appender(
+        _interval_shared_docs["from_tuples"]
+        % dict(
+            klass="IntervalIndex",
+            examples=textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
+        IntervalIndex([(0, 1], (1, 2]],
+                      closed='right',
+                      dtype='interval[int64]')
+        """
+            ),
+        )
+    )
     def from_tuples(cls, data, closed="right", name=None, copy=False, dtype=None):
         with rewrite_exception("IntervalArray", cls.__name__):
             arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
@@ -367,7 +412,27 @@ def closed(self):
         """
         return self._data._closed

-    @Appender(_interval_shared_docs["set_closed"] % _index_doc_kwargs)
+    @Appender(
+        _interval_shared_docs["set_closed"]
+        % dict(
+            klass="IntervalIndex",
+            examples=textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> index = pd.interval_range(0, 3)
+        >>> index
+        IntervalIndex([(0, 1], (1, 2], (2, 3]],
+                      closed='right',
+                      dtype='interval[int64]')
+        >>> index.set_closed('both')
+        IntervalIndex([[0, 1], [1, 2], [2, 3]],
+                      closed='both',
+                      dtype='interval[int64]')
+        """
+            ),
+        )
+    )
     def set_closed(self, closed):
         if closed not in _VALID_CLOSED:
             msg = "invalid option for 'closed': {closed}"
@@ -1168,11 +1233,41 @@ def equals(self, other):
             and self.closed == other.closed
         )

-    @Appender(_interval_shared_docs["contains"] % _index_doc_kwargs)
+    @Appender(
+        _interval_shared_docs["contains"]
+        % dict(
+            klass="IntervalIndex",
+            examples=textwrap.dedent(
+                """\
+        >>> intervals = pd.IntervalIndex.from_tuples([(0, 1), (1, 3), (2, 4)])
+        >>> intervals
+        IntervalIndex([(0, 1], (1, 3], (2, 4]],
+                      closed='right',
+                      dtype='interval[int64]')
+        >>> intervals.contains(0.5)
+        array([ True, False, False])
+        """
+            ),
+        )
+    )
     def contains(self, other):
         return self._data.contains(other)

-    @Appender(_interval_shared_docs["overlaps"] % _index_doc_kwargs)
+    @Appender(
+        _interval_shared_docs["overlaps"]
+        % dict(
+            klass="IntervalIndex",
+            examples=textwrap.dedent(
+                """\
+        >>> intervals = pd.IntervalIndex.from_tuples([(0, 1), (1, 3), (2, 4)])
+        >>> intervals
+        IntervalIndex([(0, 1], (1, 3], (2, 4]],
+                      closed='right',
+                      dtype='interval[int64]')
+        """
+            ),
+        )
+    )
     def overlaps(self, other):
         return self._data.overlaps(other)

diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
index 6a86289b6fcc60..655a6e717119b1 100644
--- a/pandas/tests/arrays/interval/test_interval.py
+++ b/pandas/tests/arrays/interval/test_interval.py
@@ -93,8 +93,13 @@ def test_set_na(self, left_right_dtypes):
         tm.assert_extension_array_equal(result, expected)


-def test_repr_matches():
-    idx = IntervalIndex.from_breaks([1, 2, 3])
-    a = repr(idx)
-    b = repr(idx.values)
-    assert a.replace("Index", "Array") == b
+def test_repr():
+    # GH 25022
+    arr = IntervalArray.from_tuples([(0, 1), (1, 2)])
+    result = repr(arr)
+    expected = (
+        "<IntervalArray>\n"
+        "[(0, 1], (1, 2]]\n"
+        "Length: 2, closed: right, dtype: interval[int64]"
+    )
+    assert result == expected
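For reference, the new repr end-to-end, mirroring ``test_repr`` above (assumes a pandas build with this patch applied):

.. code-block:: python

   import pandas as pd

   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
   expected = (
       "<IntervalArray>\n"
       "[(0, 1], (1, 2]]\n"
       "Length: 2, closed: right, dtype: interval[int64]"
   )
   assert repr(arr) == expected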