From 4e9c0d1f2156c656df5da4ac3f00190f0da5828b Mon Sep 17 00:00:00 2001 From: jschendel Date: Sat, 19 Aug 2017 10:51:05 -0600 Subject: [PATCH 001/188] CLN: replace %s syntax with .format in pandas.tseries (#17290) --- pandas/tseries/frequencies.py | 38 +++++----- pandas/tseries/holiday.py | 14 ++-- pandas/tseries/offsets.py | 137 +++++++++++++++++++--------------- 3 files changed, 105 insertions(+), 84 deletions(-) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index aa33a3849acb3d..7f34bcaf52926e 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -409,16 +409,17 @@ def _get_freq_str(base, mult=1): need_suffix = ['QS', 'BQ', 'BQS', 'YS', 'AS', 'BY', 'BA', 'BYS', 'BAS'] for __prefix in need_suffix: for _m in tslib._MONTHS: - _offset_to_period_map['%s-%s' % (__prefix, _m)] = \ - _offset_to_period_map[__prefix] + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) + _offset_to_period_map[_alias] = _offset_to_period_map[__prefix] for __prefix in ['A', 'Q']: for _m in tslib._MONTHS: - _alias = '%s-%s' % (__prefix, _m) + _alias = '{prefix}-{month}'.format(prefix=__prefix, month=_m) _offset_to_period_map[_alias] = _alias _days = ['MON', 'TUE', 'WED', 'THU', 'FRI', 'SAT', 'SUN'] for _d in _days: - _offset_to_period_map['W-%s' % _d] = 'W-%s' % _d + _alias = 'W-{day}'.format(day=_d) + _offset_to_period_map[_alias] = _alias def get_period_alias(offset_str): @@ -587,7 +588,7 @@ def _base_and_stride(freqstr): groups = opattern.match(freqstr) if not groups: - raise ValueError("Could not evaluate %s" % freqstr) + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) stride = groups.group(1) @@ -775,8 +776,8 @@ def infer_freq(index, warn=True): if not (is_datetime64_dtype(values) or is_timedelta64_dtype(values) or values.dtype == object): - raise TypeError("cannot infer freq from a non-convertible " - "dtype on a Series of {0}".format(index.dtype)) + raise TypeError("cannot infer freq from a non-convertible dtype " + "on a Series of {dtype}".format(dtype=index.dtype)) index = values if is_period_arraylike(index): @@ -789,7 +790,7 @@ def infer_freq(index, warn=True): if isinstance(index, pd.Index) and not isinstance(index, pd.DatetimeIndex): if isinstance(index, (pd.Int64Index, pd.Float64Index)): raise TypeError("cannot infer freq from a non-convertible index " - "type {0}".format(type(index))) + "type {type}".format(type=type(index))) index = index.values if not isinstance(index, pd.DatetimeIndex): @@ -956,15 +957,17 @@ def _infer_daily_rule(self): if annual_rule: nyears = self.ydiffs[0] month = _month_aliases[self.rep_stamp.month] - return _maybe_add_count('%s-%s' % (annual_rule, month), nyears) + alias = '{prefix}-{month}'.format(prefix=annual_rule, month=month) + return _maybe_add_count(alias, nyears) quarterly_rule = self._get_quarterly_rule() if quarterly_rule: nquarters = self.mdiffs[0] / 3 mod_dict = {0: 12, 2: 11, 1: 10} month = _month_aliases[mod_dict[self.rep_stamp.month % 3]] - return _maybe_add_count('%s-%s' % (quarterly_rule, month), - nquarters) + alias = '{prefix}-{month}'.format(prefix=quarterly_rule, + month=month) + return _maybe_add_count(alias, nquarters) monthly_rule = self._get_monthly_rule() if monthly_rule: @@ -974,8 +977,8 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + day = _weekday_rule_aliases[self.rep_stamp.weekday()] + return 
_maybe_add_count('W-{day}'.format(day=day), days / 7) else: return _maybe_add_count('D', days) @@ -1048,7 +1051,7 @@ def _get_wom_rule(self): week = week_of_months[0] + 1 wd = _weekday_rule_aliases[weekdays[0]] - return 'WOM-%d%s' % (week, wd) + return 'WOM-{week}{weekday}'.format(week=week, weekday=wd) class _TimedeltaFrequencyInferer(_FrequencyInferer): @@ -1058,15 +1061,16 @@ def _infer_daily_rule(self): days = self.deltas[0] / _ONE_DAY if days % 7 == 0: # Weekly - alias = _weekday_rule_aliases[self.rep_stamp.weekday()] - return _maybe_add_count('W-%s' % alias, days / 7) + wd = _weekday_rule_aliases[self.rep_stamp.weekday()] + alias = 'W-{weekday}'.format(weekday=wd) + return _maybe_add_count(alias, days / 7) else: return _maybe_add_count('D', days) def _maybe_add_count(base, count): if count != 1: - return '%d%s' % (count, base) + return '{count}{base}'.format(count=int(count), base=base) else: return base diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 9acb52ebe0e9f2..d8bfa3013f8f79 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -174,16 +174,16 @@ class from pandas.tseries.offsets def __repr__(self): info = '' if self.year is not None: - info += 'year=%s, ' % self.year - info += 'month=%s, day=%s, ' % (self.month, self.day) + info += 'year={year}, '.format(year=self.year) + info += 'month={mon}, day={day}, '.format(mon=self.month, day=self.day) if self.offset is not None: - info += 'offset=%s' % self.offset + info += 'offset={offset}'.format(offset=self.offset) if self.observance is not None: - info += 'observance=%s' % self.observance + info += 'observance={obs}'.format(obs=self.observance) - repr = 'Holiday: %s (%s)' % (self.name, info) + repr = 'Holiday: {name} ({info})'.format(name=self.name, info=info) return repr def dates(self, start_date, end_date, return_name=False): @@ -374,8 +374,8 @@ def holidays(self, start=None, end=None, return_name=False): DatetimeIndex of holidays """ if self.rules is None: - raise Exception('Holiday Calendar %s does not have any ' - 'rules specified' % self.name) + raise Exception('Holiday Calendar {name} does not have any ' + 'rules specified'.format(name=self.name)) if start is None: start = AbstractHolidayCalendar.start_date diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 56ef703e67ca08..29cdda55488965 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -261,10 +261,10 @@ def apply_index(self, i): """ if not type(self) is DateOffset: - raise NotImplementedError("DateOffset subclass %s " + raise NotImplementedError("DateOffset subclass {name} " "does not have a vectorized " - "implementation" - % (self.__class__.__name__,)) + "implementation".format( + name=self.__class__.__name__)) relativedelta_fast = set(['years', 'months', 'weeks', 'days', 'hours', 'minutes', 'seconds', 'microseconds']) @@ -295,10 +295,10 @@ def apply_index(self, i): return i + (self._offset * self.n) else: # relativedelta with other keywords + kwd = set(self.kwds) - relativedelta_fast raise NotImplementedError("DateOffset with relativedelta " - "keyword(s) %s not able to be " - "applied vectorized" % - (set(self.kwds) - relativedelta_fast),) + "keyword(s) {kwd} not able to be " + "applied vectorized".format(kwd=kwd)) def isAnchored(self): return (self.n == 1) @@ -339,19 +339,20 @@ def __repr__(self): if attr not in exclude: attrs.append('='.join((attr, repr(getattr(self, attr))))) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if 
self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + attrs_str = '' if attrs: - out += ': ' + ', '.join(attrs) - out += '>' + attrs_str = ': ' + ', '.join(attrs) + + repr_content = ''.join([n_str, className, plural, attrs_str]) + out = '<{content}>'.format(content=repr_content) return out @property @@ -501,7 +502,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -509,7 +510,7 @@ def freqstr(self): @property def nanos(self): - raise ValueError("{0} is a non-fixed frequency".format(self)) + raise ValueError("{name} is a non-fixed frequency".format(name=self)) class SingleConstructorOffset(DateOffset): @@ -518,7 +519,7 @@ class SingleConstructorOffset(DateOffset): def _from_name(cls, suffix=None): # default _from_name calls cls with no args if suffix: - raise ValueError("Bad freq suffix %s" % suffix) + raise ValueError("Bad freq suffix {suffix}".format(suffix=suffix)) return cls() @@ -531,21 +532,21 @@ class BusinessMixin(object): def __repr__(self): className = getattr(self, '_outputName', self.__class__.__name__) + plural = '' if abs(self.n) != 1: plural = 's' - else: - plural = '' - n_str = "" + n_str = '' if self.n != 1: - n_str = "%s * " % self.n + n_str = '{n} * '.format(n=self.n) - out = '<%s' % n_str + className + plural + self._repr_attrs() + '>' + repr_content = ''.join([n_str, className, plural, self._repr_attrs()]) + out = '<{content}>'.format(content=repr_content) return out def _repr_attrs(self): if self.offset: - attrs = ['offset=%s' % repr(self.offset)] + attrs = ['offset={offset!r}'.format(offset=self.offset)] else: attrs = None out = '' @@ -601,7 +602,7 @@ def freqstr(self): return repr(self) if self.n != 1: - fstr = '%d%s' % (self.n, code) + fstr = '{n}{code}'.format(n=self.n, code=code) else: fstr = code @@ -1109,7 +1110,8 @@ def name(self): if self.isAnchored: return self.rule_code else: - return "%s-%s" % (self.rule_code, _int_to_month[self.n]) + return "{code}-{month}".format(code=self.rule_code, + month=_int_to_month[self.n]) class MonthEnd(MonthOffset): @@ -1176,9 +1178,9 @@ def __init__(self, n=1, day_of_month=None, normalize=False, **kwds): else: self.day_of_month = int(day_of_month) if not self._min_day_of_month <= self.day_of_month <= 27: - raise ValueError('day_of_month must be ' - '{}<=day_of_month<=27, got {}'.format( - self._min_day_of_month, self.day_of_month)) + msg = 'day_of_month must be {min}<=day_of_month<=27, got {day}' + raise ValueError(msg.format(min=self._min_day_of_month, + day=self.day_of_month)) self.n = int(n) self.normalize = normalize self.kwds = kwds @@ -1190,7 +1192,7 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - suffix = '-{}'.format(self.day_of_month) + suffix = '-{day_of_month}'.format(day_of_month=self.day_of_month) return self._prefix + suffix @apply_wraps @@ -1576,8 +1578,8 @@ def __init__(self, n=1, normalize=False, **kwds): if self.weekday is not None: if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) self._inc = timedelta(weeks=1) self.kwds = kwds @@ -1630,7 +1632,7 @@ def onOffset(self, dt): def rule_code(self): suffix = '' if self.weekday is not None: - suffix = '-%s' % (_int_to_weekday[self.weekday]) + suffix = '-{weekday}'.format(weekday=_int_to_weekday[self.weekday]) return self._prefix + 
suffix @classmethod @@ -1696,11 +1698,11 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) if self.week < 0 or self.week > 3: - raise ValueError('Week must be 0<=day<=3, got %d' % - self.week) + raise ValueError('Week must be 0<=week<=3, got {week}' + .format(week=self.week)) self.kwds = kwds @@ -1746,15 +1748,18 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%d%s' % (self._prefix, self.week + 1, - _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{week}{weekday}'.format(prefix=self._prefix, + week=self.week + 1, + weekday=weekday) _prefix = 'WOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... # only one digit weeks (1 --> week 0, 2 --> week 1, etc.) week = int(suffix[0]) - 1 @@ -1789,8 +1794,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.weekday < 0 or self.weekday > 6: - raise ValueError('Day must be 0<=day<=6, got %d' % - self.weekday) + raise ValueError('Day must be 0<=day<=6, got {day}' + .format(day=self.weekday)) self.kwds = kwds @@ -1829,14 +1834,17 @@ def onOffset(self, dt): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_weekday.get(self.weekday, '')) + weekday = _int_to_weekday.get(self.weekday, '') + return '{prefix}-{weekday}'.format(prefix=self._prefix, + weekday=weekday) _prefix = 'LWOM' @classmethod def _from_name(cls, suffix=None): if not suffix: - raise ValueError("Prefix %r requires a suffix." % (cls._prefix)) + raise ValueError("Prefix {prefix!r} requires a suffix." + .format(prefix=cls._prefix)) # TODO: handle n here... 
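        # e.g. a "LWOM-SAT" alias reaches here with suffix == "SAT", which
        # _weekday_to_int maps to weekday 5 (days are numbered MON=0 .. SUN=6)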
weekday = _weekday_to_int[suffix] return cls(weekday=weekday) @@ -1876,7 +1884,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.startingMonth]) + month = _int_to_month[self.startingMonth] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BQuarterEnd(QuarterOffset): @@ -2045,8 +2054,7 @@ def apply(self, other): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.startingMonth == 1 else self.startingMonth - 1 - # freq_month = self.startingMonth - freqstr = 'Q-%s' % (_int_to_month[freq_month],) + freqstr = 'Q-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) @@ -2071,7 +2079,8 @@ def _from_name(cls, suffix=None): @property def rule_code(self): - return '%s-%s' % (self._prefix, _int_to_month[self.month]) + month = _int_to_month[self.month] + return '{prefix}-{month}'.format(prefix=self._prefix, month=month) class BYearEnd(YearOffset): @@ -2246,7 +2255,7 @@ def _rollf(date): @apply_index_wraps def apply_index(self, i): freq_month = 12 if self.month == 1 else self.month - 1 - freqstr = 'A-%s' % (_int_to_month[freq_month],) + freqstr = 'A-{month}'.format(month=_int_to_month[freq_month]) return self._beg_apply_index(i, freqstr) def onOffset(self, dt): @@ -2312,7 +2321,8 @@ def __init__(self, n=1, normalize=False, **kwds): raise ValueError('N cannot be 0') if self.variation not in ["nearest", "last"]: - raise ValueError('%s is not a valid variation' % self.variation) + raise ValueError('{variation} is not a valid variation' + .format(variation=self.variation)) if self.variation == "nearest": weekday_offset = weekday(self.weekday) @@ -2438,8 +2448,9 @@ def _get_year_end_last(self, dt): @property def rule_code(self): + prefix = self._get_prefix() suffix = self.get_rule_code_suffix() - return "%s-%s" % (self._get_prefix(), suffix) + return "{prefix}-{suffix}".format(prefix=prefix, suffix=suffix) def _get_prefix(self): return self._prefix @@ -2451,9 +2462,11 @@ def _get_suffix_prefix(self): return self._suffix_prefix_last def get_rule_code_suffix(self): - return '%s-%s-%s' % (self._get_suffix_prefix(), - _int_to_month[self.startingMonth], - _int_to_weekday[self.weekday]) + prefix = self._get_suffix_prefix() + month = _int_to_month[self.startingMonth] + weekday = _int_to_weekday[self.weekday] + return '{prefix}-{month}-{weekday}'.format(prefix=prefix, month=month, + weekday=weekday) @classmethod def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): @@ -2463,7 +2476,7 @@ def _parse_suffix(cls, varion_code, startingMonth_code, weekday_code): variation = "last" else: raise ValueError( - "Unable to parse varion_code: %s" % (varion_code,)) + "Unable to parse varion_code: {code}".format(code=varion_code)) startingMonth = _month_to_int[startingMonth_code] weekday = _weekday_to_int[weekday_code] @@ -2628,8 +2641,9 @@ def onOffset(self, dt): @property def rule_code(self): suffix = self._offset.get_rule_code_suffix() - return "%s-%s" % (self._prefix, - "%s-%d" % (suffix, self.qtr_with_extra_week)) + qtr = self.qtr_with_extra_week + return "{prefix}-{suffix}-{qtr}".format(prefix=self._prefix, + suffix=suffix, qtr=qtr) @classmethod def _from_name(cls, *args): @@ -2712,8 +2726,8 @@ def __add__(self, other): except ApplyTypeError: return NotImplemented except OverflowError: - raise OverflowError("the add operation between {} and {} " - "will overflow".format(self, other)) + raise OverflowError("the add operation between {self} and {other} " + 
"will overflow".format(self=self, other=other)) def __eq__(self, other): if isinstance(other, compat.string_types): @@ -2771,7 +2785,8 @@ def apply(self, other): elif isinstance(other, type(self)): return type(self)(self.n + other.n) - raise ApplyTypeError('Unhandled type: %s' % type(other).__name__) + raise ApplyTypeError('Unhandled type: {type_str}' + .format(type_str=type(other).__name__)) _prefix = 'undefined' @@ -2921,7 +2936,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date <= cur: - raise ValueError('Offset %s did not increment date' % offset) + raise ValueError('Offset {offset} did not increment date' + .format(offset=offset)) cur = next_date else: while cur >= end: @@ -2930,7 +2946,8 @@ def generate_range(start=None, end=None, periods=None, # faster than cur + offset next_date = offset.apply(cur) if next_date >= cur: - raise ValueError('Offset %s did not decrement date' % offset) + raise ValueError('Offset {offset} did not decrement date' + .format(offset=offset)) cur = next_date From ab32c0a3e2033456ede23dbfeffc6adc8c4ea190 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 19 Aug 2017 17:55:34 -0400 Subject: [PATCH 002/188] TST: parameterize consistency tests for rolling/expanding windows (#17292) --- pandas/tests/test_window.py | 403 ++++++++++++++++++------------------ 1 file changed, 203 insertions(+), 200 deletions(-) diff --git a/pandas/tests/test_window.py b/pandas/tests/test_window.py index 21a9b05d481262..1cc0ad8bb40416 100644 --- a/pandas/tests/test_window.py +++ b/pandas/tests/test_window.py @@ -2009,6 +2009,15 @@ def no_nans(x): _consistency_data = _create_consistency_data() +def _rolling_consistency_cases(): + for window in [1, 2, 3, 10, 20]: + for min_periods in set([0, 1, 2, 3, 4, window]): + if min_periods and (min_periods > window): + continue + for center in [False, True]: + yield window, min_periods, center + + class TestMomentsConsistency(Base): base_functions = [ (lambda v: Series(v).count(), None, 'count'), @@ -2177,7 +2186,11 @@ def _non_null_values(x): (mean_x * mean_y)) @pytest.mark.slow - def test_ewm_consistency(self): + @pytest.mark.parametrize( + 'min_periods, adjust, ignore_na', product([0, 1, 2, 3, 4], + [True, False], + [False, True])) + def test_ewm_consistency(self, min_periods, adjust, ignore_na): def _weights(s, com, adjust, ignore_na): if isinstance(s, DataFrame): if not len(s.columns): @@ -2231,52 +2244,51 @@ def _ewma(s, com, min_periods, adjust, ignore_na): return result com = 3. 
- for min_periods, adjust, ignore_na in product([0, 1, 2, 3, 4], - [True, False], - [False, True]): - # test consistency between different ewm* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).mean(), - mock_mean=lambda x: _ewma(x, com=com, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na), - corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).corr(y), - var_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).var(bias=False)), - std_unbiased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .std(bias=False)), - cov_unbiased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=False)), - var_biased=lambda x: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .var(bias=True)), - std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na).std(bias=True), - cov_biased=lambda x, y: ( - x.ewm(com=com, min_periods=min_periods, - adjust=adjust, ignore_na=ignore_na) - .cov(y, bias=True)), - var_debiasing_factors=lambda x: ( - _variance_debiasing_factors(x, com=com, adjust=adjust, - ignore_na=ignore_na))) + # test consistency between different ewm* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).mean(), + mock_mean=lambda x: _ewma(x, com=com, + min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na), + corr=lambda x, y: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).corr(y), + var_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).var(bias=False)), + std_unbiased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .std(bias=False)), + cov_unbiased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=False)), + var_biased=lambda x: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .var(bias=True)), + std_biased=lambda x: x.ewm(com=com, min_periods=min_periods, + adjust=adjust, + ignore_na=ignore_na).std(bias=True), + cov_biased=lambda x, y: ( + x.ewm(com=com, min_periods=min_periods, + adjust=adjust, ignore_na=ignore_na) + .cov(y, bias=True)), + var_debiasing_factors=lambda x: ( + _variance_debiasing_factors(x, com=com, adjust=adjust, + ignore_na=ignore_na))) @pytest.mark.slow - def test_expanding_consistency(self): + @pytest.mark.parametrize( + 'min_periods', [0, 1, 2, 3, 4]) + def test_expanding_consistency(self, min_periods): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2285,72 +2297,72 @@ def test_expanding_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - for min_periods in [0, 1, 2, 3, 4]: - - # test consistency between different expanding_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: x.expanding().count(), - mean=lambda x: x.expanding( - min_periods=min_periods).mean(), - mock_mean=lambda x: x.expanding( - 
min_periods=min_periods).sum() / x.expanding().count(), - corr=lambda x, y: x.expanding( - min_periods=min_periods).corr(y), - var_unbiased=lambda x: x.expanding( - min_periods=min_periods).var(), - std_unbiased=lambda x: x.expanding( - min_periods=min_periods).std(), - cov_unbiased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y), - var_biased=lambda x: x.expanding( - min_periods=min_periods).var(ddof=0), - std_biased=lambda x: x.expanding( - min_periods=min_periods).std(ddof=0), - cov_biased=lambda x, y: x.expanding( - min_periods=min_periods).cov(y, ddof=0), - var_debiasing_factors=lambda x: ( - x.expanding().count() / - (x.expanding().count() - 1.) - .replace(0., np.nan))) - - # test consistency between expanding_xyz() and either (a) - # expanding_apply of Series.xyz(), or (b) expanding_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - expanding_f = getattr( - x.expanding(min_periods=min_periods), name) - - if (require_min_periods and - (min_periods is not None) and - (min_periods < require_min_periods)): - continue - - if name == 'count': - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=0).apply(func=f) + # test consistency between different expanding_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: x.expanding().count(), + mean=lambda x: x.expanding( + min_periods=min_periods).mean(), + mock_mean=lambda x: x.expanding( + min_periods=min_periods).sum() / x.expanding().count(), + corr=lambda x, y: x.expanding( + min_periods=min_periods).corr(y), + var_unbiased=lambda x: x.expanding( + min_periods=min_periods).var(), + std_unbiased=lambda x: x.expanding( + min_periods=min_periods).std(), + cov_unbiased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y), + var_biased=lambda x: x.expanding( + min_periods=min_periods).var(ddof=0), + std_biased=lambda x: x.expanding( + min_periods=min_periods).std(ddof=0), + cov_biased=lambda x, y: x.expanding( + min_periods=min_periods).cov(y, ddof=0), + var_debiasing_factors=lambda x: ( + x.expanding().count() / + (x.expanding().count() - 1.) 
+ .replace(0., np.nan))) + + # test consistency between expanding_xyz() and either (a) + # expanding_apply of Series.xyz(), or (b) expanding_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + expanding_f = getattr( + x.expanding(min_periods=min_periods), name) + + if (require_min_periods and + (min_periods is not None) and + (min_periods < require_min_periods)): + continue + + if name == 'count': + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=0).apply(func=f) + else: + if name in ['cov', 'corr']: + expanding_f_result = expanding_f( + pairwise=False) else: - if name in ['cov', 'corr']: - expanding_f_result = expanding_f( - pairwise=False) - else: - expanding_f_result = expanding_f() - expanding_apply_f_result = x.expanding( - min_periods=min_periods).apply(func=f) - - if not tm._incompat_bottleneck_version(name): - assert_equal(expanding_f_result, - expanding_apply_f_result) + expanding_f_result = expanding_f() + expanding_apply_f_result = x.expanding( + min_periods=min_periods).apply(func=f) + + if not tm._incompat_bottleneck_version(name): + assert_equal(expanding_f_result, + expanding_apply_f_result) @pytest.mark.slow - def test_rolling_consistency(self): + @pytest.mark.parametrize( + 'window,min_periods,center', list(_rolling_consistency_cases())) + def test_rolling_consistency(self, window, min_periods, center): # suppress warnings about empty slices, as we are deliberately testing # with empty/0-length Series/DataFrames @@ -2359,100 +2371,91 @@ def test_rolling_consistency(self): message=".*(empty slice|0 for slice).*", category=RuntimeWarning) - def cases(): - for window in [1, 2, 3, 10, 20]: - for min_periods in set([0, 1, 2, 3, 4, window]): - if min_periods and (min_periods > window): - continue - for center in [False, True]: - yield window, min_periods, center - - for window, min_periods, center in cases(): - # test consistency between different rolling_* moments - self._test_moments_consistency( - min_periods=min_periods, - count=lambda x: ( - x.rolling(window=window, center=center) - .count()), - mean=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).mean()), - mock_mean=lambda x: ( - x.rolling(window=window, - min_periods=min_periods, - center=center).sum() - .divide(x.rolling(window=window, - min_periods=min_periods, - center=center).count())), - corr=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).corr(y)), - - var_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var()), - - std_unbiased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std()), - - cov_unbiased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y)), - - var_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).var(ddof=0)), - - std_biased=lambda x: ( - x.rolling(window=window, min_periods=min_periods, - center=center).std(ddof=0)), - - cov_biased=lambda x, y: ( - x.rolling(window=window, min_periods=min_periods, - center=center).cov(y, ddof=0)), - var_debiasing_factors=lambda x: ( - x.rolling(window=window, center=center).count() - .divide((x.rolling(window=window, center=center) - .count() - 1.) 
- .replace(0., np.nan)))) - - # test consistency between rolling_xyz() and either (a) - # rolling_apply of Series.xyz(), or (b) rolling_apply of - # np.nanxyz() - for (x, is_constant, no_nans) in self.data: - functions = self.base_functions - - # GH 8269 - if no_nans: - functions = self.base_functions + self.no_nan_functions - for (f, require_min_periods, name) in functions: - rolling_f = getattr( - x.rolling(window=window, center=center, - min_periods=min_periods), name) - - if require_min_periods and ( - min_periods is not None) and ( - min_periods < require_min_periods): - continue + # test consistency between different rolling_* moments + self._test_moments_consistency( + min_periods=min_periods, + count=lambda x: ( + x.rolling(window=window, center=center) + .count()), + mean=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).mean()), + mock_mean=lambda x: ( + x.rolling(window=window, + min_periods=min_periods, + center=center).sum() + .divide(x.rolling(window=window, + min_periods=min_periods, + center=center).count())), + corr=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).corr(y)), - if name == 'count': - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=0, - center=center).apply(func=f) + var_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var()), + + std_unbiased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std()), + + cov_unbiased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y)), + + var_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).var(ddof=0)), + + std_biased=lambda x: ( + x.rolling(window=window, min_periods=min_periods, + center=center).std(ddof=0)), + + cov_biased=lambda x, y: ( + x.rolling(window=window, min_periods=min_periods, + center=center).cov(y, ddof=0)), + var_debiasing_factors=lambda x: ( + x.rolling(window=window, center=center).count() + .divide((x.rolling(window=window, center=center) + .count() - 1.) 
+ .replace(0., np.nan)))) + + # test consistency between rolling_xyz() and either (a) + # rolling_apply of Series.xyz(), or (b) rolling_apply of + # np.nanxyz() + for (x, is_constant, no_nans) in self.data: + functions = self.base_functions + + # GH 8269 + if no_nans: + functions = self.base_functions + self.no_nan_functions + for (f, require_min_periods, name) in functions: + rolling_f = getattr( + x.rolling(window=window, center=center, + min_periods=min_periods), name) + + if require_min_periods and ( + min_periods is not None) and ( + min_periods < require_min_periods): + continue + + if name == 'count': + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=0, + center=center).apply(func=f) + else: + if name in ['cov', 'corr']: + rolling_f_result = rolling_f( + pairwise=False) else: - if name in ['cov', 'corr']: - rolling_f_result = rolling_f( - pairwise=False) - else: - rolling_f_result = rolling_f() - rolling_apply_f_result = x.rolling( - window=window, min_periods=min_periods, - center=center).apply(func=f) - if not tm._incompat_bottleneck_version(name): - assert_equal(rolling_f_result, - rolling_apply_f_result) + rolling_f_result = rolling_f() + rolling_apply_f_result = x.rolling( + window=window, min_periods=min_periods, + center=center).apply(func=f) + if not tm._incompat_bottleneck_version(name): + assert_equal(rolling_f_result, + rolling_apply_f_result) # binary moments def test_rolling_cov(self): From 3b02e73b856a6f8d53382bf3908f04447bf90e03 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sat, 19 Aug 2017 17:59:19 -0400 Subject: [PATCH 003/188] FIX: define `DataFrame.items` for all versions of python (#17214) --- doc/source/whatsnew/v0.21.0.txt | 4 ++++ pandas/core/frame.py | 3 +-- pandas/core/series.py | 3 +-- pandas/tests/frame/test_api.py | 11 ++++++++++- pandas/tests/series/test_api.py | 10 ++++++++++ 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6008ea5d4cbcd2..c5fe89282bf52d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -128,6 +128,10 @@ Other Enhancements - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). +- :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) + + + .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 467ef52de234e8..b5b3df64d24c0b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -802,8 +802,7 @@ def itertuples(self, index=True, name="Pandas"): # fallback to regular tuples return zip(*arrays) - if compat.PY3: # pragma: no cover - items = iteritems + items = iteritems def __len__(self): """Returns length of info axis, but here we use the index """ diff --git a/pandas/core/series.py b/pandas/core/series.py index c8282450b77a9e..75dc3d6403650c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1110,8 +1110,7 @@ def iteritems(self): """ return zip(iter(self.index), iter(self)) - if compat.PY3: # pragma: no cover - items = iteritems + items = iteritems # ---------------------------------------------------------------------- # Misc public methods diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 53a1b9525a0dd1..a62fcb506a34bc 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -171,7 +171,16 @@ def test_nonzero(self): def test_iteritems(self): df = self.klass([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) for k, v in compat.iteritems(df): - assert type(v) == self.klass._constructor_sliced + assert isinstance(v, self.klass._constructor_sliced) + + def test_items(self): + # issue #17213, #13918 + cols = ['a', 'b', 'c'] + df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=cols) + for c, (k, v) in zip(cols, df.items()): + assert c == k + assert isinstance(v, Series) + assert (df[k] == v).all() def test_iter(self): assert tm.equalContents(list(self.frame), self.frame.columns) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index 8e22dd38030ee2..b7fbe803f8d3b9 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -301,6 +301,16 @@ def test_iteritems(self): # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), 'reverse') + def test_items(self): + for idx, val in self.series.items(): + assert val == self.series[idx] + + for idx, val in self.ts.items(): + assert val == self.ts[idx] + + # assert is lazy (genrators don't define reverse, lists do) + assert not hasattr(self.series.items(), 'reverse') + def test_raise_on_info(self): s = Series(np.random.randn(10)) with pytest.raises(AttributeError): From 58d872903449b8a29237288ade6227cdb280fe18 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 20 Aug 2017 16:25:43 -0500 Subject: [PATCH 004/188] PERF: Update ASV publish config (#17293) Stricter cutoffs for considering regressions [ci skip] --- asv_bench/asv.conf.json | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index 59c05400d06b0b..ced4f2b12445f3 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -117,8 +117,10 @@ // with results. If the commit is `null`, regression detection is // skipped for the matching benchmark. 
// - // "regressions_first_commits": { - // "some_benchmark": "352cdf", // Consider regressions only after this commit - // "another_benchmark": null, // Skip regression detection altogether - // } + "regressions_first_commits": { + "*": "v0.20.0" + }, + "regression_thresholds": { + "*": 0.05 + } } From e14431f897c7c0afd76d627ba933c07c277f8deb Mon Sep 17 00:00:00 2001 From: Yosuke Nakabayashi Date: Mon, 21 Aug 2017 09:50:44 +0200 Subject: [PATCH 005/188] DOC: Expand docstrings for head / tail methods (#16941) --- pandas/core/generic.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5a7f37bba91aa2..d9d75c870b20c1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2978,14 +2978,36 @@ def filter(self, items=None, like=None, regex=None, axis=None): def head(self, n=5): """ - Returns first n rows + Return the first n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_head : type of caller + The first n rows of the caller object. """ + return self.iloc[:n] def tail(self, n=5): """ - Returns last n rows + Return the last n rows. + + Parameters + ---------- + n : int, default 5 + Number of rows to select. + + Returns + ------- + obj_tail : type of caller + The last n rows of the caller object. """ + if n == 0: return self.iloc[0:0] return self.iloc[-n:] From 8354a1dfa9073eab1b120d39be31103fc29394bb Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 00:56:39 -0700 Subject: [PATCH 006/188] MAINT: Use set literal for unsupported + depr args Initializes unsupported and deprecated argument sets with set literals instead of the set constructor in pandas/io/parsers.py, as the former is slightly faster than the latter. --- pandas/io/parsers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 05a04f268f72b5..a9821be3fa5e2d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -487,18 +487,18 @@ def _read(filepath_or_buffer, kwds): 'widths': None, } -_c_unsupported = set(['skipfooter']) -_python_unsupported = set([ +_c_unsupported = {'skipfooter'} +_python_unsupported = { 'low_memory', 'buffer_lines', 'float_precision', -]) -_deprecated_args = set([ +} +_deprecated_args = { 'as_recarray', 'buffer_lines', 'compact_ints', 'use_unsigned', -]) +} def _make_parser_function(name, sep=','): From 91245a758ee32658c66bdecd9556f7054cd99901 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 21 Aug 2017 01:14:50 -0700 Subject: [PATCH 007/188] DOC: Add proper docstring to maybe_convert_indices Patches several spelling errors and expands current doc to a proper doc-string. --- pandas/core/indexing.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 109183827de4e8..929c2346ba5b0d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1985,9 +1985,31 @@ def get_indexer(_i, _idx): def maybe_convert_indices(indices, n): - """ if we have negative indicies, translate to postive here - if have indicies that are out-of-bounds, raise an IndexError """ + Attempt to convert indices into valid, positive indices. + + If we have negative indices, translate to positive here. + If we have indices that are out-of-bounds, raise an IndexError. + + Parameters + ---------- + indices : array-like + The array of indices that we are to convert. 
+ n : int + The number of elements in the array that we are indexing. + + Returns + ------- + valid_indices : array-like + An array-like of positive indices that correspond to the ones + that were passed in initially to this function. + + Raises + ------ + IndexError : one of the converted indices either exceeded the number + of elements (specified by `n`) OR was still negative. + """ + if isinstance(indices, list): indices = np.array(indices) if len(indices) == 0: From d0d28fec180ee61de17921fe5068ecde95adae8a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?agust=C3=ADn=20m=C3=A9ndez?= Date: Mon, 21 Aug 2017 10:27:24 +0200 Subject: [PATCH 008/188] DOC: Improving docstring of take method (#16948) --- pandas/core/generic.py | 67 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 63 insertions(+), 4 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d9d75c870b20c1..c83b1073afc8e3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2063,18 +2063,77 @@ def __delitem__(self, key): def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): """ - Analogous to ndarray.take + Return the elements in the given *positional* indices along an axis. + + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. Parameters ---------- - indices : list / array of ints + indices : array-like + An array of ints indicating which positions to take. axis : int, default 0 - convert : translate neg to pos indices (default) - is_copy : mark the returned frame as a copy + The axis on which to select elements. "0" means that we are + selecting rows, "1" means that we are selecting columns, etc. + convert : bool, default True + Whether to convert negative indices to positive ones, just as with + indexing into Python lists. For example, if `-1` was passed in, + this index would be converted ``n - 1``. + is_copy : bool, default True + Whether to return a copy of the original object or not. + + Examples + -------- + >>> df = pd.DataFrame([('falcon', 'bird', 389.0), + ('parrot', 'bird', 24.0), + ('lion', 'mammal', 80.5), + ('monkey', 'mammal', np.nan)], + columns=('name', 'class', 'max_speed'), + index=[0, 2, 3, 1]) + >>> df + name class max_speed + 0 falcon bird 389.0 + 2 parrot bird 24.0 + 3 lion mammal 80.5 + 1 monkey mammal NaN + + Take elements at positions 0 and 3 along the axis 0 (default). + + Note how the actual indices selected (0 and 1) do not correspond to + our selected indices 0 and 3. That's because we are selecting the 0th + and 3rd rows, not rows whose indices equal 0 and 3. + + >>> df.take([0, 3]) + 0 falcon bird 389.0 + 1 monkey mammal NaN + + Take elements at indices 1 and 2 along the axis 1 (column selection). + + >>> df.take([1, 2], axis=1) + class max_speed + 0 bird 389.0 + 2 bird 24.0 + 3 mammal 80.5 + 1 mammal NaN + + We may take elements using negative integers for positive indices, + starting from the end of the object, just like with Python lists. + + >>> df.take([-1, -2]) + name class max_speed + 1 monkey mammal NaN + 3 lion mammal 80.5 Returns ------- taken : type of caller + An array-like containing the elements taken from the object. 
+ + See Also + -------- + numpy.ndarray.take + numpy.take """ nv.validate_take(tuple(), kwargs) self._consolidate_inplace() From 91c2f1f6acde8e5f571d12716e72327747183247 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 21 Aug 2017 14:39:50 -0500 Subject: [PATCH 009/188] BUG: Fixed regex in asv.conf.json (#17300) In https://github.com/pandas-dev/pandas/pull/17293 I messed up the syntax. I used a glob instead of a regex. According to the docs at http://asv.readthedocs.io/en/latest/asv.conf.json.html#regressions-thresholds we want to use a regex. I've actually manually tested this change and verified that it works. [ci skip] --- asv_bench/asv.conf.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/asv.conf.json b/asv_bench/asv.conf.json index ced4f2b12445f3..9c333f62810f46 100644 --- a/asv_bench/asv.conf.json +++ b/asv_bench/asv.conf.json @@ -118,9 +118,9 @@ // skipped for the matching benchmark. // "regressions_first_commits": { - "*": "v0.20.0" + ".*": "v0.20.0" }, "regression_thresholds": { - "*": 0.05 + ".*": 0.05 } } From eff1f889d26fb47467124b103cb70045f85fdf84 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 21 Aug 2017 16:49:17 -0700 Subject: [PATCH 010/188] Remove unnecessary usage of _TSObject (#17297) --- pandas/_libs/period.pyx | 20 -------------------- pandas/_libs/src/datetime.pxd | 32 -------------------------------- pandas/_libs/tslib.pyx | 35 ++++++----------------------------- 3 files changed, 6 insertions(+), 81 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index e017d863e19075..6ba7ec0270f30a 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -120,26 +120,6 @@ initialize_daytime_conversion_factor_matrix() # Period logic #---------------------------------------------------------------------- -cdef inline int64_t apply_mult(int64_t period_ord, int64_t mult): - """ - Get freq+multiple ordinal value from corresponding freq-only ordinal value. - For example, 5min ordinal will be 1/5th the 1min ordinal (rounding down to - integer). - """ - if mult == 1: - return period_ord - - return (period_ord - 1) // mult - -cdef inline int64_t remove_mult(int64_t period_ord_w_mult, int64_t mult): - """ - Get freq-only ordinal value from corresponding freq+multiple ordinal. 
- """ - if mult == 1: - return period_ord_w_mult - - return period_ord_w_mult * mult + 1; - @cython.wraparound(False) @cython.boundscheck(False) diff --git a/pandas/_libs/src/datetime.pxd b/pandas/_libs/src/datetime.pxd index 2267c8282ec144..23620e790c1323 100644 --- a/pandas/_libs/src/datetime.pxd +++ b/pandas/_libs/src/datetime.pxd @@ -88,11 +88,6 @@ cdef extern from "datetime/np_datetime.h": int cmp_pandas_datetimestruct(pandas_datetimestruct *a, pandas_datetimestruct *b) - int convert_pydatetime_to_datetimestruct(PyObject *obj, - pandas_datetimestruct *out, - PANDAS_DATETIMEUNIT *out_bestunit, - int apply_tzinfo) - npy_datetime pandas_datetimestruct_to_datetime(PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *d) nogil void pandas_datetime_to_datetimestruct(npy_datetime val, @@ -112,12 +107,6 @@ cdef extern from "datetime/np_datetime_strings.h": PANDAS_DATETIMEUNIT *out_bestunit, npy_bool *out_special) - int make_iso_8601_datetime(pandas_datetimestruct *dts, char *outstr, int outlen, - int local, PANDAS_DATETIMEUNIT base, int tzoffset, - NPY_CASTING casting) - - int get_datetime_iso_8601_strlen(int local, PANDAS_DATETIMEUNIT base) - # int parse_python_string(object obj, pandas_datetimestruct *out) except -1 @@ -152,16 +141,6 @@ cdef inline int _cstring_to_dts(char *val, int length, return result -cdef inline object _datetime64_to_datetime(int64_t val): - cdef pandas_datetimestruct dts - pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) - return _dts_to_pydatetime(&dts) - -cdef inline object _dts_to_pydatetime(pandas_datetimestruct *dts): - return PyDateTime_FromDateAndTime(dts.year, dts.month, - dts.day, dts.hour, - dts.min, dts.sec, dts.us) - cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.year = PyDateTime_GET_YEAR(val) dts.month = PyDateTime_GET_MONTH(val) @@ -173,17 +152,6 @@ cdef inline int64_t _pydatetime_to_dts(object val, pandas_datetimestruct *dts): dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) -cdef inline int64_t _dtlike_to_datetime64(object val, - pandas_datetimestruct *dts): - dts.year = val.year - dts.month = val.month - dts.day = val.day - dts.hour = val.hour - dts.min = val.minute - dts.sec = val.second - dts.us = val.microsecond - dts.ps = dts.as = 0 - return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) cdef inline int64_t _date_to_datetime64(object val, pandas_datetimestruct *dts): diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 32b8c92a50269d..c4a38ec660a4c3 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -705,7 +705,6 @@ class Timestamp(_Timestamp): pandas_datetimestruct dts int64_t value object _tzinfo, result, k, v - _TSObject ts # set to naive if needed _tzinfo = self.tzinfo @@ -1009,10 +1008,6 @@ def unique_deltas(ndarray[int64_t] arr): return result -cdef inline bint _is_multiple(int64_t us, int64_t mult): - return us % mult == 0 - - cdef inline bint _cmp_scalar(int64_t lhs, int64_t rhs, int op) except -1: if op == Py_EQ: return lhs == rhs @@ -4694,7 +4689,6 @@ def get_date_field(ndarray[int64_t] dtindex, object field): field and return an array of these values. """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[int32_t] out ndarray[int32_t, ndim=2] _month_offset @@ -4876,7 +4870,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, (defined by frequency). 
""" cdef: - _TSObject ts Py_ssize_t i int count = 0 bint is_business = 0 @@ -4925,9 +4918,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dom == 1 and dow < 5) or (dom <= 3 and dow == 0): out[i] = 1 @@ -4951,13 +4943,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (ldom == doy and dow < 5) or ( dow == 4 and (ldom - doy <= 2)): @@ -4986,9 +4977,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - start_month) % 3 == 0) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5013,13 +5003,12 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day doy = mo_off + dom ldom = _month_offset[isleap, dts.month] - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if ((dts.month - end_month) % 3 == 0) and ( (ldom == doy and dow < 5) or ( @@ -5049,9 +5038,8 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) dom = dts.day - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) if (dts.month == start_month) and ( (dom == 1 and dow < 5) or (dom <= 3 and dow == 0)): @@ -5076,12 +5064,11 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) dom = dts.day mo_off = _month_offset[isleap, dts.month - 1] doy = mo_off + dom - dow = ts_dayofweek(ts) + dow = dayofweek(dts.year, dts.month, dts.day) ldom = _month_offset[isleap, dts.month] if (dts.month == end_month) and ( @@ -5095,7 +5082,6 @@ def get_start_end_field(ndarray[int64_t] dtindex, object field, pandas_datetime_to_datetimestruct( dtindex[i], PANDAS_FR_ns, &dts) - ts = convert_to_tsobject(dtindex[i], None, None, 0, 0) isleap = is_leapyear(dts.year) mo_off = _month_offset[isleap, dts.month - 1] dom = dts.day @@ -5117,7 +5103,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): name based on requested field (e.g. 
weekday_name) """ cdef: - _TSObject ts Py_ssize_t i, count = 0 ndarray[object] out pandas_datetimestruct dts @@ -5143,10 +5128,6 @@ def get_date_name_field(ndarray[int64_t] dtindex, object field): raise ValueError("Field %s not supported" % field) -cdef inline int m8_weekday(int64_t val): - ts = convert_to_tsobject(val, None, None, 0, 0) - return ts_dayofweek(ts) - cdef int64_t DAY_NS = 86400000000000LL @@ -5156,11 +5137,9 @@ def date_normalize(ndarray[int64_t] stamps, tz=None): cdef: Py_ssize_t i, n = len(stamps) pandas_datetimestruct dts - _TSObject tso ndarray[int64_t] result = np.empty(n, dtype=np.int64) if tz is not None: - tso = _TSObject() tz = maybe_get_tz(tz) result = _normalize_local(stamps, tz) else: @@ -5305,8 +5284,6 @@ def monthrange(int64_t year, int64_t month): return (dayofweek(year, month, 1), days) -cdef inline int64_t ts_dayofweek(_TSObject ts): - return dayofweek(ts.dts.year, ts.dts.month, ts.dts.day) cdef inline int days_in_month(pandas_datetimestruct dts) nogil: return days_per_month_table[is_leapyear(dts.year)][dts.month -1] From 910207ffe518413e84cfa95d772cb66d57a0d08e Mon Sep 17 00:00:00 2001 From: Michael Gasvoda Date: Mon, 21 Aug 2017 19:51:18 -0400 Subject: [PATCH 011/188] BUG: clip should handle null values closes #17276 Author: Michael Gasvoda Author: mgasvoda Closes #17288 from mgasvoda/master and squashes the following commits: a1dbdf293 [mgasvoda] Merge branch 'master' into master 9333952c2 [Michael Gasvoda] Checking output of tests 4e0464eaf [Michael Gasvoda] fixing whatsnew text c44204080 [Michael Gasvoda] formatting fixes 7e2367879 [Michael Gasvoda] formatting updates 781ea724a [Michael Gasvoda] whatsnew entry d9627fe4c [Michael Gasvoda] adding clip tests 9aa0159e9 [Michael Gasvoda] Treating na values as none for clips --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/generic.py | 12 ++++++++---- pandas/tests/frame/test_analytics.py | 26 ++++++++++---------------- pandas/tests/series/test_analytics.py | 11 +++++++++++ 4 files changed, 30 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c5fe89282bf52d..0d2c52c70b345e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -132,7 +132,6 @@ Other Enhancements - .. _whatsnew_0210.api_breaking: Backwards incompatible API changes @@ -384,6 +383,7 @@ Reshaping Numeric ^^^^^^^ - Bug in ``.clip()`` with ``axis=1`` and a list-like for ``threshold`` is passed; previously this raised ``ValueError`` (:issue:`15390`) +- :func:`Series.clip()` and :func:`DataFrame.clip()` now treat NA values for upper and lower arguments as ``None`` instead of raising ``ValueError`` (:issue:`17276`). 
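A minimal sketch of the behavior this entry describes, mirroring the tests
added in this patch (illustrative only; ``s`` is an arbitrary example Series):

    import numpy as np
    import pandas as pd

    s = pd.Series([1, 2, 3])
    s.clip(np.nan)                      # NaN threshold treated as None -> unchanged
    s.clip(upper=[1, 1, np.nan])        # any NaN in a list-like bound disables that bound
    s.clip(lower=np.nan, upper=np.nan)  # both bounds ignored -> unchanged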
Categorical diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c83b1073afc8e3..5c9e1f22ddd200 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4741,9 +4741,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): if axis is not None: axis = self._get_axis_number(axis) - if np.any(isna(threshold)): - raise ValueError("Cannot use an NA value as a clip threshold") - # method is self.le for upper bound and self.ge for lower bound if is_scalar(threshold) and is_number(threshold): if method.__name__ == 'le': @@ -4823,6 +4820,14 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, axis = nv.validate_clip_with_axis(axis, args, kwargs) + # GH 17276 + # numpy doesn't like NaN as a clip value + # so ignore + if np.any(pd.isnull(lower)): + lower = None + if np.any(pd.isnull(upper)): + upper = None + # GH 2747 (arguments were reversed) if lower is not None and upper is not None: if is_scalar(lower) and is_scalar(upper): @@ -4839,7 +4844,6 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, if upper is not None: if inplace: result = self - result = result.clip_upper(upper, axis, inplace=inplace) return result diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 484a09f11b58a7..93514a8a422151 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1931,22 +1931,16 @@ def test_clip_against_frame(self, axis): tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) tm.assert_frame_equal(clipped_df[mask], df[mask]) - def test_clip_na(self): - msg = "Cannot use an NA" - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=np.nan) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(upper=[np.nan]) - - with tm.assert_raises_regex(ValueError, msg): - self.frame.clip(lower=np.nan, upper=np.nan) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + tm.assert_frame_equal(self.frame.clip(np.nan), self.frame) + tm.assert_frame_equal(self.frame.clip(upper=[1, 2, np.nan]), + self.frame) + tm.assert_frame_equal(self.frame.clip(lower=[1, np.nan, 3]), + self.frame) + tm.assert_frame_equal(self.frame.clip(upper=np.nan, lower=np.nan), + self.frame) # Matrix-like diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index 44da0968d70243..f1d044f7a11325 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1000,6 +1000,17 @@ def test_clip_types_and_nulls(self): assert list(isna(s)) == list(isna(l)) assert list(isna(s)) == list(isna(u)) + def test_clip_with_na_args(self): + """Should process np.nan argument as None """ + # GH # 17276 + s = Series([1, 2, 3]) + + assert_series_equal(s.clip(np.nan), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=[1, 1, np.nan]), Series([1, 2, 3])) + assert_series_equal(s.clip(lower=[1, np.nan, 1]), Series([1, 2, 3])) + assert_series_equal(s.clip(upper=np.nan, lower=np.nan), + Series([1, 2, 3])) + def test_clip_against_series(self): # GH #6966 From a4c4edeb2a7e5c84b5a82a9743a12a4b66e7bcf1 Mon Sep 17 00:00:00 2001 From: ante328 Date: Tue, 22 Aug 2017 01:55:10 +0200 Subject: [PATCH 012/188] BUG: fillna returns frame when inplace=True if value is a dict (#16156) (#17279) --- doc/source/whatsnew/v0.21.0.txt | 2 
+- pandas/core/generic.py | 3 ++- pandas/tests/frame/test_missing.py | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 0d2c52c70b345e..dd06114f6abd31 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -318,7 +318,7 @@ Conversion - Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - +- Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) Indexing ^^^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5c9e1f22ddd200..e84e4eac3f34d6 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4135,7 +4135,8 @@ def fillna(self, value=None, method=None, axis=None, inplace=False, continue obj = result[k] obj.fillna(v, limit=limit, inplace=True, downcast=downcast) - return result + return result if not inplace else None + elif not is_list_like(value): new_data = self._data.fillna(value=value, limit=limit, inplace=inplace, diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 77f0357685cab8..ebd15b3180a337 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -407,6 +407,9 @@ def test_fillna_inplace(self): df.fillna(value=0, inplace=True) tm.assert_frame_equal(df, expected) + expected = df.fillna(value={0: 0}, inplace=True) + assert expected is None + df[1][:4] = np.nan df[3][-4:] = np.nan expected = df.fillna(method='ffill') From 2f00159da32c85c3b30b433f78a43e47677711a3 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 22 Aug 2017 10:11:10 +0200 Subject: [PATCH 013/188] CLN: Index.append() refactoring (#16236) --- pandas/core/dtypes/concat.py | 48 ++++++++++++++++++++++- pandas/core/indexes/base.py | 11 +++--- pandas/core/indexes/category.py | 6 ++- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/interval.py | 4 +- pandas/core/indexes/range.py | 59 ++--------------------------- 6 files changed, 63 insertions(+), 67 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 292d5f608d4cb2..0ce45eea119ed2 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -19,7 +19,7 @@ _TD_DTYPE) from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) + ABCPeriodIndex, ABCRangeIndex) def get_dtype_kinds(l): @@ -41,6 +41,8 @@ def get_dtype_kinds(l): typ = 'category' elif is_sparse(arr): typ = 'sparse' + elif isinstance(arr, ABCRangeIndex): + typ = 'range' elif is_datetimetz(arr): # if to_concat contains different tz, # the result must be object dtype @@ -559,3 +561,47 @@ def convert_sparse(x, axis): # coerce to object if needed result = result.astype('object') return result + + +def _concat_rangeindex_same_dtype(indexes): + """ + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. 
E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + + start = step = next = None + + for obj in indexes: + if not len(obj): + continue + + if start is None: + # This is set by the first non-empty index + start = obj._start + if step is None and len(obj) > 1: + step = obj._step + elif step is None: + # First non-empty index had only one element + if obj._start == start: + return _concat_index_asobject(indexes) + step = obj._start - start + + non_consecutive = ((step != obj._step and len(obj) > 1) or + (next is not None and obj._start != next)) + if non_consecutive: + # Int64Index._append_same_dtype([ix.astype(int) for ix in indexes]) + # would be preferred... but it currently resorts to + # _concat_index_asobject anyway. + return _concat_index_asobject(indexes) + + if step is not None: + next = obj[-1] + step + + if start is None: + start = obj._start + step = obj._step + stop = obj._stop if next is None else next + return indexes[0].__class__(start, stop, step) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index de6221987a59aa..a21e6df3ffc93d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1745,18 +1745,17 @@ def append(self, other): names = set([obj.name for obj in to_concat]) name = None if len(names) > 1 else self.name - if self.is_categorical(): - # if calling index is category, don't check dtype of others - from pandas.core.indexes.category import CategoricalIndex - return CategoricalIndex._append_same_dtype(self, to_concat, name) + return self._concat(to_concat, name) + + def _concat(self, to_concat, name): typs = _concat.get_dtype_kinds(to_concat) if len(typs) == 1: - return self._append_same_dtype(to_concat, name=name) + return self._concat_same_dtype(to_concat, name=name) return _concat._concat_index_asobject(to_concat, name=name) - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ac4698b570d172..f22407308e0944 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -633,7 +633,11 @@ def insert(self, loc, item): codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) - def _append_same_dtype(self, to_concat, name): + def _concat(self, to_concat, name): + # if calling index is category, don't check dtype of others + return CategoricalIndex._concat_same_dtype(self, to_concat, name) + + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class ValueError if other is not in the categories diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 845c71b6c41d8b..c3232627fce74c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -837,7 +837,7 @@ def summary(self, name=None): result = result.replace("'", "") return result - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e90378184e3f3e..e0ed6c7ea35c0c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -876,7 +876,7 @@ def _as_like_interval_index(self, other, error_msg): raise ValueError(error_msg) 
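# ---------------------------------------------------------------------
# Editor's illustration (not part of the patch): this refactor routes
# Index.append() through _concat_same_dtype, so the RangeIndex fast
# path now lives in _concat_rangeindex_same_dtype. A minimal sketch of
# the intended behavior, assuming pandas >= 0.21:
#
#     import pandas as pd
#
#     # consecutive pieces stay a RangeIndex
#     pd.RangeIndex(3).append(pd.RangeIndex(3, 6))
#     # -> RangeIndex(start=0, stop=6, step=1)
#
#     # a gap forces the Int64Index fallback
#     pd.RangeIndex(3).append(pd.RangeIndex(4, 6))
#     # -> Int64Index([0, 1, 2, 4, 5], dtype='int64')
# ---------------------------------------------------------------------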
return other - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ assert that we all have the same .closed we allow a 0-len index here as well @@ -885,7 +885,7 @@ def _append_same_dtype(self, to_concat, name): msg = ('can only append two IntervalIndex objects ' 'that are closed on the same side') raise ValueError(msg) - return super(IntervalIndex, self)._append_same_dtype(to_concat, name) + return super(IntervalIndex, self)._concat_same_dtype(to_concat, name) @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ac4cc6986cace2..82412d3a7ef57a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -14,6 +14,7 @@ from pandas.compat.numpy import function as nv from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly +import pandas.core.dtypes.concat as _concat import pandas.core.indexes.base as ibase from pandas.core.indexes.numeric import Int64Index @@ -447,62 +448,8 @@ def join(self, other, how='left', level=None, return_indexers=False, return super(RangeIndex, self).join(other, how, level, return_indexers, sort) - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : RangeIndex if all indexes are consecutive RangeIndexes, - otherwise Int64Index or Index - """ - - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - if not all([isinstance(i, RangeIndex) for i in to_concat]): - return super(RangeIndex, self).append(other) - - start = step = next = None - - for obj in to_concat: - if not len(obj): - continue - - if start is None: - # This is set by the first non-empty index - start = obj._start - if step is None and len(obj) > 1: - step = obj._step - elif step is None: - # First non-empty index had only one element - if obj._start == start: - return super(RangeIndex, self).append(other) - step = obj._start - start - - non_consecutive = ((step != obj._step and len(obj) > 1) or - (next is not None and obj._start != next)) - if non_consecutive: - return super(RangeIndex, self).append(other) - - if step is not None: - next = obj[-1] + step - - if start is None: - start = obj._start - step = obj._step - stop = obj._stop if next is None else next - names = set([obj.name for obj in to_concat]) - name = None if len(names) > 1 else self.name - return RangeIndex(start, stop, step, name=name) + def _concat_same_dtype(self, indexes, name): + return _concat._concat_rangeindex_same_dtype(indexes).rename(name) def __len__(self): """ From 870b6a6d6415c76d051b287adcb180ac3020b6e8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 22 Aug 2017 05:50:57 -0400 Subject: [PATCH 014/188] DEPS: set min versions (#17002) closes #15206, numpy >= 1.9 closes #15543, matplotlib >= 1.4.3 scipy >= 0.14.0 --- .travis.yml | 6 +- ci/install_travis.sh | 2 +- ci/requirements-2.7_COMPAT.build | 2 +- ci/requirements-2.7_COMPAT.run | 9 +- ci/requirements-2.7_LOCALE.build | 2 +- ci/requirements-2.7_LOCALE.run | 5 +- ci/requirements-2.7_SLOW.build | 2 +- ci/requirements-2.7_SLOW.run | 4 +- ci/script_multi.sh | 6 + ci/script_single.sh | 8 + doc/source/install.rst | 6 +- doc/source/whatsnew/v0.21.0.txt | 22 ++- pandas/_libs/sparse.pyx | 2 - 
pandas/compat/numpy/__init__.py | 14 +- pandas/core/algorithms.py | 7 +- pandas/core/generic.py | 5 +- pandas/core/groupby.py | 8 +- pandas/core/internals.py | 16 +- pandas/tests/frame/test_quantile.py | 42 ----- pandas/tests/frame/test_rank.py | 12 +- .../tests/indexes/datetimes/test_datetime.py | 8 +- pandas/tests/indexes/period/test_indexing.py | 34 ++-- .../indexes/timedeltas/test_timedelta.py | 8 +- pandas/tests/plotting/common.py | 3 +- pandas/tests/plotting/test_datetimelike.py | 2 + pandas/tests/plotting/test_frame.py | 163 ++++++++++-------- pandas/tests/plotting/test_misc.py | 45 +---- pandas/tests/plotting/test_series.py | 12 ++ pandas/tests/series/test_operators.py | 16 +- pandas/tests/series/test_quantile.py | 27 +-- pandas/tests/series/test_rank.py | 9 +- pandas/tests/sparse/test_array.py | 7 +- pandas/tests/test_nanops.py | 18 +- pandas/tests/test_resample.py | 2 +- pandas/tests/tools/test_numeric.py | 5 +- setup.py | 2 +- 36 files changed, 221 insertions(+), 320 deletions(-) diff --git a/.travis.yml b/.travis.yml index 897d31cf23a3b8..034e2a32bb75c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -37,7 +37,7 @@ matrix: - JOB="3.5_OSX" TEST_ARGS="--skip-slow --skip-network" - dist: trusty env: - - JOB="2.7_LOCALE" TEST_ARGS="--only-slow --skip-network" LOCALE_OVERRIDE="zh_CN.UTF-8" + - JOB="2.7_LOCALE" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true addons: apt: packages: @@ -62,7 +62,7 @@ matrix: # In allow_failures - dist: trusty env: - - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" + - JOB="2.7_SLOW" SLOW=true # In allow_failures - dist: trusty env: @@ -82,7 +82,7 @@ matrix: allow_failures: - dist: trusty env: - - JOB="2.7_SLOW" TEST_ARGS="--only-slow --skip-network" + - JOB="2.7_SLOW" SLOW=true - dist: trusty env: - JOB="2.7_BUILD_TEST" TEST_ARGS="--skip-slow" BUILD_TEST=true diff --git a/ci/install_travis.sh b/ci/install_travis.sh index ad8f0bdd8a5977..d26689f2e6b4bd 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -47,7 +47,7 @@ which conda echo echo "[update conda]" conda config --set ssl_verify false || exit 1 -conda config --set always_yes true --set changeps1 false || exit 1 +conda config --set quiet true --set always_yes true --set changeps1 false || exit 1 conda update -q conda echo diff --git a/ci/requirements-2.7_COMPAT.build b/ci/requirements-2.7_COMPAT.build index 0e1ccf9eac9bf1..d9c932daa110ba 100644 --- a/ci/requirements-2.7_COMPAT.build +++ b/ci/requirements-2.7_COMPAT.build @@ -1,5 +1,5 @@ python=2.7* -numpy=1.7.1 +numpy=1.9.2 cython=0.23 dateutil=1.5 pytz=2013b diff --git a/ci/requirements-2.7_COMPAT.run b/ci/requirements-2.7_COMPAT.run index b94f4ab7b27d1a..39bf7201407333 100644 --- a/ci/requirements-2.7_COMPAT.run +++ b/ci/requirements-2.7_COMPAT.run @@ -1,11 +1,12 @@ -numpy=1.7.1 +numpy=1.9.2 dateutil=1.5 pytz=2013b -scipy=0.11.0 +scipy=0.14.0 xlwt=0.7.5 xlrd=0.9.2 -numexpr=2.2.2 -pytables=3.0.0 +bottleneck=1.0.0 +numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr +pytables=3.2.2 psycopg2 pymysql=0.6.0 sqlalchemy=0.7.8 diff --git a/ci/requirements-2.7_LOCALE.build b/ci/requirements-2.7_LOCALE.build index 4a37ce8fbe1613..96cb184ec2665e 100644 --- a/ci/requirements-2.7_LOCALE.build +++ b/ci/requirements-2.7_LOCALE.build @@ -1,5 +1,5 @@ python=2.7* python-dateutil pytz=2013b -numpy=1.8.2 +numpy=1.9.2 cython=0.23 diff --git a/ci/requirements-2.7_LOCALE.run b/ci/requirements-2.7_LOCALE.run index 8e360cf74b081c..00006106f7009c 100644 --- a/ci/requirements-2.7_LOCALE.run +++ b/ci/requirements-2.7_LOCALE.run @@ -1,11 
+1,12 @@ python-dateutil pytz=2013b -numpy=1.8.2 +numpy=1.9.2 xlwt=0.7.5 openpyxl=1.6.2 xlsxwriter=0.5.2 xlrd=0.9.2 -matplotlib=1.3.1 +bottleneck=1.0.0 +matplotlib=1.4.3 sqlalchemy=0.8.1 lxml=3.2.1 scipy diff --git a/ci/requirements-2.7_SLOW.build b/ci/requirements-2.7_SLOW.build index 0f4a2c6792e6b1..a665ab9edd5850 100644 --- a/ci/requirements-2.7_SLOW.build +++ b/ci/requirements-2.7_SLOW.build @@ -1,5 +1,5 @@ python=2.7* python-dateutil pytz -numpy=1.8.2 +numpy=1.10* cython diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index 0a549554f5219e..f7708283ad04a0 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -1,7 +1,7 @@ python-dateutil pytz -numpy=1.8.2 -matplotlib=1.3.1 +numpy=1.10* +matplotlib=1.4.3 scipy patsy xlwt diff --git a/ci/script_multi.sh b/ci/script_multi.sh index d79fc43fbe175a..ee9fbcaad5ef5f 100755 --- a/ci/script_multi.sh +++ b/ci/script_multi.sh @@ -36,9 +36,15 @@ elif [ "$COVERAGE" ]; then echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas +elif [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" + echo pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + pytest -r xX -m "not single and slow" -v --junitxml=/tmp/multiple.xml $TEST_ARGS pandas + else echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest + fi RET="$?" diff --git a/ci/script_single.sh b/ci/script_single.sh index 245b4e6152c4d9..375e9879e950fd 100755 --- a/ci/script_single.sh +++ b/ci/script_single.sh @@ -12,16 +12,24 @@ if [ -n "$LOCALE_OVERRIDE" ]; then python -c "$pycmd" fi +if [ "$SLOW" ]; then + TEST_ARGS="--only-slow --skip-network" +fi + if [ "$BUILD_TEST" ]; then echo "We are not running pytest as this is a build test." + elif [ "$DOC" ]; then echo "We are not running pytest as this is a doc-build" + elif [ "$COVERAGE" ]; then echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas + else echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest + fi RET="$?" diff --git a/doc/source/install.rst b/doc/source/install.rst index 99d299b75b59b2..f92c43839ee317 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -203,7 +203,7 @@ Dependencies ------------ * `setuptools `__ -* `NumPy `__: 1.7.1 or higher +* `NumPy `__: 1.9.0 or higher * `python-dateutil `__: 1.5 or higher * `pytz `__: Needed for time zone support @@ -233,7 +233,7 @@ Optional Dependencies * `Cython `__: Only necessary to build development version. Version 0.23 or higher. -* `SciPy `__: miscellaneous statistical functions +* `SciPy `__: miscellaneous statistical functions, Version 0.14.0 or higher * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. 
* `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher.
@@ -244,7 +244,7 @@ Optional Dependencies
 * `pymysql `__: for MySQL.
 * `SQLite `__: for SQLite, this is included in Python's standard library by default.
-* `matplotlib `__: for plotting
+* `matplotlib `__: for plotting, Version 1.4.3 or higher.
 * For Excel I/O:
 * `xlrd/xlwt `__: Excel reading (xlrd) and writing (xlwt)
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index dd06114f6abd31..148fd0a8324021 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -137,6 +137,27 @@ Other Enhancements
 Backwards incompatible API changes
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. _whatsnew_0210.api_breaking.deps:
+
+Dependencies have increased minimum versions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+We have updated our minimum supported versions of dependencies (:issue:`15206`, :issue:`15543`, :issue:`15214`).
+If installed, we now require:
+
+   +--------------+-----------------+----------+
+   | Package      | Minimum Version | Required |
+   +==============+=================+==========+
+   | Numpy        | 1.9.0           |    X     |
+   +--------------+-----------------+----------+
+   | Matplotlib   | 1.4.3           |          |
+   +--------------+-----------------+----------+
+   | Scipy        | 0.14.0          |          |
+   +--------------+-----------------+----------+
+   | Bottleneck   | 1.0.0           |          |
+   +--------------+-----------------+----------+
+
 .. _whatsnew_0210.api_breaking.pandas_eval:

 Improved error handling during item assignment in pd.eval
@@ -258,7 +279,6 @@ Other API Changes
 ^^^^^^^^^^^^^^^^^
 - Support has been dropped for Python 3.4 (:issue:`15251`)
-- Support has been dropped for bottleneck < 1.0.0 (:issue:`15214`)
 - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword.
(:issue:`16022`) - Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 0c2e056ead7fac..1cc7f5ace95ea5 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -12,8 +12,6 @@ from distutils.version import LooseVersion # numpy versioning _np_version = np.version.short_version -_np_version_under1p8 = LooseVersion(_np_version) < '1.8' -_np_version_under1p9 = LooseVersion(_np_version) < '1.9' _np_version_under1p10 = LooseVersion(_np_version) < '1.10' _np_version_under1p11 = LooseVersion(_np_version) < '1.11' diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 2c5a18973afa8f..5112957b498751 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -9,19 +9,18 @@ # numpy versioning _np_version = np.__version__ _nlv = LooseVersion(_np_version) -_np_version_under1p8 = _nlv < '1.8' -_np_version_under1p9 = _nlv < '1.9' _np_version_under1p10 = _nlv < '1.10' _np_version_under1p11 = _nlv < '1.11' _np_version_under1p12 = _nlv < '1.12' _np_version_under1p13 = _nlv < '1.13' _np_version_under1p14 = _nlv < '1.14' +_np_version_under1p15 = _nlv < '1.15' -if _nlv < '1.7.0': +if _nlv < '1.9': raise ImportError('this version of pandas is incompatible with ' - 'numpy < 1.7.0\n' + 'numpy < 1.9.0\n' 'your numpy version is {0}.\n' - 'Please upgrade numpy to >= 1.7.0 to use ' + 'Please upgrade numpy to >= 1.9.0 to use ' 'this pandas version'.format(_np_version)) @@ -70,11 +69,10 @@ def np_array_datetime64_compat(arr, *args, **kwargs): __all__ = ['np', - '_np_version_under1p8', - '_np_version_under1p9', '_np_version_under1p10', '_np_version_under1p11', '_np_version_under1p12', '_np_version_under1p13', - '_np_version_under1p14' + '_np_version_under1p14', + '_np_version_under1p15' ] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f2359f3ff1a9db..ffd03096e2a27f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -6,7 +6,6 @@ from warnings import warn, catch_warnings import numpy as np -from pandas import compat, _np_version_under1p8 from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.generic import ( ABCSeries, ABCIndex, @@ -407,14 +406,12 @@ def isin(comps, values): comps, dtype, _ = _ensure_data(comps) values, _, _ = _ensure_data(values, dtype=dtype) - # GH11232 - # work-around for numpy < 1.8 and comparisions on py3 # faster for larger cases to use np.in1d f = lambda x, y: htable.ismember_object(x, values) + # GH16012 # Ensure np.in1d doesn't get object types or it *may* throw an exception - if ((_np_version_under1p8 and compat.PY3) or len(comps) > 1000000 and - not is_object_dtype(comps)): + if len(comps) > 1000000 and not is_object_dtype(comps): f = lambda x, y: np.in1d(x, y) elif is_integer_dtype(comps): try: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e84e4eac3f34d6..f8366c804e3e79 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1827,11 +1827,8 @@ def _box_item_values(self, key, values): def _maybe_cache_changed(self, item, value): """The object has called back to us saying maybe it has changed. 
- - numpy < 1.8 has an issue with object arrays and aliasing - GH6026 """ - self._data.set(item, value, check=pd._np_version_under1p8) + self._data.set(item, value, check=False) @property def _is_cached(self): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a388892e925b63..aa7c4517c0a016 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -13,7 +13,7 @@ ) from pandas import compat -from pandas.compat.numpy import function as nv, _np_version_under1p8 +from pandas.compat.numpy import function as nv from pandas.compat import set_function_name from pandas.core.dtypes.common import ( @@ -3257,11 +3257,7 @@ def value_counts(self, normalize=False, sort=True, ascending=False, d = np.diff(np.r_[idx, len(ids)]) if dropna: m = ids[lab == -1] - if _np_version_under1p8: - mi, ml = algorithms.factorize(m) - d[ml] = d[ml] - np.bincount(mi) - else: - np.add.at(d, m, -1) + np.add.at(d, m, -1) acc = rep(d)[mask] else: acc = rep(d) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index b616270e47aa6e..83b382ec0ed723 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -69,8 +69,7 @@ import pandas.core.computation.expressions as expressions from pandas.util._decorators import cache_readonly from pandas.util._validators import validate_bool_kwarg - -from pandas import compat, _np_version_under1p9 +from pandas import compat from pandas.compat import range, map, zip, u @@ -857,9 +856,6 @@ def _is_empty_indexer(indexer): # set else: - if _np_version_under1p9: - # Work around GH 6168 to support old numpy - indexer = getattr(indexer, 'values', indexer) values[indexer] = value # coerce and try to infer the dtypes of the result @@ -1482,15 +1478,7 @@ def quantile(self, qs, interpolation='linear', axis=0, mgr=None): tuple of (axis, block) """ - if _np_version_under1p9: - if interpolation != 'linear': - raise ValueError("Interpolation methods other than linear " - "are not supported in numpy < 1.9.") - - kw = {} - if not _np_version_under1p9: - kw.update({'interpolation': interpolation}) - + kw = {'interpolation': interpolation} values = self.get_values() values, _, _, _ = self._try_coerce_args(values, values) diff --git a/pandas/tests/frame/test_quantile.py b/pandas/tests/frame/test_quantile.py index 2482e493dbefdc..2f264874378bce 100644 --- a/pandas/tests/frame/test_quantile.py +++ b/pandas/tests/frame/test_quantile.py @@ -12,7 +12,6 @@ from pandas.util.testing import assert_series_equal, assert_frame_equal import pandas.util.testing as tm -from pandas import _np_version_under1p9 from pandas.tests.frame.common import TestData @@ -103,9 +102,6 @@ def test_quantile_axis_parameter(self): def test_quantile_interpolation(self): # see gh-10174 - if _np_version_under1p9: - pytest.skip("Numpy version under 1.9") - from numpy import percentile # interpolation = linear (default case) @@ -166,44 +162,6 @@ def test_quantile_interpolation(self): index=[.25, .5], columns=['a', 'b', 'c']) assert_frame_equal(result, expected) - def test_quantile_interpolation_np_lt_1p9(self): - # see gh-10174 - if not _np_version_under1p9: - pytest.skip("Numpy version is greater than 1.9") - - from numpy import percentile - - # interpolation = linear (default case) - q = self.tsframe.quantile(0.1, axis=0, interpolation='linear') - assert q['A'] == percentile(self.tsframe['A'], 10) - q = self.intframe.quantile(0.1) - assert q['A'] == percentile(self.intframe['A'], 10) - - # test with and without interpolation keyword - q1 = self.intframe.quantile(0.1) - assert q1['A'] == 
np.percentile(self.intframe['A'], 10) - assert_series_equal(q, q1) - - # interpolation method other than default linear - msg = "Interpolation methods other than linear" - df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}, index=[1, 2, 3]) - with tm.assert_raises_regex(ValueError, msg): - df.quantile(.5, axis=1, interpolation='nearest') - - with tm.assert_raises_regex(ValueError, msg): - df.quantile([.5, .75], axis=1, interpolation='lower') - - # test degenerate case - df = DataFrame({'x': [], 'y': []}) - with tm.assert_raises_regex(ValueError, msg): - q = df.quantile(0.1, axis=0, interpolation='higher') - - # multi - df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], - columns=['a', 'b', 'c']) - with tm.assert_raises_regex(ValueError, msg): - df.quantile([.25, .5], interpolation='midpoint') - def test_quantile_multi(self): df = DataFrame([[1, 1, 1], [2, 2, 2], [3, 3, 3]], columns=['a', 'b', 'c']) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index acf887d047c9e6..58f4d9b770173c 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import pytest from datetime import timedelta, datetime from distutils.version import LooseVersion from numpy import nan @@ -26,8 +27,7 @@ class TestRank(TestData): } def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + rankdata = pytest.importorskip('scipy.stats.rankdata') self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan @@ -120,8 +120,7 @@ def test_rank2(self): tm.assert_frame_equal(df.rank(), exp) def test_rank_na_option(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + rankdata = pytest.importorskip('scipy.stats.rankdata') self.frame['A'][::2] = np.nan self.frame['B'][::3] = np.nan @@ -193,10 +192,9 @@ def test_rank_axis(self): tm.assert_frame_equal(df.rank(axis=1), df.rank(axis='columns')) def test_rank_methods_frame(self): - tm.skip_if_no_package('scipy', min_version='0.13', - app='scipy.stats.rankdata') + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') import scipy - from scipy.stats import rankdata xs = np.random.randint(0, 21, (100, 26)) xs = (xs - 10.0) / 10.0 diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index f99dcee9e5c8ab..47f53f53cfd021 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -9,7 +9,7 @@ from pandas.compat import lrange from pandas.compat.numpy import np_datetime64_compat from pandas import (DatetimeIndex, Index, date_range, Series, DataFrame, - Timestamp, datetime, offsets, _np_version_under1p8) + Timestamp, datetime, offsets) from pandas.util.testing import assert_series_equal, assert_almost_equal @@ -276,11 +276,7 @@ def test_comparisons_nat(self): np_datetime64_compat('2014-06-01 00:00Z'), np_datetime64_compat('2014-07-01 00:00Z')]) - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(fidx1, fidx2), (didx1, didx2)] - else: - cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] + cases = [(fidx1, fidx2), (didx1, didx2), (didx1, darr)] # Check pd.NaT is handles as the same as np.nan with tm.assert_produces_warning(None): diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index d4dac1cf88fffb..efc13a56cd77e4 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ 
b/pandas/tests/indexes/period/test_indexing.py @@ -8,7 +8,7 @@ from pandas.compat import lrange from pandas._libs import tslib from pandas import (PeriodIndex, Series, DatetimeIndex, - period_range, Period, _np_version_under1p9) + period_range, Period) class TestGetItem(object): @@ -149,16 +149,12 @@ def test_getitem_seconds(self): values = ['2014', '2013/02', '2013/01/02', '2013/02/01 9H', '2013/02/01 09:00'] for v in values: - if _np_version_under1p9: - with pytest.raises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with pytest.raises(IndexError): - # idx[v] - continue + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue s = Series(np.random.rand(len(idx)), index=idx) tm.assert_series_equal(s['2013/01/01 10:00'], s[3600:3660]) @@ -178,16 +174,12 @@ def test_getitem_day(self): '2013/02/01 09:00'] for v in values: - if _np_version_under1p9: - with pytest.raises(ValueError): - idx[v] - else: - # GH7116 - # these show deprecations as we are trying - # to slice with non-integer indexers - # with pytest.raises(IndexError): - # idx[v] - continue + # GH7116 + # these show deprecations as we are trying + # to slice with non-integer indexers + # with pytest.raises(IndexError): + # idx[v] + continue s = Series(np.random.rand(len(idx)), index=idx) tm.assert_series_equal(s['2013/01'], s[0:31]) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 59e4b1432b8bc1..0b3bd0b03bccfd 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -7,7 +7,7 @@ import pandas.util.testing as tm from pandas import (timedelta_range, date_range, Series, Timedelta, DatetimeIndex, TimedeltaIndex, Index, DataFrame, - Int64Index, _np_version_under1p8) + Int64Index) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_index_equal) @@ -379,11 +379,7 @@ def test_comparisons_nat(self): np.timedelta64(1, 'D') + np.timedelta64(2, 's'), np.timedelta64(5, 'D') + np.timedelta64(3, 's')]) - if _np_version_under1p8: - # cannot test array because np.datetime('nat') returns today's date - cases = [(tdidx1, tdidx2)] - else: - cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] + cases = [(tdidx1, tdidx2), (tdidx1, tdarr)] # Check pd.NaT is handles as the same as np.nan for idx1, idx2 in cases: diff --git a/pandas/tests/plotting/common.py b/pandas/tests/plotting/common.py index 3ab443b223f207..dfab539e9474c5 100644 --- a/pandas/tests/plotting/common.py +++ b/pandas/tests/plotting/common.py @@ -39,7 +39,8 @@ def _ok_for_gaussian_kde(kind): from scipy.stats import gaussian_kde # noqa except ImportError: return False - return True + + return plotting._compat._mpl_ge_1_5_0() class TestPlotBase(object): diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index e9c7d806fd65df..cff0c1c0b424e5 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -610,6 +610,8 @@ def test_secondary_y_ts(self): @pytest.mark.slow def test_secondary_kde(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 6d813ac76cc4e2..67098529a01119 100644 --- 
a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -473,7 +473,6 @@ def test_subplots_multiple_axes(self): # TestDataFrameGroupByPlots.test_grouped_box_multiple_axes fig, axes = self.plt.subplots(2, 2) with warnings.catch_warnings(): - warnings.simplefilter('ignore') df = DataFrame(np.random.rand(10, 4), index=list(string.ascii_letters[:10])) @@ -1290,6 +1289,9 @@ def test_boxplot_subplots_return_type(self): def test_kde_df(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(randn(100, 4)) ax = _check_plot_works(df.plot, kind='kde') expected = [pprint_thing(c) for c in df.columns] @@ -1311,6 +1313,9 @@ def test_kde_df(self): def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + df = DataFrame(np.random.uniform(size=(100, 4))) df.loc[0, 0] = np.nan _check_plot_works(df.plot, kind='kde') @@ -1835,6 +1840,8 @@ def test_hist_colors(self): def test_kde_colors(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm @@ -1858,6 +1865,8 @@ def test_kde_colors(self): def test_kde_colors_and_styles_subplots(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") from matplotlib import cm default_colors = self._maybe_unpack_cycler(self.plt.rcParams) @@ -2160,71 +2169,74 @@ def test_pie_df_nan(self): @pytest.mark.slow def test_errorbar_plot(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - df = DataFrame(d) - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - df_err = DataFrame(d_err) - - # check line plots - ax = _check_plot_works(df.plot, yerr=df_err, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) - self._check_has_errorbars(ax, xerr=0, yerr=2) + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + df = DataFrame(d) + d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + df_err = DataFrame(d_err) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + # check line plots + ax = _check_plot_works(df.plot, yerr=df_err, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + ax = _check_plot_works(df.plot, yerr=df_err, logx=True, logy=True) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, yerr=df_err['x'], xerr=df_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) - self._check_has_errorbars(ax, xerr=2, yerr=2) - # _check_plot_works adds an ax so catch warning. 
see GH #13188 - with tm.assert_produces_warning(UserWarning): + ax = _check_plot_works(df.plot, yerr=df_err, loglog=True) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(df.plot, yerr=df_err['x'], kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, yerr=df_err['x'], + xerr=df_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + ax = _check_plot_works(df.plot, xerr=0.2, yerr=0.2, kind=kind) + self._check_has_errorbars(ax, xerr=2, yerr=2) + + # _check_plot_works adds an ax so catch warning. see GH #13188 axes = _check_plot_works(df.plot, yerr=df_err, xerr=df_err, subplots=True, kind=kind) - self._check_has_errorbars(axes, xerr=1, yerr=1) - - ax = _check_plot_works((df + 1).plot, yerr=df_err, - xerr=df_err, kind='bar', log=True) - self._check_has_errorbars(ax, xerr=2, yerr=2) + self._check_has_errorbars(axes, xerr=1, yerr=1) - # yerr is raw error values - ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) - self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works((df + 1).plot, yerr=df_err, + xerr=df_err, kind='bar', log=True) + self._check_has_errorbars(ax, xerr=2, yerr=2) - # yerr is iterator - import itertools - ax = _check_plot_works(df.plot, yerr=itertools.repeat(0.1, len(df))) - self._check_has_errorbars(ax, xerr=0, yerr=2) + # yerr is raw error values + ax = _check_plot_works(df['y'].plot, yerr=np.ones(12) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(df.plot, yerr=np.ones((2, 12)) * 0.4) + self._check_has_errorbars(ax, xerr=0, yerr=2) - # yerr is column name - for yerr in ['yerr', u('誤差')]: - s_df = df.copy() - s_df[yerr] = np.ones(12) * 0.2 - ax = _check_plot_works(s_df.plot, yerr=yerr) + # yerr is iterator + import itertools + ax = _check_plot_works(df.plot, + yerr=itertools.repeat(0.1, len(df))) self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) - self._check_has_errorbars(ax, xerr=0, yerr=1) - with pytest.raises(ValueError): - df.plot(yerr=np.random.randn(11)) + # yerr is column name + for yerr in ['yerr', u('誤差')]: + s_df = df.copy() + s_df[yerr] = np.ones(12) * 0.2 + ax = _check_plot_works(s_df.plot, yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(s_df.plot, y='y', x='x', yerr=yerr) + self._check_has_errorbars(ax, xerr=0, yerr=1) - df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) - with pytest.raises((ValueError, TypeError)): - df.plot(yerr=df_err) + with pytest.raises(ValueError): + df.plot(yerr=np.random.randn(11)) + + df_err = DataFrame({'x': ['zzz'] * 12, 'y': ['zzz'] * 12}) + with pytest.raises((ValueError, TypeError)): + df.plot(yerr=df_err) @pytest.mark.slow def test_errorbar_with_integer_column_names(self): @@ -2262,33 +2274,34 @@ def test_errorbar_with_partial_columns(self): @pytest.mark.slow def test_errorbar_timeseries(self): - d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} - d_err = {'x': np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} + with warnings.catch_warnings(): + d = {'x': np.arange(12), 'y': np.arange(12, 0, -1)} + d_err = {'x': 
np.ones(12) * 0.2, 'y': np.ones(12) * 0.4} - # check time-series plots - ix = date_range('1/1/2000', '1/1/2001', freq='M') - tdf = DataFrame(d, index=ix) - tdf_err = DataFrame(d_err, index=ix) + # check time-series plots + ix = date_range('1/1/2000', '1/1/2001', freq='M') + tdf = DataFrame(d, index=ix) + tdf_err = DataFrame(d_err, index=ix) - kinds = ['line', 'bar', 'barh'] - for kind in kinds: - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], - kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=1) - ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) - self._check_has_errorbars(ax, xerr=0, yerr=2) - # _check_plot_works adds an ax so catch warning. see GH #13188 - with tm.assert_produces_warning(UserWarning): + kinds = ['line', 'bar', 'barh'] + for kind in kinds: + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, yerr=d_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + ax = _check_plot_works(tdf.plot, y='y', yerr=tdf_err['x'], + kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, y='y', yerr='x', kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=1) + ax = _check_plot_works(tdf.plot, yerr=tdf_err, kind=kind) + self._check_has_errorbars(ax, xerr=0, yerr=2) + + # _check_plot_works adds an ax so catch warning. see GH #13188 axes = _check_plot_works(tdf.plot, kind=kind, yerr=tdf_err, subplots=True) - self._check_has_errorbars(axes, xerr=0, yerr=1) + self._check_has_errorbars(axes, xerr=0, yerr=1) def test_errorbar_asymmetrical(self): diff --git a/pandas/tests/plotting/test_misc.py b/pandas/tests/plotting/test_misc.py index 684a943fb5a69f..c4795ea1e1eca6 100644 --- a/pandas/tests/plotting/test_misc.py +++ b/pandas/tests/plotting/test_misc.py @@ -4,7 +4,7 @@ import pytest -from pandas import Series, DataFrame +from pandas import DataFrame from pandas.compat import lmap import pandas.util.testing as tm @@ -13,8 +13,7 @@ from numpy.random import randn import pandas.plotting as plotting -from pandas.tests.plotting.common import (TestPlotBase, _check_plot_works, - _ok_for_gaussian_kde) +from pandas.tests.plotting.common import TestPlotBase, _check_plot_works tm._skip_if_no_mpl() @@ -52,46 +51,6 @@ def test_bootstrap_plot(self): class TestDataFramePlots(TestPlotBase): - @pytest.mark.slow - def test_scatter_plot_legacy(self): - tm._skip_if_no_scipy() - - df = DataFrame(randn(100, 2)) - - def scat(**kwds): - return plotting.scatter_matrix(df, **kwds) - - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, marker='+') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, vmin=0) - if _ok_for_gaussian_kde('kde'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='kde') - if _ok_for_gaussian_kde('density'): - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='density') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, diagonal='hist') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, 
range_padding=.1) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, color='rgb') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, c='rgb') - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat, facecolor='rgb') - - def scat2(x, y, by=None, ax=None, figsize=None): - return plotting._core.scatter_plot(df, x, y, by, ax, figsize=None) - - _check_plot_works(scat2, x=0, y=1) - grouper = Series(np.repeat([1, 2, 3, 4, 5], 20), df.index) - with tm.assert_produces_warning(UserWarning): - _check_plot_works(scat2, x=0, y=1, by=grouper) - def test_scatter_matrix_axis(self): tm._skip_if_no_scipy() scatter_matrix = plotting.scatter_matrix diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 9c9011ba1ca7b2..8164ad74a190a7 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -571,6 +571,9 @@ def test_plot_fails_with_dupe_color_and_style(self): @pytest.mark.slow def test_hist_kde(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, ax=ax) self._check_ax_scales(ax, yaxis='log') @@ -596,6 +599,9 @@ def test_hist_kde(self): def test_kde_kwargs(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + from numpy import linspace _check_plot_works(self.ts.plot.kde, bw_method=.5, ind=linspace(-100, 100, 20)) @@ -611,6 +617,9 @@ def test_kde_kwargs(self): def test_kde_missing_vals(self): tm._skip_if_no_scipy() _skip_if_no_scipy_gaussian_kde() + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + s = Series(np.random.uniform(size=50)) s[0] = np.nan axes = _check_plot_works(s.plot.kde) @@ -638,6 +647,9 @@ def test_hist_kwargs(self): @pytest.mark.slow def test_hist_kde_color(self): + if not self.mpl_ge_1_5_0: + pytest.skip("mpl is not supported") + _, ax = self.plt.subplots() ax = self.ts.plot.hist(logy=True, bins=10, color='b', ax=ax) self._check_ax_scales(ax, yaxis='log') diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py index 4888f8fe996b63..114a055de81953 100644 --- a/pandas/tests/series/test_operators.py +++ b/pandas/tests/series/test_operators.py @@ -14,8 +14,7 @@ import pandas as pd from pandas import (Index, Series, DataFrame, isna, bdate_range, - NaT, date_range, timedelta_range, - _np_version_under1p8) + NaT, date_range, timedelta_range) from pandas.core.indexes.datetimes import Timestamp from pandas.core.indexes.timedeltas import Timedelta import pandas.core.nanops as nanops @@ -687,14 +686,13 @@ def run_ops(ops, get_ser, test_ser): assert_series_equal(result, exp) # odd numpy behavior with scalar timedeltas - if not _np_version_under1p8: - result = td1[0] + dt1 - exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td1[0] + dt1 + exp = (dt1.dt.tz_localize(None) + td1[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) - result = td2[0] + dt2 - exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) - assert_series_equal(result, exp) + result = td2[0] + dt2 + exp = (dt2.dt.tz_localize(None) + td2[0]).dt.tz_localize(tz) + assert_series_equal(result, exp) result = dt1 - td1[0] exp = (dt1.dt.tz_localize(None) - td1[0]).dt.tz_localize(tz) diff --git a/pandas/tests/series/test_quantile.py b/pandas/tests/series/test_quantile.py index 21379641a78d86..cf5e3fe4f29b06 100644 --- 
a/pandas/tests/series/test_quantile.py +++ b/pandas/tests/series/test_quantile.py @@ -1,11 +1,10 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 -import pytest import numpy as np import pandas as pd -from pandas import (Index, Series, _np_version_under1p9) +from pandas import Index, Series from pandas.core.indexes.datetimes import Timestamp from pandas.core.dtypes.common import is_integer import pandas.util.testing as tm @@ -68,8 +67,6 @@ def test_quantile_multi(self): [], dtype=float)) tm.assert_series_equal(result, expected) - @pytest.mark.skipif(_np_version_under1p9, - reason="Numpy version is under 1.9") def test_quantile_interpolation(self): # see gh-10174 @@ -82,8 +79,6 @@ def test_quantile_interpolation(self): # test with and without interpolation keyword assert q == q1 - @pytest.mark.skipif(_np_version_under1p9, - reason="Numpy version is under 1.9") def test_quantile_interpolation_dtype(self): # GH #10174 @@ -96,26 +91,6 @@ def test_quantile_interpolation_dtype(self): assert q == np.percentile(np.array([1, 3, 4]), 50) assert is_integer(q) - @pytest.mark.skipif(not _np_version_under1p9, - reason="Numpy version is greater 1.9") - def test_quantile_interpolation_np_lt_1p9(self): - # GH #10174 - - # interpolation = linear (default case) - q = self.ts.quantile(0.1, interpolation='linear') - assert q == np.percentile(self.ts.valid(), 10) - q1 = self.ts.quantile(0.1) - assert q1 == np.percentile(self.ts.valid(), 10) - - # interpolation other than linear - msg = "Interpolation methods other than " - with tm.assert_raises_regex(ValueError, msg): - self.ts.quantile(0.9, interpolation='nearest') - - # object dtype - with tm.assert_raises_regex(ValueError, msg): - Series(self.ts, dtype=object).quantile(0.7, interpolation='higher') - def test_quantile_nan(self): # GH 13098 diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index ff489eb7f15b1e..128a4cdd845e6e 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -28,8 +28,8 @@ class TestSeriesRank(TestData): } def test_rank(self): - tm._skip_if_no_scipy() - from scipy.stats import rankdata + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') self.ts[::2] = np.nan self.ts[:10][::3] = 4. 
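# ---------------------------------------------------------------------
# Editor's illustration (not part of the patch): pytest.importorskip()
# takes a module path, skips the test when the import fails, and
# returns the imported module, which is why it can replace the older
# tm._skip_if_no_scipy() + import pairs used throughout this patch.
# A minimal sketch, assuming pytest is available (SciPy optional):
#
#     import pytest
#
#     def test_rank_needs_scipy():
#         stats = pytest.importorskip('scipy.stats')  # skip, not fail
#         assert stats.rankdata([1, 3, 2]).tolist() == [1.0, 3.0, 2.0]
#
# Note the dotted name must itself be importable as a module;
# 'scipy.stats' qualifies, whereas a function path would always skip.
# ---------------------------------------------------------------------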
@@ -246,10 +246,9 @@ def _check(s, expected, method='average'): _check(series, results[method], method=method) def test_rank_methods_series(self): - tm.skip_if_no_package('scipy', min_version='0.13', - app='scipy.stats.rankdata') + pytest.importorskip('scipy.stats.special') + rankdata = pytest.importorskip('scipy.stats.rankdata') import scipy - from scipy.stats import rankdata xs = np.random.randn(9) xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index 4ce03f72dbba6e..b0a9182a265fe8 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -8,7 +8,6 @@ from numpy import nan import numpy as np -from pandas import _np_version_under1p8 from pandas.core.sparse.api import SparseArray, SparseSeries from pandas._libs.sparse import IntIndex from pandas.util.testing import assert_almost_equal @@ -150,10 +149,8 @@ def test_take(self): assert np.isnan(self.arr.take(0)) assert np.isscalar(self.arr.take(2)) - # np.take in < 1.8 doesn't support scalar indexing - if not _np_version_under1p8: - assert self.arr.take(2) == np.take(self.arr_data, 2) - assert self.arr.take(6) == np.take(self.arr_data, 6) + assert self.arr.take(2) == np.take(self.arr_data, 2) + assert self.arr.take(6) == np.take(self.arr_data, 6) exp = SparseArray(np.take(self.arr_data, [2, 3])) tm.assert_sp_array_equal(self.arr.take([2, 3]), exp) diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 2a22fc9d329195..9305504f8d5e3e 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -8,7 +8,7 @@ import numpy as np import pandas as pd -from pandas import Series, isna, _np_version_under1p9 +from pandas import Series, isna from pandas.core.dtypes.common import is_integer_dtype import pandas.core.nanops as nanops import pandas.util.testing as tm @@ -340,15 +340,13 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - # numpy < 1.9.0 is not computing this correctly - if not _np_version_under1p9: - for a in [2 ** 55, -2 ** 55, 20150515061816532]: - s = Series(a, index=range(500), dtype=np.int64) - result = s.mean() - np_result = s.values.mean() - assert result == a - assert result == np_result - assert result.dtype == np.float64 + for a in [2 ** 55, -2 ** 55, 20150515061816532]: + s = Series(a, index=range(500), dtype=np.int64) + result = s.mean() + np_result = s.values.mean() + assert result == a + assert result == np_result + assert result.dtype == np.float64 def test_returned_dtype(self): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index d938d5bf9f3abd..d42e37048d87ff 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1688,7 +1688,7 @@ def test_resample_dtype_preservation(self): def test_resample_dtype_coerceion(self): - pytest.importorskip('scipy') + pytest.importorskip('scipy.interpolate') # GH 16361 df = {"a": [1, 3, 1, 4]} diff --git a/pandas/tests/tools/test_numeric.py b/pandas/tests/tools/test_numeric.py index 664a97640387ef..1d13ba93ba7592 100644 --- a/pandas/tests/tools/test_numeric.py +++ b/pandas/tests/tools/test_numeric.py @@ -3,7 +3,7 @@ import numpy as np import pandas as pd -from pandas import to_numeric, _np_version_under1p9 +from pandas import to_numeric from pandas.util import testing as tm from numpy import iinfo @@ -355,9 +355,6 @@ def test_downcast(self): def test_downcast_limits(self): # Test the 
limits of each downcast. Bug: #14401. - # Check to make sure numpy is new enough to run this test. - if _np_version_under1p9: - pytest.skip("Numpy version is under 1.9") i = 'integer' u = 'unsigned' diff --git a/setup.py b/setup.py index a912b253289540..04a5684c20fcd5 100755 --- a/setup.py +++ b/setup.py @@ -45,7 +45,7 @@ def is_platform_mac(): _have_setuptools = False setuptools_kwargs = {} -min_numpy_ver = '1.7.0' +min_numpy_ver = '1.9.0' if sys.version_info[0] >= 3: setuptools_kwargs = { From dfaf8c6918ff20ef781d3177f464a29e70ee5d65 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 22 Aug 2017 07:57:53 -0600 Subject: [PATCH 015/188] CLN: replace %s syntax with .format in core.tools, algorithms.py, base.py (#17305) --- pandas/core/algorithms.py | 10 +++++----- pandas/core/base.py | 19 ++++++++++--------- pandas/core/tools/datetimes.py | 32 ++++++++++++++++++-------------- pandas/core/tools/timedeltas.py | 7 ++++--- 4 files changed, 37 insertions(+), 31 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ffd03096e2a27f..cccb094eaae7b6 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -393,12 +393,12 @@ def isin(comps, values): if not is_list_like(comps): raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a " - "[{0}]".format(type(comps).__name__)) + " to isin(), you passed a [{comps_type}]" + .format(comps_type=type(comps).__name__)) if not is_list_like(values): raise TypeError("only list-like objects are allowed to be passed" - " to isin(), you passed a " - "[{0}]".format(type(values).__name__)) + " to isin(), you passed a [{values_type}]" + .format(values_type=type(values).__name__)) if not isinstance(values, (ABCIndex, ABCSeries, np.ndarray)): values = lib.list_to_object_array(list(values)) @@ -671,7 +671,7 @@ def mode(values): try: result = np.sort(result) except TypeError as e: - warn("Unable to sort modes: %s" % e) + warn("Unable to sort modes: {error}".format(error=e)) result = _reconstruct_data(result, original.dtype, original) return Series(result) diff --git a/pandas/core/base.py b/pandas/core/base.py index 4ae47360357933..a7c991dc8d2572 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -342,24 +342,25 @@ def _obj_with_exclusions(self): def __getitem__(self, key): if self._selection is not None: - raise Exception('Column(s) %s already selected' % self._selection) + raise Exception('Column(s) {selection} already selected' + .format(selection=self._selection)) if isinstance(key, (list, tuple, ABCSeries, ABCIndexClass, np.ndarray)): if len(self.obj.columns.intersection(key)) != len(key): bad_keys = list(set(key).difference(self.obj.columns)) - raise KeyError("Columns not found: %s" - % str(bad_keys)[1:-1]) + raise KeyError("Columns not found: {missing}" + .format(missing=str(bad_keys)[1:-1])) return self._gotitem(list(key), ndim=2) elif not getattr(self, 'as_index', False): if key not in self.obj.columns: - raise KeyError("Column not found: %s" % key) + raise KeyError("Column not found: {key}".format(key=key)) return self._gotitem(key, ndim=2) else: if key not in self.obj: - raise KeyError("Column not found: %s" % key) + raise KeyError("Column not found: {key}".format(key=key)) return self._gotitem(key, ndim=1) def _gotitem(self, key, ndim, subset=None): @@ -409,7 +410,7 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): if f is not None: return f(self, *args, **kwargs) - raise ValueError("{} is an unknown string function".format(arg)) + raise 
ValueError("{arg} is an unknown string function".format(arg=arg)) def _aggregate(self, arg, *args, **kwargs): """ @@ -484,9 +485,9 @@ def nested_renaming_depr(level=4): is_nested_renamer = True if k not in obj.columns: - raise SpecificationError('cannot perform renaming ' - 'for {0} with a nested ' - 'dictionary'.format(k)) + msg = ('cannot perform renaming for {key} with a ' + 'nested dictionary').format(key=k) + raise SpecificationError(msg) nested_renaming_depr(4 + (_level or 0)) elif isinstance(obj, ABCSeries): diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 6ff4302937d073..53f58660cabdb5 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -46,7 +46,8 @@ def _infer(a, b): if b and b.tzinfo: if not (tslib.get_timezone(tz) == tslib.get_timezone(b.tzinfo)): raise AssertionError('Inputs must both have the same timezone,' - ' {0} != {1}'.format(tz, b.tzinfo)) + ' {timezone1} != {timezone2}' + .format(timezone1=tz, timezone2=b.tzinfo)) return tz tz = None @@ -491,10 +492,10 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): offset = tslib.Timestamp(origin) - tslib.Timestamp(0) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( - "origin {} is Out of Bounds".format(origin)) + "origin {origin} is Out of Bounds".format(origin=origin)) except ValueError: - raise ValueError("origin {} cannot be converted " - "to a Timestamp".format(origin)) + raise ValueError("origin {origin} cannot be converted " + "to a Timestamp".format(origin=origin)) # convert the offset to the unit of the arg # this should be lossless in terms of precision @@ -590,16 +591,16 @@ def f(value): required = ['year', 'month', 'day'] req = sorted(list(set(required) - set(unit_rev.keys()))) if len(req): - raise ValueError("to assemble mappings requires at " - "least that [year, month, day] be specified: " - "[{0}] is missing".format(','.join(req))) + raise ValueError("to assemble mappings requires at least that " + "[year, month, day] be specified: [{required}] " + "is missing".format(required=','.join(req))) # keys we don't recognize excess = sorted(list(set(unit_rev.keys()) - set(_unit_map.values()))) if len(excess): raise ValueError("extra keys have been passed " "to the datetime assemblage: " - "[{0}]".format(','.join(excess))) + "[{excess}]".format(','.join(excess=excess))) def coerce(values): # we allow coercion to if errors allows @@ -617,7 +618,7 @@ def coerce(values): values = to_datetime(values, format='%Y%m%d', errors=errors) except (TypeError, ValueError) as e: raise ValueError("cannot assemble the " - "datetimes: {0}".format(e)) + "datetimes: {error}".format(error=e)) for u in ['h', 'm', 's', 'ms', 'us', 'ns']: value = unit_rev.get(u) @@ -627,8 +628,8 @@ def coerce(values): unit=u, errors=errors) except (TypeError, ValueError) as e: - raise ValueError("cannot assemble the datetimes " - "[{0}]: {1}".format(value, e)) + raise ValueError("cannot assemble the datetimes [{value}]: " + "{error}".format(value=value, error=e)) return values @@ -810,8 +811,10 @@ def _convert_listlike(arg, format): times.append(datetime.strptime(element, format).time()) except (ValueError, TypeError): if errors == 'raise': - raise ValueError("Cannot convert %s to a time with " - "given format %s" % (element, format)) + msg = ("Cannot convert {element} to a time with given " + "format {format}").format(element=element, + format=format) + raise ValueError(msg) elif errors == 'ignore': return arg else: @@ -876,6 +879,7 @@ def ole2datetime(oledt): # Excel 
has a bug where it thinks the date 2/29/1900 exists # we just reject any date before 3/1/1900. if val < 61: - raise ValueError("Value is outside of acceptable range: %s " % val) + msg = "Value is outside of acceptable range: {value}".format(value=val) + raise ValueError(msg) return OLE_TIME_ZERO + timedelta(days=val) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index f2d99d26a87b84..d5132826bb93f8 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -129,7 +129,8 @@ def _validate_timedelta_unit(arg): except: if arg is None: return 'ns' - raise ValueError("invalid timedelta unit {0} provided".format(arg)) + raise ValueError("invalid timedelta unit {arg} provided" + .format(arg=arg)) def _coerce_scalar_to_timedelta_type(r, unit='ns', box=True, errors='raise'): @@ -161,8 +162,8 @@ def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None): if is_timedelta64_dtype(arg): value = arg.astype('timedelta64[ns]') elif is_integer_dtype(arg): - value = arg.astype('timedelta64[{0}]'.format( - unit)).astype('timedelta64[ns]', copy=False) + value = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype( + 'timedelta64[ns]', copy=False) else: try: value = tslib.array_to_timedelta64(_ensure_object(arg), From 2bec750b21b8715e3f55e71a6c69f2abef54d08b Mon Sep 17 00:00:00 2001 From: ante328 Date: Tue, 22 Aug 2017 16:31:14 +0200 Subject: [PATCH 016/188] BUG: Fix strange behaviour of Series.iloc on MultiIndex Series (#17148) (#17291) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexing.py | 3 ++- pandas/tests/indexing/test_iloc.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 148fd0a8324021..f760d0b6359a2d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -353,6 +353,7 @@ Indexing - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`) - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) +- Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) I/O ^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 929c2346ba5b0d..6b9ad5cd2d93b7 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -146,7 +146,8 @@ def _get_setitem_indexer(self, key): return self._convert_tuple(key, is_setter=True) axis = self.obj._get_axis(0) - if isinstance(axis, MultiIndex): + + if isinstance(axis, MultiIndex) and self.name != 'iloc': try: return axis.get_loc(key) except Exception: diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 31fee303a41e20..39569f0b0cb383 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -269,6 +269,35 @@ def test_iloc_setitem(self): expected = Series([0, 1, 0], index=[4, 5, 6]) tm.assert_series_equal(s, expected) + @pytest.mark.parametrize( + 'data, indexes, values, expected_k', [ + # test without indexer value in first level of MultiIndex + ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]), + # test like code sample 1 in the issue + ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100], + [755, 1066]), + # test like code sample 
2 in the issue + ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]), + # test like code sample 3 in the issue + ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10], + [8, 15, 13]) + ]) + def test_iloc_setitem_int_multiindex_series( + self, data, indexes, values, expected_k): + # GH17148 + df = pd.DataFrame( + data=data, + columns=['i', 'j', 'k']) + df = df.set_index(['i', 'j']) + + series = df.k.copy() + for i, v in zip(indexes, values): + series.iloc[i] += v + + df['k'] = expected_k + expected = df.k + tm.assert_series_equal(series, expected) + def test_iloc_setitem_list(self): # setitem with an iloc list From 0cf2b146c526fe85e2df45b1c5e80da404c9d58f Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 23 Aug 2017 13:30:34 -0700 Subject: [PATCH 017/188] DOC: Add module doc-string to tseries/api.py --- pandas/tseries/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tseries/api.py b/pandas/tseries/api.py index 71386c02547ba7..2094791ecdc609 100644 --- a/pandas/tseries/api.py +++ b/pandas/tseries/api.py @@ -1,5 +1,5 @@ """ - +Timeseries API """ # flake8: noqa From 66ec5f3e616f6449ef2c88401042cf2a282234d7 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 23 Aug 2017 14:35:49 -0700 Subject: [PATCH 018/188] MAINT: Clean up docs in pandas/errors/__init__.py --- pandas/errors/__init__.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 6304f3a527f2c9..42b3bdd4991a9a 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -1,25 +1,28 @@ # flake8: noqa -""" expose public exceptions & warnings """ +""" +Expose public exceptions & warnings +""" from pandas._libs.tslib import OutOfBoundsDatetime class PerformanceWarning(Warning): """ - Warnings shown when there is a possible performance - impact. + Warning raised when there is a possible + performance impact. """ class UnsupportedFunctionCall(ValueError): """ - If attempting to call a numpy function on a pandas - object. For example using ``np.cumsum(groupby_object)``. + Exception raised when attempting to call a numpy function + on a pandas object, but that function is not supported by + the object e.g. ``np.cumsum(groupby_object)``. """ class UnsortedIndexError(KeyError): """ - Error raised when attempting to get a slice of a MultiIndex + Error raised when attempting to get a slice of a MultiIndex, and the index has not been lexsorted. Subclass of `KeyError`. .. versionadded:: 0.20.0 @@ -29,22 +32,22 @@ class UnsortedIndexError(KeyError): class ParserError(ValueError): """ - Exception that is thrown by an error is encountered in `pd.read_csv` + Exception that is raised by an error encountered in `pd.read_csv`. """ class DtypeWarning(Warning): """ - Warning that is raised for a dtype incompatiblity. This is + Warning that is raised for a dtype incompatiblity. This can happen whenever `pd.read_csv` encounters non- - uniform dtypes in a column(s) of a given CSV file + uniform dtypes in a column(s) of a given CSV file. """ class EmptyDataError(ValueError): """ Exception that is thrown in `pd.read_csv` (by both the C and - Python engines) when empty data or header is encountered + Python engines) when empty data or header is encountered. 
""" @@ -53,7 +56,7 @@ class ParserWarning(Warning): Warning that is raised in `pd.read_csv` whenever it is necessary to change parsers (generally from 'c' to 'python') contrary to the one specified by the user due to lack of support or functionality for - parsing particular attributes of a CSV file with the requsted engine + parsing particular attributes of a CSV file with the requsted engine. """ @@ -61,5 +64,4 @@ class MergeError(ValueError): """ Error raised when problems arise during merging due to problems with input data. Subclass of `ValueError`. - """ From d45e12b87ce867b2df3254c386c0f17f175efbf0 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 24 Aug 2017 03:50:18 -0600 Subject: [PATCH 019/188] CLN: replace %s syntax with .format in missing.py, nanops.py, ops.py (#17322) Replaced %s syntax with .format in missing.py, nanops.py, ops.py. Additionally, made some of the existing positional .format code more explicit. --- pandas/core/missing.py | 41 +++++++++++++-------- pandas/core/nanops.py | 8 ++-- pandas/core/ops.py | 84 +++++++++++++++++++++++------------------- 3 files changed, 76 insertions(+), 57 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 93281e20a2a964..8a6a870834c83e 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -88,8 +88,8 @@ def clean_fill_method(method, allow_nearest=False): valid_methods.append('nearest') expecting = 'pad (ffill), backfill (bfill) or nearest' if method not in valid_methods: - msg = ('Invalid fill method. Expecting %s. Got %s' % - (expecting, method)) + msg = ('Invalid fill method. Expecting {expecting}. Got {method}' + .format(expecting=expecting, method=method)) raise ValueError(msg) return method @@ -104,8 +104,8 @@ def clean_interp_method(method, **kwargs): raise ValueError("You must specify the order of the spline or " "polynomial.") if method not in valid: - raise ValueError("method must be one of {0}." - "Got '{1}' instead.".format(valid, method)) + raise ValueError("method must be one of {valid}. Got '{method}' " + "instead.".format(valid=valid, method=method)) return method @@ -146,8 +146,10 @@ def interpolate_1d(xvalues, yvalues, method='linear', limit=None, valid_limit_directions = ['forward', 'backward', 'both'] limit_direction = limit_direction.lower() if limit_direction not in valid_limit_directions: - raise ValueError('Invalid limit_direction: expecting one of %r, got ' - '%r.' % (valid_limit_directions, limit_direction)) + msg = ('Invalid limit_direction: expecting one of {valid!r}, ' + 'got {invalid!r}.') + raise ValueError(msg.format(valid=valid_limit_directions, + invalid=limit_direction)) from pandas import Series ys = Series(yvalues) @@ -248,7 +250,8 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None, # TODO: Why is DatetimeIndex being imported here? 
from pandas import DatetimeIndex # noqa except ImportError: - raise ImportError('{0} interpolation requires Scipy'.format(method)) + raise ImportError('{method} interpolation requires SciPy' + .format(method=method)) new_x = np.asarray(new_x) @@ -466,7 +469,8 @@ def pad_1d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'pad_inplace_%s' % dtype.name, None) + name = 'pad_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_1d_datetime elif is_integer_dtype(values): @@ -476,7 +480,8 @@ def pad_1d(values, limit=None, mask=None, dtype=None): _method = algos.pad_inplace_object if _method is None: - raise ValueError('Invalid dtype for pad_1d [%s]' % dtype.name) + raise ValueError('Invalid dtype for pad_1d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -490,7 +495,8 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'backfill_inplace_%s' % dtype.name, None) + name = 'backfill_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_1d_datetime elif is_integer_dtype(values): @@ -500,7 +506,8 @@ def backfill_1d(values, limit=None, mask=None, dtype=None): _method = algos.backfill_inplace_object if _method is None: - raise ValueError('Invalid dtype for backfill_1d [%s]' % dtype.name) + raise ValueError('Invalid dtype for backfill_1d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -515,7 +522,8 @@ def pad_2d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'pad_2d_inplace_%s' % dtype.name, None) + name = 'pad_2d_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _pad_2d_datetime elif is_integer_dtype(values): @@ -525,7 +533,8 @@ def pad_2d(values, limit=None, mask=None, dtype=None): _method = algos.pad_2d_inplace_object if _method is None: - raise ValueError('Invalid dtype for pad_2d [%s]' % dtype.name) + raise ValueError('Invalid dtype for pad_2d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) @@ -544,7 +553,8 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): dtype = values.dtype _method = None if is_float_dtype(values): - _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None) + name = 'backfill_2d_inplace_{name}'.format(name=dtype.name) + _method = getattr(algos, name, None) elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype): _method = _backfill_2d_datetime elif is_integer_dtype(values): @@ -554,7 +564,8 @@ def backfill_2d(values, limit=None, mask=None, dtype=None): _method = algos.backfill_2d_inplace_object if _method is None: - raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name) + raise ValueError('Invalid dtype for backfill_2d [{name}]' + .format(name=dtype.name)) if mask is None: mask = isna(values) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b2bbf1c75b7ea0..858aed7fd3e237 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -70,9 +70,8 @@ def __call__(self, f): def _f(*args, **kwargs): obj_iter = itertools.chain(args, 
compat.itervalues(kwargs)) if any(self.check(obj) for obj in obj_iter): - raise TypeError('reduction operation {0!r} not allowed for ' - 'this dtype'.format( - f.__name__.replace('nan', ''))) + msg = 'reduction operation {name!r} not allowed for this dtype' + raise TypeError(msg.format(name=f.__name__.replace('nan', ''))) try: with np.errstate(invalid='ignore'): return f(*args, **kwargs) @@ -786,7 +785,8 @@ def _ensure_numeric(x): try: x = complex(x) except Exception: - raise TypeError('Could not convert %s to numeric' % str(x)) + raise TypeError('Could not convert {value!s} to numeric' + .format(value=x)) return x # NA-friendly array comparisons diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 82101414e4aa61..221f6ff8b92c68 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -63,9 +63,9 @@ def _create_methods(arith_method, comp_method, bool_method, def names(x): if x[-1] == "_": - return "__%s_" % x + return "__{name}_".format(name=x) else: - return "__%s__" % x + return "__{name}__".format(name=x) else: names = lambda x: x @@ -388,8 +388,8 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__div__', '__truediv__', '__mul__', '__rmul__'): raise TypeError("can only operate on a timedelta and an " "integer or a float for division and " - "multiplication, but the operator [%s] was" - "passed" % name) + "multiplication, but the operator [{name}] " + "was passed".format(name=name)) # 2 timedeltas elif ((self.is_timedelta_lhs and @@ -400,9 +400,9 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__div__', '__rdiv__', '__truediv__', '__rtruediv__', '__add__', '__radd__', '__sub__', '__rsub__'): - raise TypeError("can only operate on a timedeltas for " - "addition, subtraction, and division, but the" - " operator [%s] was passed" % name) + raise TypeError("can only operate on a timedeltas for addition" + ", subtraction, and division, but the operator" + " [{name}] was passed".format(name=name)) # datetime and timedelta/DateOffset elif (self.is_datetime_lhs and @@ -411,23 +411,24 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__add__', '__radd__', '__sub__'): raise TypeError("can only operate on a datetime with a rhs of " "a timedelta/DateOffset for addition and " - "subtraction, but the operator [%s] was " - "passed" % name) + "subtraction, but the operator [{name}] was " + "passed".format(name=name)) elif (self.is_datetime_rhs and (self.is_timedelta_lhs or self.is_offset_lhs)): if name not in ('__add__', '__radd__', '__rsub__'): raise TypeError("can only operate on a timedelta/DateOffset " "with a rhs of a datetime for addition, " - "but the operator [%s] was passed" % name) + "but the operator [{name}] was passed" + .format(name=name)) # 2 datetimes elif self.is_datetime_lhs and self.is_datetime_rhs: if name not in ('__sub__', '__rsub__'): raise TypeError("can only operate on a datetimes for" - " subtraction, but the operator [%s] was" - " passed" % name) + " subtraction, but the operator [{name}] was" + " passed".format(name=name)) # if tz's must be equal (same or None) if getattr(lvalues, 'tz', None) != getattr(rvalues, 'tz', None): @@ -439,8 +440,8 @@ def _validate(self, lvalues, rvalues, name): if name not in ('__add__', '__radd__'): raise TypeError("can only operate on a timedelta/DateOffset " - "and a datetime for addition, but the " - "operator [%s] was passed" % name) + "and a datetime for addition, but the operator" + " [{name}] was passed".format(name=name)) else: raise TypeError('cannot operate on a series without a rhs 
' 'of a series/ndarray of type datetime64[ns] ' @@ -498,7 +499,7 @@ def _convert_to_array(self, values, name=None, other=None): values = values.to_timestamp().to_series() elif name not in ('__truediv__', '__div__', '__mul__', '__rmul__'): raise TypeError("incompatible type for a datetime/timedelta " - "operation [{0}]".format(name)) + "operation [{name}]".format(name=name)) elif inferred_type == 'floating': if (isna(values).all() and name in ('__add__', '__radd__', '__sub__', '__rsub__')): @@ -508,8 +509,9 @@ def _convert_to_array(self, values, name=None, other=None): elif self._is_offset(values): return values else: - raise TypeError("incompatible type [{0}] for a datetime/timedelta" - " operation".format(np.array(values).dtype)) + raise TypeError("incompatible type [{dtype}] for a " + "datetime/timedelta operation" + .format(dtype=np.array(values).dtype)) return values @@ -866,8 +868,8 @@ def wrapper(self, other, axis=None): with np.errstate(all='ignore'): res = na_op(values, other) if is_scalar(res): - raise TypeError('Could not compare %s type with Series' % - type(other)) + raise TypeError('Could not compare {typ} type with Series' + .format(typ=type(other))) # always return a full value series here res = _values_from_object(res) @@ -906,9 +908,10 @@ def na_op(x, y): y = bool(y) result = lib.scalar_binop(x, y, op) except: - raise TypeError("cannot compare a dtyped [{0}] array with " - "a scalar of type [{1}]".format( - x.dtype, type(y).__name__)) + msg = ("cannot compare a dtyped [{dtype}] array " + "with a scalar of type [{type}]" + ).format(dtype=x.dtype, type=type(y).__name__) + raise TypeError(msg) return result @@ -1140,14 +1143,17 @@ def _align_method_FRAME(left, right, axis): """ convert rhs to meet lhs dims if input is list, tuple or np.ndarray """ def to_series(right): - msg = 'Unable to coerce to Series, length must be {0}: given {1}' + msg = ('Unable to coerce to Series, length must be {req_len}: ' + 'given {given_len}') if axis is not None and left._get_axis_name(axis) == 'index': if len(left.index) != len(right): - raise ValueError(msg.format(len(left.index), len(right))) + raise ValueError(msg.format(req_len=len(left.index), + given_len=len(right))) right = left._constructor_sliced(right, index=left.index) else: if len(left.columns) != len(right): - raise ValueError(msg.format(len(left.columns), len(right))) + raise ValueError(msg.format(req_len=len(left.columns), + given_len=len(right))) right = left._constructor_sliced(right, index=left.columns) return right @@ -1161,15 +1167,16 @@ def to_series(right): elif right.ndim == 2: if left.shape != right.shape: - msg = ("Unable to coerce to DataFrame, " - "shape must be {0}: given {1}") - raise ValueError(msg.format(left.shape, right.shape)) + msg = ("Unable to coerce to DataFrame, shape " + "must be {req_shape}: given {given_shape}" + ).format(req_shape=left.shape, given_shape=right.shape) + raise ValueError(msg) right = left._constructor(right, index=left.index, columns=left.columns) else: - msg = 'Unable to coerce to Series/DataFrame, dim must be <= 2: {0}' - raise ValueError(msg.format(right.shape, )) + raise ValueError('Unable to coerce to Series/DataFrame, dim ' + 'must be <= 2: {dim}'.format(dim=right.shape)) return right @@ -1278,7 +1285,8 @@ def na_op(x, y): return result - @Appender('Wrapper for flexible comparison methods %s' % name) + @Appender('Wrapper for flexible comparison methods {name}' + .format(name=name)) def f(self, other, axis=default_axis, level=None): other = _align_method_FRAME(self, other, axis) @@ 
-1299,7 +1307,7 @@ def f(self, other, axis=default_axis, level=None): def _comp_method_FRAME(func, name, str_rep, masker=False): - @Appender('Wrapper for comparison method %s' % name) + @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other): if isinstance(other, pd.DataFrame): # Another DataFrame return self._compare_frame(other, func, str_rep) @@ -1349,9 +1357,9 @@ def na_op(x, y): # work only for scalars def f(self, other): if not is_scalar(other): - raise ValueError('Simple arithmetic with %s can only be ' - 'done with scalar values' % - self._constructor.__name__) + raise ValueError('Simple arithmetic with {name} can only be ' + 'done with scalar values' + .format(name=self._constructor.__name__)) return self._combine(other, op) @@ -1384,7 +1392,7 @@ def na_op(x, y): return result - @Appender('Wrapper for comparison method %s' % name) + @Appender('Wrapper for comparison method {name}'.format(name=name)) def f(self, other, axis=None): # Validate the axis parameter if axis is not None: @@ -1394,8 +1402,8 @@ def f(self, other, axis=None): return self._compare_constructor(other, na_op, try_cast=False) elif isinstance(other, (self._constructor_sliced, pd.DataFrame, ABCSeries)): - raise Exception("input needs alignment for this object [%s]" % - self._constructor) + raise Exception("input needs alignment for this object [{object}]" + .format(object=self._constructor)) else: return self._combine_const(other, na_op, try_cast=False) From 6993c1ba981554cdd8f45675db5807077a28e2c0 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 24 Aug 2017 03:03:18 -0700 Subject: [PATCH 020/188] Make pd.Period immutable (#17239) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/_libs/period.pyx | 17 +++++++++++------ pandas/tests/scalar/test_period.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f760d0b6359a2d..604d275511fa02 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -291,6 +291,8 @@ Other API Changes - Moved definition of ``MergeError`` to the ``pandas.errors`` module. - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) +- :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). + .. 
_whatsnew_0210.deprecations: diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 6ba7ec0270f30a..a1d04fea891517 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -29,7 +29,9 @@ from datetime cimport ( PANDAS_FR_ns, INT32_MIN) + cimport util, lib + from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, @@ -668,13 +670,17 @@ class IncompatibleFrequency(ValueError): cdef class _Period(object): - cdef public: + cdef readonly: int64_t ordinal object freq _comparables = ['name', 'freqstr'] _typ = 'period' + def __cinit__(self, ordinal, freq): + self.ordinal = ordinal + self.freq = freq + @classmethod def _maybe_convert_freq(cls, object freq): @@ -698,9 +704,8 @@ cdef class _Period(object): if ordinal == iNaT: return NaT else: - self = _Period.__new__(cls) - self.ordinal = ordinal - self.freq = cls._maybe_convert_freq(freq) + freq = cls._maybe_convert_freq(freq) + self = _Period.__new__(cls, ordinal, freq) return self def __richcmp__(self, other, op): @@ -752,7 +757,7 @@ cdef class _Period(object): def __add__(self, other): if isinstance(self, Period): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, + offsets.DateOffset, Timedelta)): return self._add_delta(other) elif other is NaT: @@ -770,7 +775,7 @@ cdef class _Period(object): def __sub__(self, other): if isinstance(self, Period): if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, offsets.DateOffset, + offsets.DateOffset, Timedelta)): neg_other = -other return self + neg_other diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index 931d6b2b8f1f09..a167c9c738b0bf 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -1406,3 +1406,14 @@ def test_period_ops_offset(self): with tm.assert_raises_regex(period.IncompatibleFrequency, msg): p - offsets.Hour(2) + + +def test_period_immutable(): + # see gh-17116 + per = pd.Period('2014Q1') + with pytest.raises(AttributeError): + per.ordinal = 14 + + freq = per.freq + with pytest.raises(AttributeError): + per.freq = 2 * freq From 62527c0f328caa4ae716328246df75a6f2b33028 Mon Sep 17 00:00:00 2001 From: P-Tillmann Date: Thu, 24 Aug 2017 12:38:27 +0200 Subject: [PATCH 021/188] Bug: groupby multiindex levels equals rows (#16859) closes #16843 --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/groupby.py | 9 +++++---- pandas/tests/groupby/test_groupby.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 604d275511fa02..6317b4ae845656 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -383,7 +383,7 @@ Groupby/Resample/Rolling - Bug in ``groupby.transform()`` that would coerce boolean dtypes back to float (:issue:`16875`) - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`) - Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) - +- Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index aa7c4517c0a016..c23b00dc740a43 
100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2629,13 +2629,14 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, try: if isinstance(obj, DataFrame): - all_in_columns = all(g in obj.columns for g in keys) + all_in_columns_index = all(g in obj.columns or g in obj.index.names + for g in keys) else: - all_in_columns = False + all_in_columns_index = False except Exception: - all_in_columns = False + all_in_columns_index = False - if not any_callable and not all_in_columns and \ + if not any_callable and not all_in_columns_index and \ not any_arraylike and not any_groupers and \ match_axis_length and level is None: keys = [com._asarray_tuplesafe(keys)] diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f9e1a0d2e744a9..8957beacab376d 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3891,6 +3891,19 @@ def predictions(tool): result = df2.groupby('Key').apply(predictions).p1 tm.assert_series_equal(expected, result) + def test_gb_key_len_equal_axis_len(self): + # GH16843 + # test ensures that index and column keys are recognized correctly + # when number of keys equals axis length of groupby + df = pd.DataFrame([['foo', 'bar', 'B', 1], + ['foo', 'bar', 'B', 2], + ['foo', 'baz', 'C', 3]], + columns=['first', 'second', 'third', 'one']) + df = df.set_index(['first', 'second']) + df = df.groupby(['first', 'second', 'third']).size() + assert df.loc[('foo', 'bar', 'B')] == 2 + assert df.loc[('foo', 'baz', 'C')] == 1 + def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = lmap(tuple, df[keys].values) From 96f92eb1c696723b6465fdc273dc8406201c606a Mon Sep 17 00:00:00 2001 From: step4me Date: Thu, 24 Aug 2017 08:53:50 -0400 Subject: [PATCH 022/188] BUG: Cannot use tz-aware origin in to_datetime (#16842) closes #16842 Author: step4me Closes #17244 from step4me/step4me-feature and squashes the following commits: 09d051d48 [step4me] BUG: Cannot use tz-aware origin in to_datetime (#16842) --- doc/source/whatsnew/v0.21.0.txt | 5 +++-- pandas/core/tools/datetimes.py | 7 ++++++- pandas/tests/indexes/datetimes/test_tools.py | 6 ++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6317b4ae845656..fcadd26156b1d4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -292,6 +292,7 @@ Other API Changes - The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`) - :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`) - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). +- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) .. 
_whatsnew_0210.deprecations: @@ -356,6 +357,7 @@ Indexing - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`) - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) +- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) I/O ^^^ @@ -402,6 +404,7 @@ Reshaping - Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`) - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). +- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) Numeric ^^^^^^^ @@ -420,5 +423,3 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) -- Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) -- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 53f58660cabdb5..c0f234a36803d7 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -489,7 +489,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # we are going to offset back to unix / epoch time try: - offset = tslib.Timestamp(origin) - tslib.Timestamp(0) + offset = tslib.Timestamp(origin) except tslib.OutOfBoundsDatetime: raise tslib.OutOfBoundsDatetime( "origin {origin} is Out of Bounds".format(origin=origin)) @@ -497,6 +497,11 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): raise ValueError("origin {origin} cannot be converted " "to a Timestamp".format(origin=origin)) + if offset.tz is not None: + raise ValueError( + "origin offset {} must be tz-naive".format(offset)) + offset -= tslib.Timestamp(0) + # convert the offset to the unit of the arg # this should be lossless in terms of precision offset = offset // tslib.Timedelta(1, unit=unit) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9764b65d330af3..50669ee357bbdc 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1589,6 +1589,12 @@ def test_invalid_origins(self, origin, exc, units, units_from_epochs): pd.to_datetime(units_from_epochs, unit=units, origin=origin) + def test_invalid_origins_tzinfo(self): + # GH16842 + with pytest.raises(ValueError): + pd.to_datetime(1, unit='D', + origin=datetime(2000, 1, 1, tzinfo=pytz.utc)) + def test_processing_order(self): # make sure we handle out-of-bounds *before* # constructing the dates From 473a7f3c186f6b0bfd9d3ce413fb627cf7a8f111 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 25 Aug 2017 13:29:57 -0700 Subject: [PATCH 023/188] Replace usage of total_seconds compat func with timedelta method (#17289) --- pandas/_libs/period.pyx | 7 ++--- pandas/_libs/src/datetime_helper.h | 36 ----------------------- pandas/_libs/src/ujson/python/objToJSON.c | 22 
+++++++++++++- pandas/_libs/tslib.pyx | 28 +++++++----------- pandas/io/pytables.py | 2 +- pandas/tseries/offsets.py | 6 ++-- setup.py | 2 -- 7 files changed, 38 insertions(+), 65 deletions(-) delete mode 100644 pandas/_libs/src/datetime_helper.h diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index a1d04fea891517..816b7ebfff86de 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -10,9 +10,6 @@ from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np -cdef extern from "datetime_helper.h": - double total_seconds(object) - from libc.stdlib cimport free from pandas import compat @@ -552,7 +549,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) curr_reso = _reso_stamp(&dts) @@ -619,7 +616,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = get_period_ordinal(dts.year, dts.month, dts.day, diff --git a/pandas/_libs/src/datetime_helper.h b/pandas/_libs/src/datetime_helper.h deleted file mode 100644 index 8023285f85b9b3..00000000000000 --- a/pandas/_libs/src/datetime_helper.h +++ /dev/null @@ -1,36 +0,0 @@ -/* -Copyright (c) 2016, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. -*/ - -#ifndef PANDAS__LIBS_SRC_DATETIME_HELPER_H_ -#define PANDAS__LIBS_SRC_DATETIME_HELPER_H_ - -#include -#include "datetime.h" -#include "numpy/arrayobject.h" -#include "numpy/arrayscalars.h" - -npy_int64 get_long_attr(PyObject *o, const char *attr) { - npy_int64 long_val; - PyObject *value = PyObject_GetAttrString(o, attr); - long_val = (PyLong_Check(value) ? 
- PyLong_AsLongLong(value) : PyInt_AS_LONG(value)); - Py_DECREF(value); - return long_val; -} - -npy_float64 total_seconds(PyObject *td) { - // Python 2.6 compat - npy_int64 microseconds = get_long_attr(td, "microseconds"); - npy_int64 seconds = get_long_attr(td, "seconds"); - npy_int64 days = get_long_attr(td, "days"); - npy_int64 days_in_seconds = days * 24LL * 3600LL; - return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; -} - -#endif // PANDAS__LIBS_SRC_DATETIME_HELPER_H_ diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index f2c0b18d351312..4beaa3fd449df2 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -47,9 +47,9 @@ Numeric decoder derived from from TCL library #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) -#include // NOLINT(build/include_order) #include // NOLINT(build/include_order) #include // NOLINT(build/include_order) +#include "datetime.h" static PyObject *type_decimal; @@ -329,6 +329,26 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { return ret; } +npy_int64 get_long_attr(PyObject *o, const char *attr) { + npy_int64 long_val; + PyObject *value = PyObject_GetAttrString(o, attr); + long_val = (PyLong_Check(value) ? + PyLong_AsLongLong(value) : PyInt_AS_LONG(value)); + Py_DECREF(value); + return long_val; +} + +npy_float64 total_seconds(PyObject *td) { + // Python 2.6 compat + // TODO(anyone): remove this legacy workaround with a more + // direct td.total_seconds() + npy_int64 microseconds = get_long_attr(td, "microseconds"); + npy_int64 seconds = get_long_attr(td, "seconds"); + npy_int64 days = get_long_attr(td, "days"); + npy_int64 days_in_seconds = days * 24LL * 3600LL; + return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; +} + static PyObject *get_item(PyObject *obj, Py_ssize_t i) { PyObject *tmp = PyInt_FromSsize_t(i); PyObject *ret; diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c4a38ec660a4c3..b5aca2e3ec3094 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -26,9 +26,6 @@ from cpython cimport ( cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) -cdef extern from "datetime_helper.h": - double total_seconds(object) - # this is our datetime.pxd from libc.stdlib cimport free @@ -1639,7 +1636,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + delta, PANDAS_FR_ns, &obj.dts) @@ -4136,7 +4133,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = (int(total_seconds(_get_utcoffset(tz1, dt))) + delta = (int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000) utc_dates[i] = v - delta else: @@ -4176,8 +4173,8 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = 
int(total_seconds( - _get_utcoffset(tz2, dt))) * 1000000000 + delta = (int(_get_utcoffset(tz2, dt).total_seconds()) + * 1000000000) result[i] = v + delta return result @@ -4243,7 +4240,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = int(total_seconds(_get_utcoffset(tz1, dt))) * 1000000000 + delta = int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000 utc_date = val - delta elif _get_zone(tz1) != 'UTC': trans, deltas, typ = _get_dst_info(tz1) @@ -4261,7 +4258,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = int(total_seconds(_get_utcoffset(tz2, dt))) * 1000000000 + delta = int(_get_utcoffset(tz2, dt).total_seconds()) * 1000000000 return utc_date + delta # Convert UTC to other timezone @@ -4333,7 +4330,7 @@ cdef object _get_dst_info(object tz): """ cache_key = _tz_cache_key(tz) if cache_key is None: - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 return (np.array([NPY_NAT + 1], dtype=np.int64), np.array([num], dtype=np.int64), None) @@ -4380,7 +4377,7 @@ cdef object _get_dst_info(object tz): else: # static tzinfo trans = np.array([NPY_NAT + 1], dtype=np.int64) - num = int(total_seconds(_get_utcoffset(tz, None))) * 1000000000 + num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 deltas = np.array([num], dtype=np.int64) typ = 'static' @@ -4403,9 +4400,6 @@ cdef object _get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -def tot_seconds(td): - return total_seconds(td) - cpdef ndarray _unbox_utcoffsets(object transinfo): cdef: Py_ssize_t i, sz @@ -4415,7 +4409,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): arr = np.empty(sz, dtype='i8') for i in range(sz): - arr[i] = int(total_seconds(transinfo[i][0])) * 1000000000 + arr[i] = int(transinfo[i][0].total_seconds()) * 1000000000 return arr @@ -4458,7 +4452,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 result[i] = v - delta return result @@ -5181,7 +5175,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(total_seconds(_get_utcoffset(tz, dt))) * 1000000000 + delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = _normalized_stamp(&dts) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 82c80a13372d7a..712e9e9903f0a5 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4381,7 +4381,7 @@ def _get_tz(tz): """ for a tz-aware type, return an encoded zone """ zone = tslib.get_timezone(tz) if zone is None: - zone = tslib.tot_seconds(tz.utcoffset()) + zone = tz.utcoffset().total_seconds() return zone diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 
29cdda55488965..7ccecaa84e6d6d 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -778,12 +778,12 @@ def _get_business_hours_by_sec(self): # create dummy datetime to calcurate businesshours in a day dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 1, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() else: self.daytime = False dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 2, self.end.hour, self.end.minute) - return tslib.tot_seconds(until - dtstart) + return (until - dtstart).total_seconds() @apply_wraps def rollback(self, dt): @@ -907,7 +907,7 @@ def _onOffset(self, dt, businesshours): op = self._prev_opening_time(dt) else: op = self._next_opening_time(dt) - span = tslib.tot_seconds(dt - op) + span = (dt - op).total_seconds() if span <= businesshours: return True else: diff --git a/setup.py b/setup.py index 04a5684c20fcd5..444db5bc4d275e 100755 --- a/setup.py +++ b/setup.py @@ -467,7 +467,6 @@ def pxd(name): tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', 'pandas/_libs/src/datetime/np_datetime_strings.h', - 'pandas/_libs/src/datetime_helper.h', 'pandas/_libs/src/period_helper.h', 'pandas/_libs/src/datetime.pxd'] @@ -597,7 +596,6 @@ def pxd(name): ujson_ext = Extension('pandas._libs.json', depends=['pandas/_libs/src/ujson/lib/ultrajson.h', - 'pandas/_libs/src/datetime_helper.h', 'pandas/_libs/src/numpy_helper.h'], sources=['pandas/_libs/src/ujson/python/ujson.c', 'pandas/_libs/src/ujson/python/objToJSON.c', From 376483e12e4a08140d594eab86bf22423684fbcb Mon Sep 17 00:00:00 2001 From: cbertinato Date: Mon, 28 Aug 2017 09:58:05 -0400 Subject: [PATCH 024/188] CLN: replace %s syntax with .format in core/indexing.py (#17357) Progress toward issue #16130. Converted old string formatting to new string formatting in core/indexing.py. 
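
For reference, this is the shape of the conversion applied throughout this
series, shown on one of the messages touched below (illustrative excerpt,
not itself part of the diff):

    # before: positional %-interpolation, easy to mismatch with the tuple
    raise KeyError("the label [%s] is not in the [%s]" %
                   (key, self.obj._get_axis_name(axis)))

    # after: named str.format fields are self-documenting and reorderable
    raise KeyError(u"the label [{key}] is not in the [{axis}]"
                   .format(key=key, axis=self.obj._get_axis_name(axis)))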
--- pandas/core/indexing.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6b9ad5cd2d93b7..b7a51afcedabfe 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -188,8 +188,9 @@ def _has_valid_tuple(self, key): if i >= self.obj.ndim: raise IndexingError('Too many indexers') if not self._has_valid_type(k, i): - raise ValueError("Location based indexing can only have [%s] " - "types" % self._valid_types) + raise ValueError("Location based indexing can only have " + "[{types}] types" + .format(types=self._valid_types)) def _should_validate_iterable(self, axis=0): """ return a boolean whether this axes needs validation for a passed @@ -263,11 +264,11 @@ def _has_valid_positional_setitem_indexer(self, indexer): pass elif is_integer(i): if i >= len(ax): - raise IndexError("{0} cannot enlarge its target object" - .format(self.name)) + raise IndexError("{name} cannot enlarge its target " + "object".format(name=self.name)) elif isinstance(i, dict): - raise IndexError("{0} cannot enlarge its target object" - .format(self.name)) + raise IndexError("{name} cannot enlarge its target object" + .format(name=self.name)) return True @@ -1235,7 +1236,8 @@ def _convert_to_indexer(self, obj, axis=0, is_setter=False): mask = check == -1 if mask.any(): - raise KeyError('%s not in index' % objarr[mask]) + raise KeyError('{mask} not in index' + .format(mask=objarr[mask])) return _values_from_object(indexer) @@ -1421,8 +1423,9 @@ def _has_valid_type(self, key, axis): if (not is_iterator(key) and len(key) and np.all(ax.get_indexer_for(key) < 0)): - raise KeyError("None of [%s] are in the [%s]" % - (key, self.obj._get_axis_name(axis))) + raise KeyError(u"None of [{key}] are in the [{axis}]" + .format(key=key, + axis=self.obj._get_axis_name(axis))) return True @@ -1432,8 +1435,9 @@ def error(): if isna(key): raise TypeError("cannot use label indexing with a null " "key") - raise KeyError("the label [%s] is not in the [%s]" % - (key, self.obj._get_axis_name(axis))) + raise KeyError(u"the label [{key}] is not in the [{axis}]" + .format(key=key, + axis=self.obj._get_axis_name(axis))) try: key = self._convert_scalar_indexer(key, axis) From 36dadd70376c6033037af281a4669a360fc71cfa Mon Sep 17 00:00:00 2001 From: gfyoung Date: Mon, 28 Aug 2017 07:05:29 -0700 Subject: [PATCH 025/188] DOC: Point to dev-docs in issue template (#17353) [ci skip] --- .github/ISSUE_TEMPLATE.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index 237e61487d13a4..e33835c4625112 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -12,6 +12,12 @@ **Note**: Many problems can be resolved by simply upgrading `pandas` to the latest version. Before submitting, please check if that solution works for you. If possible, you may want to check if `master` addresses this issue, but that is not necessary. +For documentation-related issues, you can check the latest versions of the docs on `master` here: + +https://pandas-docs.github.io/pandas-docs-travis/ + +If the issue has not been resolved there, go ahead and file it in the issue tracker. 
+ #### Expected Output #### Output of ``pd.show_versions()`` From df2ebfc9fd424ec760bfd2879993e44aaf983d42 Mon Sep 17 00:00:00 2001 From: chris-b1 Date: Tue, 29 Aug 2017 05:06:29 -0500 Subject: [PATCH 026/188] CLN: remove total_seconds compat from json (#17341) --- pandas/_libs/src/ujson/python/objToJSON.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/src/ujson/python/objToJSON.c b/pandas/_libs/src/ujson/python/objToJSON.c index 4beaa3fd449df2..1ee862b54cf0bc 100644 --- a/pandas/_libs/src/ujson/python/objToJSON.c +++ b/pandas/_libs/src/ujson/python/objToJSON.c @@ -329,7 +329,7 @@ static Py_ssize_t get_attr_length(PyObject *obj, char *attr) { return ret; } -npy_int64 get_long_attr(PyObject *o, const char *attr) { +static npy_int64 get_long_attr(PyObject *o, const char *attr) { npy_int64 long_val; PyObject *value = PyObject_GetAttrString(o, attr); long_val = (PyLong_Check(value) ? @@ -338,15 +338,12 @@ npy_int64 get_long_attr(PyObject *o, const char *attr) { return long_val; } -npy_float64 total_seconds(PyObject *td) { - // Python 2.6 compat - // TODO(anyone): remove this legacy workaround with a more - // direct td.total_seconds() - npy_int64 microseconds = get_long_attr(td, "microseconds"); - npy_int64 seconds = get_long_attr(td, "seconds"); - npy_int64 days = get_long_attr(td, "days"); - npy_int64 days_in_seconds = days * 24LL * 3600LL; - return (microseconds + (seconds + days_in_seconds) * 1000000.0) / 1000000.0; +static npy_float64 total_seconds(PyObject *td) { + npy_float64 double_val; + PyObject *value = PyObject_CallMethod(td, "total_seconds", NULL); + double_val = PyFloat_AS_DOUBLE(value); + Py_DECREF(value); + return double_val; } static PyObject *get_item(PyObject *obj, Py_ssize_t i) { From 6bab9d18bef3b7fccab2830d6dad78d0fb476ed8 Mon Sep 17 00:00:00 2001 From: jschendel Date: Tue, 29 Aug 2017 04:10:15 -0600 Subject: [PATCH 027/188] CLN: Move test_intersect_str_dates (#17366) Moves test_intersect_str_dates from tests/indexes/test_range.py to tests/indexes/test_base.py. 
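
The relocated test exercises generic object-dtype ``Index.intersection``
rather than anything ``RangeIndex``-specific, which is why it belongs in
test_base.py. Roughly, it asserts (mirroring the test body below):

    i1 = Index([datetime(2012, 2, 9), datetime(2012, 2, 22)], dtype=object)
    i2 = Index(['aa'], dtype=object)

    assert len(i2.intersection(i1)) == 0  # empty result, no comparison error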
---
 pandas/tests/indexes/test_base.py  | 9 +++++++++
 pandas/tests/indexes/test_range.py | 9 ---------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
index ef36e4a91aa1c7..07e98c326bcaa6 100644
--- a/pandas/tests/indexes/test_base.py
+++ b/pandas/tests/indexes/test_base.py
@@ -663,6 +663,15 @@ def test_intersection(self):
         intersect = first.intersection(second)
         assert intersect.name is None

+    def test_intersect_str_dates(self):
+        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
+
+        i1 = Index(dt_dates, dtype=object)
+        i2 = Index(['aa'], dtype=object)
+        res = i2.intersection(i1)
+
+        assert len(res) == 0
+
     def test_union(self):
         first = self.strIndex[5:20]
         second = self.strIndex[:10]
diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py
index 566354da4870d0..5ecf467b57fc5c 100644
--- a/pandas/tests/indexes/test_range.py
+++ b/pandas/tests/indexes/test_range.py
@@ -639,15 +639,6 @@ def test_intersection(self):
         expected = RangeIndex(0, 0, 1)
         tm.assert_index_equal(result, expected)

-    def test_intersect_str_dates(self):
-        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
-
-        i1 = Index(dt_dates, dtype=object)
-        i2 = Index(['aa'], dtype=object)
-        res = i2.intersection(i1)
-
-        assert len(res) == 0
-
     def test_union_noncomparable(self):
         from datetime import datetime, timedelta
         # corner case, non-Int64Index

From 9a1dfca9182c86c90fffa26579844244cfd7cd7a Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Tue, 29 Aug 2017 05:52:51 -0700
Subject: [PATCH 028/188] BUG: Respect dups in reindexing CategoricalIndex
 (#17355)

Previously, when the indexer was identical to the index's elements, a
fast path returned a plain positional range. We should still return
duplicates when the indexer contains duplicates.

Closes gh-17323.
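
Roughly, the behavior being fixed (illustrative session, not part of the
tests below):

    ci = pd.CategoricalIndex(list('aabbca'))

    # before: because ci.equals(ci) is True, get_indexer took a fast path
    # and returned np.arange(len(ci)), silently ignoring the duplicates
    # after: the fast path also requires ci.is_unique, so duplicated
    # entries fall through to get_indexer_non_unique, matching the
    # behavior of a plain Index
    ci.get_indexer(list('aabbca'))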
--- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/category.py | 2 +- pandas/tests/indexes/test_category.py | 22 +++++++++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fcadd26156b1d4..942e37a29f8d57 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -358,6 +358,7 @@ Indexing - Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`) - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) +- Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) I/O ^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f22407308e0944..0681202289311e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -487,7 +487,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): method = missing.clean_reindex_fill_method(method) target = ibase._ensure_index(target) - if self.equals(target): + if self.is_unique and self.equals(target): return np.arange(len(self), dtype='intp') if method == 'pad' or method == 'backfill': diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 64bd6df361aeb7..05d31af57b36c5 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -365,18 +365,18 @@ def test_astype(self): tm.assert_index_equal(result, expected) def test_reindex_base(self): - - # determined by cat ordering - idx = self.create_index() + # Determined by cat ordering. + idx = CategoricalIndex(list("cab"), categories=list("cab")) expected = np.arange(len(idx), dtype=np.intp) actual = idx.get_indexer(idx) tm.assert_numpy_array_equal(expected, actual) - with tm.assert_raises_regex(ValueError, 'Invalid fill method'): - idx.get_indexer(idx, method='invalid') + with tm.assert_raises_regex(ValueError, "Invalid fill method"): + idx.get_indexer(idx, method="invalid") def test_reindexing(self): + np.random.seed(123456789) ci = self.create_index() oidx = Index(np.array(ci)) @@ -388,6 +388,18 @@ def test_reindexing(self): actual = ci.get_indexer(finder) tm.assert_numpy_array_equal(expected, actual) + # see gh-17323 + # + # Even when indexer is equal to the + # members in the index, we should + # respect duplicates instead of taking + # the fast-track path. 
+ for finder in [list("aabbca"), list("aababca")]: + expected = oidx.get_indexer_non_unique(finder)[0] + + actual = ci.get_indexer(finder) + tm.assert_numpy_array_equal(expected, actual) + def test_reindex_dtype(self): c = CategoricalIndex(['a', 'b', 'c', 'a']) res, indexer = c.reindex(['a', 'c']) From e8a1765edf91ec4d087b46b90d5e54530550029b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Aug 2017 06:23:38 -0700 Subject: [PATCH 029/188] Unify Index._dir_* with Series implementation (#17117) --- pandas/core/accessor.py | 35 +++++++++++++++++++++++++++++++++++ pandas/core/base.py | 22 +++------------------- pandas/core/generic.py | 5 +++-- pandas/core/indexes/base.py | 9 +++++++-- pandas/core/series.py | 17 +++-------------- pandas/core/strings.py | 20 ++------------------ 6 files changed, 53 insertions(+), 55 deletions(-) create mode 100644 pandas/core/accessor.py diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py new file mode 100644 index 00000000000000..9f8556d1e69616 --- /dev/null +++ b/pandas/core/accessor.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +""" + +accessor.py contains base classes for implementing accessor properties +that can be mixed into or pinned onto other pandas classes. + +""" + + +class DirNamesMixin(object): + _accessors = frozenset([]) + + def _dir_deletions(self): + """ delete unwanted __dir__ for this object """ + return self._accessors + + def _dir_additions(self): + """ add addtional __dir__ for this object """ + rv = set() + for accessor in self._accessors: + try: + getattr(self, accessor) + rv.add(accessor) + except AttributeError: + pass + return rv + + def __dir__(self): + """ + Provide method name lookup and completion + Only provide 'public' methods + """ + rv = set(dir(type(self))) + rv = (rv - self._dir_deletions()) | self._dir_additions() + return sorted(rv) diff --git a/pandas/core/base.py b/pandas/core/base.py index a7c991dc8d2572..d60a8515dc920f 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -19,6 +19,7 @@ from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError +from pandas.core.accessor import DirNamesMixin _shared_docs = dict() _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', @@ -73,7 +74,7 @@ def __repr__(self): return str(self) -class PandasObject(StringMixin): +class PandasObject(StringMixin, DirNamesMixin): """baseclass for various pandas objects""" @@ -92,23 +93,6 @@ def __unicode__(self): # Should be overwritten by base classes return object.__repr__(self) - def _dir_additions(self): - """ add addtional __dir__ for this object """ - return set() - - def _dir_deletions(self): - """ delete unwanted __dir__ for this object """ - return set() - - def __dir__(self): - """ - Provide method name lookup and completion - Only provide 'public' methods - """ - rv = set(dir(type(self))) - rv = (rv - self._dir_deletions()) | self._dir_additions() - return sorted(rv) - def _reset_cache(self, key=None): """ Reset cached properties. If ``key`` is passed, only clears that key. @@ -141,7 +125,7 @@ class NoNewAttributesMixin(object): Prevents additional attributes via xxx.attribute = "something" after a call to `self.__freeze()`. Mainly used to prevent the user from using - wrong attrirbutes on a accessor (`Series.cat/.str/.dt`). + wrong attributes on a accessor (`Series.cat/.str/.dt`). If you really want to add a new attribute at a later time, you need to use `object.__setattr__(self, key, value)`. 
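
The net effect of ``DirNamesMixin`` (illustrative sketch, not part of the
diff): an accessor name shows up in ``dir()`` and tab completion only when
accessing it actually succeeds.

    class Demo(DirNamesMixin):
        _accessors = frozenset(['str'])

        def __init__(self, has_str):
            if has_str:
                self.str = 'stub accessor'

    'str' in dir(Demo(True))    # True: getattr(self, 'str') succeeds
    'str' in dir(Demo(False))   # False: AttributeError drops it from dir()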
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f8366c804e3e79..cdb08d8887e05b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -192,8 +192,9 @@ def __unicode__(self): def _dir_additions(self): """ add the string-like attributes from the info_axis """ - return set([c for c in self._info_axis - if isinstance(c, string_types) and isidentifier(c)]) + additions = set([c for c in self._info_axis + if isinstance(c, string_types) and isidentifier(c)]) + return super(NDFrame, self)._dir_additions().union(additions) @property def _constructor_sliced(self): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a21e6df3ffc93d..31cf1e48b85294 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -56,7 +56,7 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core.strings import StringAccessorMixin +from pandas.core import strings from pandas.core.config import get_option @@ -102,7 +102,7 @@ def _new_Index(cls, d): return cls.__new__(cls, **d) -class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): +class Index(IndexOpsMixin, PandasObject): """ Immutable ndarray implementing an ordered, sliceable set. The basic object storing axis labels for all pandas objects @@ -155,6 +155,11 @@ class Index(IndexOpsMixin, StringAccessorMixin, PandasObject): _engine_type = libindex.ObjectEngine + _accessors = frozenset(['str']) + + # String Methods + str = base.AccessorProperty(strings.StringMethods) + def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/series.py b/pandas/core/series.py index 75dc3d6403650c..6905fc1aced742 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -114,8 +114,7 @@ def wrapper(self): # Series class -class Series(base.IndexOpsMixin, strings.StringAccessorMixin, - generic.NDFrame,): +class Series(base.IndexOpsMixin, generic.NDFrame): """ One-dimensional ndarray with axis labels (including time series). 
@@ -2923,18 +2922,8 @@ def to_period(self, freq=None, copy=True): # Categorical methods cat = base.AccessorProperty(CategoricalAccessor) - def _dir_deletions(self): - return self._accessors - - def _dir_additions(self): - rv = set() - for accessor in self._accessors: - try: - getattr(self, accessor) - rv.add(accessor) - except AttributeError: - pass - return rv + # String Methods + str = base.AccessorProperty(strings.StringMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 0b1db0277eee3f..2f95e510bba5ef 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -16,7 +16,7 @@ from pandas.core.algorithms import take_1d import pandas.compat as compat -from pandas.core.base import AccessorProperty, NoNewAttributesMixin +from pandas.core.base import NoNewAttributesMixin from pandas.util._decorators import Appender import re import pandas._libs.lib as lib @@ -1920,20 +1920,4 @@ def _make_accessor(cls, data): message = ("Can only use .str accessor with Index, not " "MultiIndex") raise AttributeError(message) - return StringMethods(data) - - -class StringAccessorMixin(object): - """ Mixin to add a `.str` acessor to the class.""" - - str = AccessorProperty(StringMethods) - - def _dir_additions(self): - return set() - - def _dir_deletions(self): - try: - getattr(self, 'str') - except AttributeError: - return set(['str']) - return set() + return cls(data) From 0618f9950ad72f6f30283bbcf44fcdcf5918756d Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 29 Aug 2017 19:03:17 +0200 Subject: [PATCH 030/188] BUG: make order of index from pd.concat deterministic (#17364) closes #17344 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/common.py | 14 ++++++++++++++ pandas/core/indexes/api.py | 9 ++------- pandas/tests/reshape/test_concat.py | 13 ++++++++++++- 4 files changed, 29 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 942e37a29f8d57..a3673609147a6b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -406,6 +406,7 @@ Reshaping - Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`) - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). - Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) +- Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) Numeric ^^^^^^^ diff --git a/pandas/core/common.py b/pandas/core/common.py index 44cb36b8a32076..515a4010961205 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -629,3 +629,17 @@ def _random_state(state=None): else: raise ValueError("random_state must be an integer, a numpy " "RandomState, or None") + + +def _get_distinct_objs(objs): + """ + Return a list with distinct elements of "objs" (different ids). + Preserves order. 
+ """ + ids = set() + res = [] + for obj in objs: + if not id(obj) in ids: + ids.add(id(obj)) + res.append(obj) + return res diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index db73a6878258ad..323d50166e7b6f 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -23,8 +23,7 @@ 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', '_ensure_index', '_get_na_value', '_get_combined_index', - '_get_objs_combined_axis', - '_get_distinct_indexes', '_union_indexes', + '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', '_all_indexes_same'] @@ -41,7 +40,7 @@ def _get_objs_combined_axis(objs, intersect=False, axis=0): def _get_combined_index(indexes, intersect=False): # TODO: handle index names! - indexes = _get_distinct_indexes(indexes) + indexes = com._get_distinct_objs(indexes) if len(indexes) == 0: return Index([]) if len(indexes) == 1: @@ -55,10 +54,6 @@ def _get_combined_index(indexes, intersect=False): return _ensure_index(union) -def _get_distinct_indexes(indexes): - return list(dict((id(x), x) for x in indexes).values()) - - def _union_indexes(indexes): if len(indexes) == 0: raise AssertionError('Must have at least 1 Index to union') diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index 52cd18126859a1..6e646f9b294429 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -5,7 +5,7 @@ from numpy.random import randn from datetime import datetime -from pandas.compat import StringIO, iteritems +from pandas.compat import StringIO, iteritems, PY2 import pandas as pd from pandas import (DataFrame, concat, read_csv, isna, Series, date_range, @@ -1944,6 +1944,17 @@ def test_concat_categoricalindex(self): index=exp_idx) tm.assert_frame_equal(result, exp) + def test_concat_order(self): + # GH 17344 + dfs = [pd.DataFrame(index=range(3), columns=['a', 1, None])] + dfs += [pd.DataFrame(index=range(3), columns=[None, 1, 'a']) + for i in range(100)] + result = pd.concat(dfs).columns + expected = dfs[0].columns + if PY2: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize('pdt', [pd.Series, pd.DataFrame, pd.Panel]) @pytest.mark.parametrize('dt', np.sctypes['float']) From 0d676a3ccf1d7aa986416a7488b941496f936d98 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 29 Aug 2017 10:04:07 -0700 Subject: [PATCH 031/188] Fix typo that causes several NaT methods to have incorrect docstrings (#17327) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 7 ++++--- pandas/tests/scalar/test_nat.py | 5 +++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index a3673609147a6b..33b7e128ef8bfc 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -425,3 +425,4 @@ Categorical Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) +- Several ``NaT`` method docstrings (e.g. 
:func:`NaT.ctime`) were incorrect (:issue:`17327`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b5aca2e3ec3094..5dd30072fb7aa0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # cython: profile=False import warnings @@ -3922,7 +3923,7 @@ for _method_name in _nat_methods: def f(*args, **kwargs): return NaT f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _method_name, _make_nat_func(_method_name)) @@ -3934,7 +3935,7 @@ for _method_name in _nan_methods: def f(*args, **kwargs): return np.nan f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _method_name, _make_nan_func(_method_name)) @@ -3952,7 +3953,7 @@ for _maybe_method_name in dir(NaTType): def f(*args, **kwargs): raise ValueError("NaTType does not support " + func_name) f.__name__ = func_name - f.__doc__ = _get_docstring(_method_name) + f.__doc__ = _get_docstring(func_name) return f setattr(NaTType, _maybe_method_name, diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 5f247cae1099b6..6f852f2b394e18 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -247,3 +247,8 @@ def test_nat_arithmetic_index(): tm.assert_index_equal(right + left, exp) tm.assert_index_equal(left - right, exp) tm.assert_index_equal(right - left, exp) + + +def test_nat_pinned_docstrings(): + # GH17327 + assert NaT.ctime.__doc__ == datetime.ctime.__doc__ From b9d48e48904b0e607c4d18738df50dec744b745f Mon Sep 17 00:00:00 2001 From: cbertinato Date: Wed, 30 Aug 2017 06:19:44 -0400 Subject: [PATCH 032/188] CLN: replace %s syntax with .format in io/formats/format.py (#17358) Progress toward issue #16130. Converted old string formatting to new string formatting in io/formats/format.py. 
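
As an illustration of the conversion pattern (example only, reusing names
that appear in the diff below; the snippet itself is not part of the patch):

    ncols, nalias = 3, 2  # hypothetical values, just for the example
    # before: positional C-style interpolation
    old = 'Writing %d cols but got %d aliases' % (ncols, nalias)
    # after: named fields with str.format
    new = ('Writing {ncols} cols but got {nalias} aliases'
           .format(ncols=ncols, nalias=nalias))
    assert old == new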
--- pandas/io/formats/format.py | 165 ++++++++++++++++++++---------------- 1 file changed, 93 insertions(+), 72 deletions(-) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 733fd3bd39b527..6a98497aa1bfef 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -47,6 +47,7 @@ import itertools import csv +from functools import partial common_docstring = """ Parameters @@ -109,7 +110,7 @@ def _get_footer(self): if self.length: if footer: footer += ', ' - footer += "Length: %d" % len(self.categorical) + footer += "Length: {length}".format(length=len(self.categorical)) level_info = self.categorical._repr_categories_info() @@ -135,7 +136,7 @@ def to_string(self): fmt_values = self._get_formatted_values() - result = ['%s' % i for i in fmt_values] + result = [u('{i}').format(i=i) for i in fmt_values] result = [i.strip() for i in result] result = u(', ').join(result) result = [u('[') + result + u(']')] @@ -191,7 +192,7 @@ def _get_footer(self): footer = u('') if getattr(self.series.index, 'freq', None) is not None: - footer += 'Freq: %s' % self.series.index.freqstr + footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr) if self.name is not False and name is not None: if footer: @@ -199,20 +200,21 @@ def _get_footer(self): series_name = pprint_thing(name, escape_chars=('\t', '\r', '\n')) - footer += ("Name: %s" % series_name) if name is not None else "" + footer += ((u"Name: {sname}".format(sname=series_name)) + if name is not None else "") if (self.length is True or (self.length == 'truncate' and self.truncate_v)): if footer: footer += ', ' - footer += 'Length: %d' % len(self.series) + footer += 'Length: {length}'.format(length=len(self.series)) if self.dtype is not False and self.dtype is not None: name = getattr(self.tr_series.dtype, 'name', None) if name: if footer: footer += ', ' - footer += 'dtype: %s' % pprint_thing(name) + footer += u'dtype: {typ}'.format(typ=pprint_thing(name)) # level infos are added to the end and in a new line, like it is done # for Categoricals @@ -509,8 +511,10 @@ def _to_str_columns(self): else: if is_list_like(self.header): if len(self.header) != len(self.columns): - raise ValueError(('Writing %d cols but got %d aliases' - % (len(self.columns), len(self.header)))) + raise ValueError(('Writing {ncols} cols but got {nalias} ' + 'aliases' + .format(ncols=len(self.columns), + nalias=len(self.header)))) str_columns = [[label] for label in self.header] else: str_columns = self._get_formatted_column_labels(frame) @@ -578,10 +582,10 @@ def to_string(self): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % - (type(self.frame).__name__, - pprint_thing(frame.columns), - pprint_thing(frame.index))) + info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + .format(name=type(self.frame).__name__, + col=pprint_thing(frame.columns), + idx=pprint_thing(frame.index))) text = info_line else: @@ -630,8 +634,8 @@ def to_string(self): self.buf.writelines(text) if self.should_show_dimensions: - self.buf.write("\n\n[%d rows x %d columns]" % - (len(frame), len(frame.columns))) + self.buf.write("\n\n[{nrows} rows x {ncols} columns]" + .format(nrows=len(frame), ncols=len(frame.columns))) def _join_multiline(self, *strcols): lwidth = self.line_width @@ -805,7 +809,8 @@ def _get_formatted_index(self, frame): # empty space for columns if show_col_names: - col_header = ['%s' % x for x in self._get_column_name_list()] + col_header = ['{x}'.format(x=x) + for 
x in self._get_column_name_list()] else: col_header = [''] * columns.nlevels @@ -861,9 +866,10 @@ def write_result(self, buf): # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: - info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % - (type(self.frame).__name__, self.frame.columns, - self.frame.index)) + info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + .format(name=type(self.frame).__name__, + col=self.frame.columns, + idx=self.frame.index)) strcols = [[info_line]] else: strcols = self.fmt._to_str_columns() @@ -906,14 +912,16 @@ def get_col_type(dtype): column_format = index_format + column_format elif not isinstance(column_format, compat.string_types): # pragma: no cover - raise AssertionError('column_format must be str or unicode, not %s' - % type(column_format)) + raise AssertionError('column_format must be str or unicode, ' + 'not {typ}'.format(typ=type(column_format))) if not self.longtable: - buf.write('\\begin{tabular}{%s}\n' % column_format) + buf.write('\\begin{{tabular}}{{{fmt}}}\n' + .format(fmt=column_format)) buf.write('\\toprule\n') else: - buf.write('\\begin{longtable}{%s}\n' % column_format) + buf.write('\\begin{{longtable}}{{{fmt}}}\n' + .format(fmt=column_format)) buf.write('\\toprule\n') ilevels = self.frame.index.nlevels @@ -948,7 +956,7 @@ def get_col_type(dtype): crow = [x if x else '{}' for x in row] if self.bold_rows and self.fmt.index: # bold row labels - crow = ['\\textbf{%s}' % x + crow = ['\\textbf{{{x}}}'.format(x=x) if j < ilevels and x.strip() not in ['', '{}'] else x for j, x in enumerate(crow)] if i < clevels and self.fmt.header and self.multicolumn: @@ -986,9 +994,9 @@ def _format_multicolumn(self, row, ilevels): def append_col(): # write multicolumn if needed if ncol > 1: - row2.append('\\multicolumn{{{0:d}}}{{{1:s}}}{{{2:s}}}' - .format(ncol, self.multicolumn_format, - coltext.strip())) + row2.append('\\multicolumn{{{ncol:d}}}{{{fmt:s}}}{{{txt:s}}}' + .format(ncol=ncol, fmt=self.multicolumn_format, + txt=coltext.strip())) # don't modify where not needed else: row2.append(coltext) @@ -1027,8 +1035,8 @@ def _format_multirow(self, row, ilevels, i, rows): break if nrow > 1: # overwrite non-multirow entry - row[j] = '\\multirow{{{0:d}}}{{*}}{{{1:s}}}'.format( - nrow, row[j].strip()) + row[j] = '\\multirow{{{nrow:d}}}{{*}}{{{row:s}}}'.format( + nrow=nrow, row=row[j].strip()) # save when to end the current block with \cline self.clinebuf.append([i + nrow - 1, j + 1]) return row @@ -1039,7 +1047,8 @@ def _print_cline(self, buf, i, icol): """ for cl in self.clinebuf: if cl[0] == i: - buf.write('\cline{{{0:d}-{1:d}}}\n'.format(cl[1], icol)) + buf.write('\cline{{{cl:d}-{icol:d}}}\n' + .format(cl=cl[1], icol=icol)) # remove entries that have been written to buffer self.clinebuf = [x for x in self.clinebuf if x[0] != i] @@ -1076,7 +1085,8 @@ def write(self, s, indent=0): def write_th(self, s, indent=0, tags=None): if self.fmt.col_space is not None and self.fmt.col_space > 0: tags = (tags or "") - tags += 'style="min-width: %s;"' % self.fmt.col_space + tags += ('style="min-width: {colspace};"' + .format(colspace=self.fmt.col_space)) return self._write_cell(s, kind='th', indent=indent, tags=tags) @@ -1085,9 +1095,9 @@ def write_td(self, s, indent=0, tags=None): def _write_cell(self, s, kind='td', indent=0, tags=None): if tags is not None: - start_tag = '<%s %s>' % (kind, tags) + start_tag = '<{kind} {tags}>'.format(kind=kind, tags=tags) else: - start_tag = '<%s>' % kind + start_tag = 
'<{kind}>'.format(kind=kind)
 
         if self.escape:
             # escape & first to prevent double escaping of &
@@ -1096,7 +1106,8 @@ def _write_cell(self, s, kind='td', indent=0, tags=None):
         else:
             esc = {}
         rs = pprint_thing(s, escape_chars=esc).strip()
-        self.write('%s%s</%s>' % (start_tag, rs, kind), indent)
+        self.write(u'{start}{rs}</{kind}>'
+                   .format(start=start_tag, rs=rs, kind=kind), indent)
 
     def write_tr(self, line, indent=0, indent_delta=4, header=False,
                  align=None, tags=None, nindex_levels=0):
@@ -1106,7 +1117,8 @@ def write_tr(self, line, indent=0, indent_delta=4, header=False,
         if align is None:
             self.write('<tr>', indent)
         else:
-            self.write('<tr style="text-align: %s;">' % align, indent)
+            self.write('<tr style="text-align: {align};">'
+                       .format(align=align), indent)
         indent += indent_delta
 
         for i, s in enumerate(line):
@@ -1146,8 +1158,8 @@ def write_result(self, buf):
         if isinstance(self.classes, str):
             self.classes = self.classes.split()
         if not isinstance(self.classes, (list, tuple)):
-            raise AssertionError('classes must be list or tuple, '
-                                 'not %s' % type(self.classes))
+            raise AssertionError('classes must be list or tuple, not {typ}'
+                                 .format(typ=type(self.classes)))
         _classes.extend(self.classes)
 
         if self.notebook:
@@ -1159,12 +1171,11 @@ def write_result(self, buf):
             except (ImportError, AttributeError):
                 pass
 
-        self.write('<div{0}>'.format(div_style))
+        self.write('<div{style}>'.format(style=div_style))
 
         self.write_style()
 
-        self.write('<table border="%s" class="dataframe %s">' % (self.border,
-                                                                 ' '.join(_classes)),
-                   indent)
+        self.write('<table border="{border}" class="dataframe {cls}">'
+                   .format(border=self.border, cls=' '.join(_classes)), indent)
 
         indent += self.indent_delta
         indent = self._write_header(indent)
@@ -1173,8 +1184,10 @@ def write_result(self, buf):
         self.write('</table>', indent)
 
         if self.should_show_dimensions:
             by = chr(215) if compat.PY3 else unichr(215)  # ×
-            self.write(u('<p>%d rows %s %d columns</p>') %
-                       (len(frame), by, len(frame.columns)))
+            self.write(u('<p>{rows} rows {by} {cols} columns</p>')
+                       .format(rows=len(frame),
+                               by=by,
+                               cols=len(frame.columns)))
 
         if self.notebook:
             self.write('</div>')
@@ -1199,7 +1212,7 @@ def _column_header():
                 row.append(single_column_table(self.columns.names))
             else:
                 row.append('')
-            style = "text-align: %s;" % self.fmt.justify
+            style = "text-align: {just};".format(just=self.fmt.justify)
             row.extend([single_column_table(c, self.fmt.justify, style)
                         for c in self.columns])
         else:
@@ -1214,7 +1227,7 @@ def _column_header():
             indent += self.indent_delta
 
             if isinstance(self.columns, MultiIndex):
-                template = 'colspan="%d" halign="left"'
+                template = 'colspan="{span:d}" halign="left"'
 
                 if self.fmt.sparsify:
                     # GH3547
@@ -1282,7 +1295,7 @@ def _column_header():
                 for i, v in enumerate(values):
                     if i in records:
                         if records[i] > 1:
-                            tags[j] = template % records[i]
+                            tags[j] = template.format(span=records[i])
                     else:
                         continue
                     j += 1
@@ -1372,7 +1385,7 @@ def _write_regular_rows(self, fmt_values, indent):
                           nindex_levels=1)
 
     def _write_hierarchical_rows(self, fmt_values, indent):
-        template = 'rowspan="%d" valign="top"'
+        template = 'rowspan="{span}" valign="top"'
 
         truncate_h = self.fmt.truncate_h
         truncate_v = self.fmt.truncate_v
@@ -1447,7 +1460,7 @@ def _write_hierarchical_rows(self, fmt_values, indent):
                     for records, v in zip(level_lengths, idx_values[i]):
                         if i in records:
                             if records[i] > 1:
-                                tags[j] = template % records[i]
+                                tags[j] = template.format(span=records[i])
                             else:
                                 sparse_offset += 1
                                 continue
@@ -1615,8 +1628,9 @@ def _save_header(self):
             return
         if has_aliases:
             if len(header) != len(cols):
-                raise ValueError(('Writing %d cols but got %d aliases'
-                                  % (len(cols), len(header))))
+                raise ValueError(('Writing {ncols} cols but got {nalias} '
+                                  'aliases'.format(ncols=len(cols),
+                                                   nalias=len(header))))
             else:
                 write_cols = header
         else:
@@ -1790,8 +1804,9 @@ def _format_strings(self):
         if self.float_format is None:
             float_format = get_option("display.float_format")
             if float_format is None:
-                fmt_str = '%% .%dg' % get_option("display.precision")
-                float_format = lambda x: fmt_str % x
+                fmt_str = ('{{x: .{prec:d}g}}'
+                           .format(prec=get_option("display.precision")))
+                float_format = lambda x: fmt_str.format(x=x)
         else:
             float_format = self.float_format
 
@@ -1807,10 +1822,10 @@ def _format(x):
                 return 'NaT'
             return self.na_rep
         elif isinstance(x, PandasObject):
-            return '%s' % x
+            return u'{x}'.format(x=x)
         else:
             # object dtype
-            return '%s' % formatter(x)
+            return u'{x}'.format(x=formatter(x))
 
         vals = self.values
         if isinstance(vals, Index):
@@ -1824,11 +1839,11 @@ def _format(x):
         fmt_values = []
         for i, v in enumerate(vals):
             if not is_float_type[i] and leading_space:
-                fmt_values.append(' %s' % _format(v))
+                fmt_values.append(u' {v}'.format(v=_format(v)))
             elif is_float_type[i]:
                 fmt_values.append(float_format(v))
             else:
-                fmt_values.append(' %s' % _format(v))
+                fmt_values.append(u' {v}'.format(v=_format(v)))
 
         return fmt_values
 
@@ -1864,7 +1879,7 @@ def _value_formatter(self, float_format=None, threshold=None):
         # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
         if float_format:
             def base_formatter(v):
-                return (float_format % v) if notna(v) else self.na_rep
+                return float_format(value=v) if notna(v) else self.na_rep
         else:
             def base_formatter(v):
                 return str(v) if notna(v) else self.na_rep
@@ -1925,10 +1940,14 @@ def format_values_with(float_format):
 
         # There is a special default string when we are fixed-width
         # The default is otherwise to use str instead of a formatting string
-        if self.float_format is None and self.fixed_width:
-            float_format = '%% .%df' % self.digits
+        if self.float_format is None:
+            if self.fixed_width:
+                float_format = partial('{value: .{digits:d}f}'.format,
+                                       digits=self.digits)
+            else:
+                float_format = self.float_format
         else:
-            float_format = self.float_format
+            float_format = lambda value: self.float_format % value
 
         formatted_values = format_values_with(float_format)
 
@@ -1955,7 +1974,8 @@ def format_values_with(float_format):
                              (abs_vals > 0)).any()
 
         if has_small_values or (too_long and has_large_values):
-            float_format = '%% .%de' % self.digits
+            float_format = partial('{value: .{digits:d}e}'.format,
+                                   digits=self.digits)
             formatted_values = format_values_with(float_format)
 
         return formatted_values
 
@@ -1971,7 +1991,7 @@ def _format_strings(self):
 class IntArrayFormatter(GenericArrayFormatter):
 
     def _format_strings(self):
-        formatter = self.formatter or (lambda x: '% d' % x)
+        formatter = self.formatter or (lambda x: '{x: d}'.format(x=x))
         fmt_values = [formatter(x) for x in self.values]
         return fmt_values
 
@@ -2023,7 +2043,7 @@ def _format_strings(self):
             # periods may contains different freq
             values = Index(self.values, dtype='object').to_native_types()
 
-        formatter = self.formatter or (lambda x: '%s' % x)
+        formatter = self.formatter or (lambda x: '{x}'.format(x=x))
         fmt_values = [formatter(x) for x in values]
         return fmt_values
 
@@ -2223,7 +2243,7 @@ def _formatter(x):
             x = Timedelta(x)
         result = x._repr_base(format=format)
         if box:
-            result = "'{0}'".format(result)
+            result = "'{res}'".format(res=result)
         return result
 
     return _formatter
 
@@ -2278,12 +2298,12 @@ def _cond(values):
 
 def single_column_table(column, align=None, style=None):
     table = '<table'
     if align is not None:
-        table += (' align="%s"' % align)
+        table += (' align="{align}"'.format(align=align))
     if style is not None:
-        table += (' style="%s"' % style)
+        table += (' style="{style}"'.format(style=style))
     table += '><tbody>'
     for i in column:
-        table += ('<tr><td>%s</td></tr>' % str(i))
+        table += ('<tr><td>{i!s}</td></tr>'.format(i=i))
     table += '</tbody></table>'
     return table
 
 
@@ -2291,7 +2311,7 @@ def single_column_table(column, align=None, style=None):
 def single_row_table(row):  # pragma: no cover
     table = '<table><tbody><tr>'
     for i in row:
-        table += ('<td>%s</td>' % str(i))
+        table += ('<td>{i!s}</td>'.format(i=i))
     table += '</tr></tbody></table>'
     return table
 
@@ -2385,18 +2405,19 @@ def __call__(self, num):
             prefix = self.ENG_PREFIXES[int_pow10]
         else:
             if int_pow10 < 0:
-                prefix = 'E-%02d' % (-int_pow10)
+                prefix = 'E-{pow10:02d}'.format(pow10=-int_pow10)
             else:
-                prefix = 'E+%02d' % int_pow10
+                prefix = 'E+{pow10:02d}'.format(pow10=int_pow10)
 
         mant = sign * dnum / (10**pow10)
         if self.accuracy is None:  # pragma: no cover
-            format_str = u("% g%s")
+            format_str = u("{mant: g}{prefix}")
        else:
-            format_str = (u("%% .%if%%s") % self.accuracy)
+            format_str = (u("{{mant: .{acc:d}f}}{{prefix}}")
+                          .format(acc=self.accuracy))
 
-        formatted = format_str % (mant, prefix)
+        formatted = format_str.format(mant=mant, prefix=prefix)
 
         return formatted  # .strip()

From 77bfe21c7229e724d01721bb84861283baf7e9d3 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Wed, 30 Aug 2017 05:50:04 -0500
Subject: [PATCH 033/188] PKG: Added pyproject.toml for PEP 518 (#16745)

Declaring build-time requirements: https://www.python.org/dev/peps/pep-0518/
---
 MANIFEST.in                     | 1 +
 doc/source/whatsnew/v0.21.0.txt | 1 +
 pyproject.toml                  | 9 +++++++++
 3 files changed, 11 insertions(+)
 create mode 100644 pyproject.toml

diff --git a/MANIFEST.in b/MANIFEST.in
index 8bd83a7d569484..1a6b831c1b9752 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -3,6 +3,7 @@ include LICENSE
 include RELEASE.md
 include README.rst
 include setup.py
+include pyproject.toml
 
 graft doc
 prune doc/build
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 33b7e128ef8bfc..014f251ffb90ab 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -112,6 +112,7 @@ Other Enhancements
 ^^^^^^^^^^^^^^^^^^
 
 - The ``validate`` argument for :func:`merge` function now checks whether a merge is one-to-one, one-to-many, many-to-one, or many-to-many. If a merge is found to not be an example of specified merge type, an exception of type ``MergeError`` will be raised. For more, see :ref:`here <merging.validation>` (:issue:`16270`)
+- Added support for `PEP 518 <https://www.python.org/dev/peps/pep-0518/>`_ to the build system (:issue:`16745`)
 - :func:`Series.to_dict` and :func:`DataFrame.to_dict` now support an ``into`` keyword which allows you to specify the ``collections.Mapping`` subclass that you would like returned.  The default is ``dict``, which is backwards compatible. (:issue:`16122`)
 - :func:`RangeIndex.append` now returns a ``RangeIndex`` object when possible (:issue:`16212`)
 - :func:`Series.rename_axis` and :func:`DataFrame.rename_axis` with ``inplace=True`` now return ``None`` while renaming the axis inplace.
(:issue:`15704`) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000000000..f0d57d1d808a25 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,9 @@ +[build-system] +requires = [ + "wheel", + "setuptools", + "Cython", # required for VCS build, optional for released source + "numpy==1.9.3; python_version=='3.5'", + "numpy==1.12.1; python_version=='3.6'", + "numpy==1.13.1; python_version>='3.7'", +] From ad7d6fc0248edaf098537e5674dcc0c9dd059491 Mon Sep 17 00:00:00 2001 From: iulia Date: Wed, 30 Aug 2017 19:39:45 +0300 Subject: [PATCH 034/188] DOC: Update Overview page in documentation (#17368) * Update Overview page in documentation * DOC Revise Overview page * DOC Make further revisions in Overview webpage * Update overview.rst Remove references to Panel --- doc/source/overview.rst | 71 ++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/doc/source/overview.rst b/doc/source/overview.rst index 92caeec3191698..00a71603e12612 100644 --- a/doc/source/overview.rst +++ b/doc/source/overview.rst @@ -6,7 +6,11 @@ Package overview **************** -:mod:`pandas` consists of the following things +:mod:`pandas` is an open source, BSD-licensed library providing high-performance, +easy-to-use data structures and data analysis tools for the `Python `__ +programming language. + +:mod:`pandas` consists of the following elements * A set of labeled array data structures, the primary of which are Series and DataFrame @@ -21,27 +25,23 @@ Package overview * Memory-efficient "sparse" versions of the standard data structures for storing data that is mostly missing or mostly constant (some fixed value) * Moving window statistics (rolling mean, rolling standard deviation, etc.) - * Static and moving window linear and `panel regression - `__ -Data structures at a glance ---------------------------- +Data Structures +--------------- .. csv-table:: :header: "Dimensions", "Name", "Description" :widths: 15, 20, 50 - 1, Series, "1D labeled homogeneously-typed array" - 2, DataFrame, "General 2D labeled, size-mutable tabular structure with - potentially heterogeneously-typed columns" - 3, Panel, "General 3D labeled, also size-mutable array" + 1, "Series", "1D labeled homogeneously-typed array" + 2, "DataFrame", "General 2D labeled, size-mutable tabular structure with potentially heterogeneously-typed column" -Why more than 1 data structure? -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Why more than one data structure? +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The best way to think about the pandas data structures is as flexible containers for lower dimensional data. For example, DataFrame is a container -for Series, and Panel is a container for DataFrame objects. We would like to be +for Series, and Series is a container for scalars. We would like to be able to insert and remove objects from these containers in a dictionary-like fashion. @@ -85,36 +85,41 @@ The first stop for pandas issues and ideas is the `Github Issue Tracker pandas community experts can answer through `Stack Overflow `__. -Longer discussions occur on the `developer mailing list -`__, and commercial support -inquiries for Lambda Foundry should be sent to: support@lambdafoundry.com +Community +--------- -Credits -------- +pandas is actively supported today by a community of like-minded individuals around +the world who contribute their valuable time and energy to help make open source +pandas possible. Thanks to `all of our contributors `__. 
+ +If you're interested in contributing, please +visit `Contributing to pandas webpage `__. -pandas development began at `AQR Capital Management `__ in -April 2008. It was open-sourced at the end of 2009. AQR continued to provide -resources for development through the end of 2011, and continues to contribute -bug reports today. +pandas is a `NUMFocus `__ sponsored project. +This will help ensure the success of development of pandas as a world-class open-source +project, and makes it possible to `donate `__ to the project. -Since January 2012, `Lambda Foundry `__, has -been providing development resources, as well as commercial support, -training, and consulting for pandas. +Project Governance +------------------ -pandas is only made possible by a group of people around the world like you -who have contributed new code, bug reports, fixes, comments and ideas. A -complete list can be found `on Github `__. +The governance process that pandas project has used informally since its inception in 2008 is formalized in `Project Governance documents `__ . +The documents clarify how decisions are made and how the various elements of our community interact, including the relationship between open source collaborative development and work that may be funded by for-profit or non-profit entities. + +Wes McKinney is the Benevolent Dictator for Life (BDFL). Development Team ----------------- +----------------- + +The list of the Core Team members and more detailed information can be found on the `people’s page `__ of the governance repo. + -pandas is a part of the PyData project. The PyData Development Team is a -collection of developers focused on the improvement of Python's data -libraries. The core team that coordinates development can be found on `Github -`__. If you're interested in contributing, please -visit the `project website `__. +Institutional Partners +---------------------- + +The information about current institutional partners can be found on `pandas website page `__ License ------- .. literalinclude:: ../../LICENSE + From 64c8a8d6fecacb796da8265ace870a4fcab98092 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Aug 2017 15:30:53 -0500 Subject: [PATCH 035/188] API: Have MultiIndex consturctors always return a MI (#17236) * API: Have MultiIndex constructors return MI This removes the special case for MultiIndex constructors returning an Index if all the levels are length-1. Now this will return a MultiIndex with a single level. This is a backwards incompatabile change, with no clear method for deprecation, so we're making a clean break. Closes #17178 * fixup! 
API: Have MultiIndex constructors return MI * Update for comments --- doc/source/whatsnew/v0.21.0.txt | 24 +++++++++++ pandas/core/frame.py | 11 ++--- pandas/core/indexes/api.py | 12 ++++-- pandas/core/indexes/base.py | 69 ++++++++++++++++++++++++++++++ pandas/core/indexes/multi.py | 10 ----- pandas/core/reshape/reshape.py | 21 ++++++--- pandas/core/sparse/scipy_sparse.py | 6 ++- pandas/core/strings.py | 7 ++- pandas/io/parsers.py | 13 +++--- pandas/tests/indexes/test_base.py | 18 +++++++- pandas/tests/indexes/test_multi.py | 20 ++++----- pandas/util/testing.py | 4 ++ 12 files changed, 170 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 014f251ffb90ab..273cbd8357f853 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -274,6 +274,30 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. +.. _whatsnew_210.api.multiindex_single: + +MultiIndex Constructor with a Single Level +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all +length-one levels down to a regular ``Index``. This affects all the +``MultiIndex`` constructors. (:issue:`17178`) + +Previous behavior: + +.. code-block:: ipython + + In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)]) + Out[2]: Index(['a', 'b'], dtype='object') + +Length 1 levels are no longer special-cased. They behave exactly as if you had +length 2+ levels, so a :class:`MultiIndex` is always returned from all of the +``MultiIndex`` constructors: + +.. ipython:: python + + pd.MultiIndex.from_tuples([('a',), ('b',)]) + .. 
_whatsnew_0210.api: Other API Changes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b5b3df64d24c0b..5991ec825c8417 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -67,7 +67,8 @@ _dict_compat, standardize_mapping) from pandas.core.generic import NDFrame, _shared_docs -from pandas.core.index import Index, MultiIndex, _ensure_index +from pandas.core.index import (Index, MultiIndex, _ensure_index, + _ensure_index_from_sequences) from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable, check_bool_indexer) from pandas.core.internals import (BlockManager, @@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None, else: try: to_remove = [arr_columns.get_loc(field) for field in index] - - result_index = MultiIndex.from_arrays( - [arrays[i] for i in to_remove], names=index) + index_data = [arrays[i] for i in to_remove] + result_index = _ensure_index_from_sequences(index_data, + names=index) exclude.update(index) except Exception: @@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, to_remove.append(col) arrays.append(level) - index = MultiIndex.from_arrays(arrays, names=names) + index = _ensure_index_from_sequences(arrays, names) if verify_integrity and not index.is_unique: duplicates = index.get_duplicates() diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 323d50166e7b6f..d20a0b0a2c73df 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,6 +1,9 @@ -from pandas.core.indexes.base import (Index, _new_Index, # noqa - _ensure_index, _get_na_value, - InvalidIndexError) +from pandas.core.indexes.base import (Index, + _new_Index, + _ensure_index, + _ensure_index_from_sequences, + _get_na_value, + InvalidIndexError) # noqa from pandas.core.indexes.category import CategoricalIndex # noqa from pandas.core.indexes.multi import MultiIndex # noqa from pandas.core.indexes.interval import IntervalIndex # noqa @@ -22,7 +25,8 @@ 'InvalidIndexError', 'TimedeltaIndex', 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', - '_ensure_index', '_get_na_value', '_get_combined_index', + '_ensure_index', '_ensure_index_from_sequences', '_get_na_value', + '_get_combined_index', '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', '_all_indexes_same'] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 31cf1e48b85294..6a30eaefaaae76 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4012,7 +4012,76 @@ def invalid_op(self, other=None): Index._add_comparison_methods() +def _ensure_index_from_sequences(sequences, names=None): + """Construct an index from sequences of data. + + A single sequence returns an Index. Many sequences returns a + MultiIndex. 
+ + Parameters + ---------- + sequences : sequence of sequences + names : sequence of str + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> _ensure_index_from_sequences([[1, 2, 3]], names=['name']) + Int64Index([1, 2, 3], dtype='int64', name='name') + + >>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']], + names=['L1', 'L2']) + MultiIndex(levels=[['a'], ['a', 'b']], + labels=[[0, 0], [0, 1]], + names=['L1', 'L2']) + + See Also + -------- + _ensure_index + """ + from .multi import MultiIndex + + if len(sequences) == 1: + if names is not None: + names = names[0] + return Index(sequences[0], name=names) + else: + return MultiIndex.from_arrays(sequences, names=names) + + def _ensure_index(index_like, copy=False): + """ + Ensure that we have an index from some index-like object + + Parameters + ---------- + index : sequence + An Index or other sequence + copy : bool + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> _ensure_index(['a', 'b']) + Index(['a', 'b'], dtype='object') + + >>> _ensure_index([('a', 'a'), ('b', 'c')]) + Index([('a', 'a'), ('b', 'c')], dtype='object') + + >>> _ensure_index([['a', 'a'], ['b', 'c']]) + MultiIndex(levels=[['a'], ['b', 'c']], + labels=[[0, 0], [0, 1]]) + + See Also + -------- + _ensure_index_from_sequences + """ if isinstance(index_like, Index): if copy: index_like = index_like.copy() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ea45b4700172f0..d7d5b6d128a2c1 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, raise ValueError('Length of levels and labels must be the same.') if len(levels) == 0: raise ValueError('Must pass non-zero number of levels/labels') - if len(levels) == 1: - if names: - name = names[0] - else: - name = None - return Index(levels[0], name=name, copy=True).take(labels[0]) result = object.__new__(MultiIndex) @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(arrays) == 1: - name = None if names is None else names[0] - return Index(arrays[0], name=name) - # Check if lengths of all arrays are equal or not, # raise ValueError, if not for i in range(1, len(arrays)): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 455da9246783c1..b4abba8026b35b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -31,7 +31,7 @@ from pandas.core.frame import _shared_docs from pandas.util._decorators import Appender -from pandas.core.index import MultiIndex, _get_na_value +from pandas.core.index import Index, MultiIndex, _get_na_value class _Unstacker(object): @@ -311,10 +311,14 @@ def _unstack_multiple(data, clocs): recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, xnull=False) - dummy_index = MultiIndex(levels=rlevels + [obs_ids], - labels=rlabels + [comp_ids], - names=rnames + ['__placeholder__'], - verify_integrity=False) + if rlocs == []: + # Everything is in clocs, so the dummy df has a regular index + dummy_index = Index(obs_ids, name='__placeholder__') + else: + dummy_index = MultiIndex(levels=rlevels + [obs_ids], + labels=rlabels + [comp_ids], + names=rnames + ['__placeholder__'], + verify_integrity=False) if isinstance(data, Series): dummy = data.copy() @@ -446,7 +450,12 @@ def _slow_pivot(index, columns, values): def unstack(obj, 
level, fill_value=None): if isinstance(level, (tuple, list)): - return _unstack_multiple(obj, level) + if len(level) != 1: + # _unstack_multiple only handles MultiIndexes, + # and isn't needed for a single level + return _unstack_multiple(obj, level) + else: + level = level[0] if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): diff --git a/pandas/core/sparse/scipy_sparse.py b/pandas/core/sparse/scipy_sparse.py index ea108e3e899352..d2b9583d8efe5c 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -71,7 +71,11 @@ def robust_get_level_values(i): labels_to_i = Series(labels_to_i) if len(subset) > 1: labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) - labels_to_i.index.names = [index.names[i] for i in subset] + labels_to_i.index.names = [index.names[i] for i in subset] + else: + labels_to_i.index = Index(x[0] for x in labels_to_i.index) + labels_to_i.index.name = index.names[subset[0]] + labels_to_i.name = 'value' return (labels_to_i) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 2f95e510bba5ef..48bc2ee05dd680 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1452,7 +1452,12 @@ def cons_row(x): if expand: result = list(result) - return MultiIndex.from_tuples(result, names=name) + out = MultiIndex.from_tuples(result, names=name) + if out.nlevels == 1: + # We had all tuples of length-one, which are + # better represented as a regular Index. + out = out.get_level_values(0) + return out else: return Index(result, name=name) else: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a9821be3fa5e2d..8b1a921536a1dd 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -23,7 +23,8 @@ is_scalar, is_categorical_dtype) from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe -from pandas.core.index import Index, MultiIndex, RangeIndex +from pandas.core.index import (Index, MultiIndex, RangeIndex, + _ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical @@ -1444,7 +1445,8 @@ def _agg_index(self, index, try_parse_dates=True): arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) - index = MultiIndex.from_arrays(arrays, names=self.index_names) + names = self.index_names + index = _ensure_index_from_sequences(arrays, names) return index @@ -1808,7 +1810,7 @@ def read(self, nrows=None): try_parse_dates=True) arrays.append(values) - index = MultiIndex.from_arrays(arrays) + index = _ensure_index_from_sequences(arrays) if self.usecols is not None: names = self._filter_usecols(names) @@ -3138,9 +3140,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): if index_col is None or index_col is False: index = Index([]) else: - index = [Series([], dtype=dtype[index_name]) - for index_name in index_names] - index = MultiIndex.from_arrays(index, names=index_names) + data = [Series([], dtype=dtype[name]) for name in index_names] + index = _ensure_index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): columns.pop(n - i) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 07e98c326bcaa6..aa32e75ba0d585 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -17,7 +17,7 @@ DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, isna) -from pandas.core.index 
import _get_combined_index +from pandas.core.index import _get_combined_index, _ensure_index_from_sequences from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -2112,3 +2112,19 @@ def test_intersect_str_dates(self): res = i2.intersection(i1) assert len(res) == 0 + + +class TestIndexUtils(object): + + @pytest.mark.parametrize('data, names, expected', [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')), + ([['a', 'a'], ['c', 'd']], None, + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])), + ([['a', 'a'], ['c', 'd']], ['L1', 'L2'], + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]], + names=['L1', 'L2'])), + ]) + def test_ensure_index_from_sequences(self, data, names, expected): + result = _ensure_index_from_sequences(data, names) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c66775f4690cc5..798d2444689615 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -537,15 +537,12 @@ def test_astype(self): self.index.astype(np.dtype(int)) def test_constructor_single_level(self): - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - assert isinstance(single_level, Index) - assert not isinstance(single_level, MultiIndex) - assert single_level.name == 'first' - - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]]) - assert single_level.name is None + result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + assert isinstance(result, MultiIndex) + expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ['first'] def test_constructor_no_levels(self): tm.assert_raises_regex(ValueError, "non-zero number " @@ -768,8 +765,9 @@ def test_from_arrays_empty(self): # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) + assert isinstance(result, MultiIndex) expected = Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # N levels for N in [2, 3]: @@ -830,7 +828,7 @@ def test_from_product_empty(self): # 1 level result = MultiIndex.from_product([[]], names=['A']) expected = pd.Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # 2 levels l1 = [[], ['foo', 'bar', 'baz'], []] diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 5a17cb6d7dc475..7dac83953ad8f7 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1909,7 +1909,11 @@ def keyfunc(x): # convert tuples to index if nentries == 1: + # we have a single level of tuples, i.e. a regular Index index = Index(tuples[0], name=names[0]) + elif nlevels == 1: + name = None if names is None else names[0] + index = Index((x[0] for x in tuples), name=name) else: index = MultiIndex.from_tuples(tuples, names=names) return index From b98e688c7d483777a21fb46ec46e86b72b90e5a3 Mon Sep 17 00:00:00 2001 From: cbertinato Date: Thu, 31 Aug 2017 06:24:23 -0400 Subject: [PATCH 036/188] CLN: replace %s syntax with .format in io/formats/css.py, excel.py, printing.py, style.py, and terminal.py (#17387) Progress toward issue #16130. 
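The conversions follow the same pattern as the earlier format.py patch. One
detail worth noting for css.py: templates that are filled in later keep a
positional field (illustrative snippet only, not part of the diff):

    template = 'border-{:s}-color'  # template style used by _side_expander
    assert template.format('top') == 'border-top-color'
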
Converted old string formatting to new string formatting in io/formats/css.py, excel.py, printing.py, style.py, and terminal.py --- pandas/io/formats/css.py | 32 +++++++++++++------------ pandas/io/formats/excel.py | 18 ++++++++------ pandas/io/formats/printing.py | 31 ++++++++++++------------ pandas/io/formats/style.py | 45 ++++++++++++++++++++--------------- pandas/io/formats/terminal.py | 2 +- 5 files changed, 71 insertions(+), 57 deletions(-) diff --git a/pandas/io/formats/css.py b/pandas/io/formats/css.py index d12d2373e11908..429c98b579ca09 100644 --- a/pandas/io/formats/css.py +++ b/pandas/io/formats/css.py @@ -94,12 +94,13 @@ def __call__(self, declarations_str, inherited=None): # 3. TODO: resolve other font-relative units for side in self.SIDES: - prop = 'border-%s-width' % side + prop = 'border-{side}-width'.format(side=side) if prop in props: props[prop] = self.size_to_pt( props[prop], em_pt=font_size, conversions=self.BORDER_WIDTH_RATIOS) - for prop in ['margin-%s' % side, 'padding-%s' % side]: + for prop in ['margin-{side}'.format(side=side), + 'padding-{side}'.format(side=side)]: if prop in props: # TODO: support % props[prop] = self.size_to_pt( @@ -152,7 +153,8 @@ def __call__(self, declarations_str, inherited=None): def size_to_pt(self, in_val, em_pt=None, conversions=UNIT_RATIOS): def _error(): - warnings.warn('Unhandled size: %r' % in_val, CSSWarning) + warnings.warn('Unhandled size: {val!r}'.format(val=in_val), + CSSWarning) return self.size_to_pt('1!!default', conversions=conversions) try: @@ -185,10 +187,10 @@ def _error(): val = round(val, 5) if int(val) == val: - size_fmt = '%d' + size_fmt = '{fmt:d}pt'.format(fmt=int(val)) else: - size_fmt = '%f' - return (size_fmt + 'pt') % val + size_fmt = '{fmt:f}pt'.format(fmt=val) + return size_fmt def atomize(self, declarations): for prop, value in declarations: @@ -215,19 +217,19 @@ def expand(self, prop, value): try: mapping = self.SIDE_SHORTHANDS[len(tokens)] except KeyError: - warnings.warn('Could not expand "%s: %s"' % (prop, value), - CSSWarning) + warnings.warn('Could not expand "{prop}: {val}"' + .format(prop=prop, val=value), CSSWarning) return for key, idx in zip(self.SIDES, mapping): - yield prop_fmt % key, tokens[idx] + yield prop_fmt.format(key), tokens[idx] return expand - expand_border_color = _side_expander('border-%s-color') - expand_border_style = _side_expander('border-%s-style') - expand_border_width = _side_expander('border-%s-width') - expand_margin = _side_expander('margin-%s') - expand_padding = _side_expander('padding-%s') + expand_border_color = _side_expander('border-{:s}-color') + expand_border_style = _side_expander('border-{:s}-style') + expand_border_width = _side_expander('border-{:s}-width') + expand_margin = _side_expander('margin-{:s}') + expand_padding = _side_expander('padding-{:s}') def parse(self, declarations_str): """Generates (prop, value) pairs from declarations @@ -245,4 +247,4 @@ def parse(self, declarations_str): yield prop, val else: warnings.warn('Ill-formatted attribute: expected a colon ' - 'in %r' % decl, CSSWarning) + 'in {decl!r}'.format(decl=decl), CSSWarning) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 80c3880d39dfdc..ab689d196f4b60 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -132,10 +132,12 @@ def build_alignment(self, props): def build_border(self, props): return {side: { - 'style': self._border_style(props.get('border-%s-style' % side), - props.get('border-%s-width' % side)), + 'style': 
self._border_style(props.get('border-{side}-style' + .format(side=side)), + props.get('border-{side}-width' + .format(side=side))), 'color': self.color_to_excel( - props.get('border-%s-color' % side)), + props.get('border-{side}-color'.format(side=side))), } for side in ['top', 'right', 'bottom', 'left']} def _border_style(self, style, width): @@ -302,7 +304,8 @@ def color_to_excel(self, val): try: return self.NAMED_COLORS[val] except KeyError: - warnings.warn('Unhandled colour format: %r' % val, CSSWarning) + warnings.warn('Unhandled colour format: {val!r}'.format(val=val), + CSSWarning) class ExcelFormatter(object): @@ -369,7 +372,7 @@ def _format_value(self, val): if lib.isposinf_scalar(val): val = self.inf_rep elif lib.isneginf_scalar(val): - val = '-%s' % self.inf_rep + val = '-{inf}'.format(inf=self.inf_rep) elif self.float_format is not None: val = float(self.float_format % val) return val @@ -434,8 +437,9 @@ def _format_header_regular(self): colnames = self.columns if has_aliases: if len(self.header) != len(self.columns): - raise ValueError('Writing %d cols but got %d aliases' % - (len(self.columns), len(self.header))) + raise ValueError('Writing {cols} cols but got {alias} ' + 'aliases'.format(cols=len(self.columns), + alias=len(self.header))) else: colnames = self.header diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index cbad603630bd34..e0f53f671017aa 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -102,9 +102,9 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): bounds length of printed sequence, depending on options """ if isinstance(seq, set): - fmt = u("{%s}") + fmt = u("{{{body}}}") else: - fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") + fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})") if max_seq_items is False: nitems = len(seq) @@ -123,7 +123,7 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): elif isinstance(seq, tuple) and len(seq) == 1: body += ',' - return fmt % body + return fmt.format(body=body) def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): @@ -131,10 +131,10 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. 
""" - fmt = u("{%s}") + fmt = u("{{{things}}}") pairs = [] - pfmt = u("%s: %s") + pfmt = u("{key}: {val}") if max_seq_items is False: nitems = len(seq) @@ -142,16 +142,17 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): nitems = max_seq_items or get_option("max_seq_items") or len(seq) for k, v in list(seq.items())[:nitems]: - pairs.append(pfmt % - (pprint_thing(k, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds), - pprint_thing(v, _nest_lvl + 1, - max_seq_items=max_seq_items, **kwds))) + pairs.append( + pfmt.format( + key=pprint_thing(k, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds), + val=pprint_thing(v, _nest_lvl + 1, + max_seq_items=max_seq_items, **kwds))) if nitems < len(seq): - return fmt % (", ".join(pairs) + ", ...") + return fmt.format(things=", ".join(pairs) + ", ...") else: - return fmt % ", ".join(pairs) + return fmt.format(things=", ".join(pairs)) def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, @@ -221,10 +222,10 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): max_seq_items=max_seq_items) elif isinstance(thing, compat.string_types) and quote_strings: if compat.PY3: - fmt = "'%s'" + fmt = u("'{thing}'") else: - fmt = "u'%s'" - result = fmt % as_escaped_unicode(thing) + fmt = u("u'{thing}'") + result = fmt.format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 445fceb4b81467..87d672197be300 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -230,7 +230,7 @@ def format_attr(pair): # ... except maybe the last for columns.names name = self.data.columns.names[r] cs = [BLANK_CLASS if name is None else INDEX_NAME_CLASS, - "level%s" % r] + "level{lvl}".format(lvl=r)] name = BLANK_VALUE if name is None else name row_es.append({"type": "th", "value": name, @@ -240,7 +240,8 @@ def format_attr(pair): if clabels: for c, value in enumerate(clabels[r]): - cs = [COL_HEADING_CLASS, "level%s" % r, "col%s" % c] + cs = [COL_HEADING_CLASS, "level{lvl}".format(lvl=r), + "col{col}".format(col=c)] cs.extend(cell_context.get( "col_headings", {}).get(r, {}).get(c, [])) es = { @@ -264,7 +265,7 @@ def format_attr(pair): for c, name in enumerate(self.data.index.names): cs = [INDEX_NAME_CLASS, - "level%s" % c] + "level{lvl}".format(lvl=c)] name = '' if name is None else name index_header_row.append({"type": "th", "value": name, "class": " ".join(cs)}) @@ -281,7 +282,8 @@ def format_attr(pair): for r, idx in enumerate(self.data.index): row_es = [] for c, value in enumerate(rlabels[r]): - rid = [ROW_HEADING_CLASS, "level%s" % c, "row%s" % r] + rid = [ROW_HEADING_CLASS, "level{lvl}".format(lvl=c), + "row{row}".format(row=r)] es = { "type": "th", "is_visible": _is_visible(r, c, idx_lengths), @@ -298,7 +300,8 @@ def format_attr(pair): row_es.append(es) for c, col in enumerate(self.data.columns): - cs = [DATA_CLASS, "row%s" % r, "col%s" % c] + cs = [DATA_CLASS, "row{row}".format(row=r), + "col{col}".format(col=c)] cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) formatter = self._display_funcs[(r, c)] value = self.data.iloc[r, c] @@ -317,7 +320,8 @@ def format_attr(pair): else: props.append(['', '']) cellstyle.append({'props': props, - 'selector': "row%s_col%s" % (r, c)}) + 'selector': "row{row}_col{col}" + .format(row=r, col=c)}) body.append(row_es) return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid, @@ -512,22 +516,23 @@ def _apply(self, func, axis=0, subset=None, **kwargs): result = 
func(data, **kwargs)
         if not isinstance(result, pd.DataFrame):
             raise TypeError(
-                "Function {!r} must return a DataFrame when "
-                "passed to `Styler.apply` with axis=None".format(func))
+                "Function {func!r} must return a DataFrame when "
+                "passed to `Styler.apply` with axis=None"
+                .format(func=func))
         if not (result.index.equals(data.index) and
                 result.columns.equals(data.columns)):
-            msg = ('Result of {!r} must have identical index and columns '
-                   'as the input'.format(func))
+            msg = ('Result of {func!r} must have identical index and '
+                   'columns as the input'.format(func=func))
             raise ValueError(msg)
 
         result_shape = result.shape
         expected_shape = self.data.loc[subset].shape
         if result_shape != expected_shape:
-            msg = ("Function {!r} returned the wrong shape.\n"
-                   "Result has shape: {}\n"
-                   "Expected shape: {}".format(func,
-                                               result.shape,
-                                               expected_shape))
+            msg = ("Function {func!r} returned the wrong shape.\n"
+                   "Result has shape: {res}\n"
+                   "Expected shape: {expect}".format(func=func,
+                                                     res=result.shape,
+                                                     expect=expected_shape))
             raise ValueError(msg)
         self._update_ctx(result)
         return self
@@ -771,7 +776,8 @@ def set_table_styles(self, table_styles):
 
     @staticmethod
     def _highlight_null(v, null_color):
-        return 'background-color: %s' % null_color if pd.isna(v) else ''
+        return ('background-color: {color}'.format(color=null_color)
+                if pd.isna(v) else '')
 
     def highlight_null(self, null_color='red'):
         """
@@ -839,7 +845,8 @@ def _background_gradient(s, cmap='PuBu', low=0, high=0):
             # https://github.com/matplotlib/matplotlib/issues/5427
             normed = norm(s.values)
             c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]
-            return ['background-color: %s' % color for color in c]
+            return ['background-color: {color}'.format(color=color)
+                    for color in c]
 
     def set_properties(self, subset=None, **kwargs):
         """
@@ -1182,6 +1189,6 @@ def _maybe_wrap_formatter(formatter):
     elif callable(formatter):
         return formatter
     else:
-        msg = "Expected a template string or callable, got {} instead".format(
-            formatter)
+        msg = ("Expected a template string or callable, got {formatter} "
+               "instead".format(formatter=formatter))
         raise TypeError(msg)
diff --git a/pandas/io/formats/terminal.py b/pandas/io/formats/terminal.py
index 30bd1d16b538a4..4bcb28fa59b864 100644
--- a/pandas/io/formats/terminal.py
+++ b/pandas/io/formats/terminal.py
@@ -124,4 +124,4 @@ def ioctl_GWINSZ(fd):
 
 if __name__ == "__main__":
     sizex, sizey = get_terminal_size()
-    print('width = %s height = %s' % (sizex, sizey))
+    print('width = {w} height = {h}'.format(w=sizex, h=sizey))

From 764cf2abca9ae3d0c730c98b5103fcde5b4fd88b Mon Sep 17 00:00:00 2001
From: Sylvia
Date: Thu, 31 Aug 2017 06:35:51 -0400
Subject: [PATCH 037/188] BUG: not correctly using OrderedDict in test_series_apply (#17384)

in Python versions <3.6 this syntax will result in an unordered dict
---
 pandas/tests/series/test_apply.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/series/test_apply.py b/pandas/tests/series/test_apply.py
index e3be5427588b38..d0693984689a65 100644
--- a/pandas/tests/series/test_apply.py
+++ b/pandas/tests/series/test_apply.py
@@ -317,9 +317,9 @@ def test_non_callable_aggregates(self):
 
         # test when mixed w/ callable reducers
         result = s.agg(['size', 'count', 'mean'])
-        expected = Series(OrderedDict({'size': 3.0,
-                                       'count': 2.0,
-                                       'mean': 1.5}))
+        expected = Series(OrderedDict([('size', 3.0),
+                                       ('count', 2.0),
+                                       ('mean', 1.5)]))
         assert_series_equal(result[expected.index], expected)


From 
062f6f118fe4ea439ae255a8ff886a532e20ecdb Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 31 Aug 2017 03:37:58 -0700 Subject: [PATCH 038/188] Remove boxplot from _dataframe_apply_whitelist (#17381) --- pandas/core/groupby.py | 11 ++++++----- pandas/tests/groupby/test_whitelist.py | 1 - 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index c23b00dc740a43..248f3b2095a785 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -63,6 +63,8 @@ import pandas.core.common as com from pandas.core.config import option_context +from pandas.plotting._core import boxplot_frame_groupby + from pandas._libs import lib, groupby as libgroupby, Timestamp, NaT, iNaT from pandas._libs.lib import count_level_2d @@ -168,8 +170,9 @@ {'nlargest', 'nsmallest'}) - {'boxplot'}) | frozenset(['dtype', 'unique']) -_dataframe_apply_whitelist = (_common_apply_whitelist | - frozenset(['dtypes', 'corrwith'])) +_dataframe_apply_whitelist = ((_common_apply_whitelist | + frozenset(['dtypes', 'corrwith'])) - + {'boxplot'}) _cython_transforms = frozenset(['cumprod', 'cumsum', 'shift', 'cummin', 'cummax']) @@ -4280,9 +4283,7 @@ def groupby_series(obj, col=None): results.index = _default_index(len(results)) return results - -from pandas.plotting._core import boxplot_frame_groupby # noqa -DataFrameGroupBy.boxplot = boxplot_frame_groupby + boxplot = boxplot_frame_groupby class PanelGroupBy(NDFrameGroupBy): diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 2c8bf57f20faea..1c5161d2ffb431 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -42,7 +42,6 @@ 'pct_change', 'skew', 'plot', - 'boxplot', 'hist', 'median', 'dtypes', From dad39d593eacd1ee2b2465dc2ac025b0cfaffe2a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 1 Sep 2017 05:19:30 -0700 Subject: [PATCH 039/188] API: Localize Series when calling to_datetime with utc=True (#6415) (#17109) --- doc/source/whatsnew/v0.21.0.txt | 30 +++++++++++ pandas/core/tools/datetimes.py | 2 +- pandas/io/sql.py | 13 ++--- pandas/tests/indexes/datetimes/test_tools.py | 56 ++++++++++++++++---- pandas/tests/io/test_sql.py | 23 +++++--- pandas/tests/test_multilevel.py | 2 +- 6 files changed, 101 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 273cbd8357f853..e0963a1908bbc6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -298,6 +298,36 @@ length 2+ levels, so a :class:`MultiIndex` is always returned from all of the pd.MultiIndex.from_tuples([('a',), ('b',)]) +.. _whatsnew_0210.api.utc_localization_with_series: + +UTC Localization with Series +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and ``Index`` data are handled. (:issue:`6415`). + + Previous Behavior + + .. ipython:: python + + s = Series(['20130101 00:00:00'] * 3) + + .. code-block:: ipython + + In [12]: pd.to_datetime(s, utc=True) + Out[12]: + 0 2013-01-01 + 1 2013-01-01 + 2 2013-01-01 + dtype: datetime64[ns] + + New Behavior + + .. 
ipython:: python + + pd.to_datetime(s, utc=True) + +Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index c0f234a36803d7..9ff0275a7c3708 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -516,7 +516,7 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): result = arg elif isinstance(arg, ABCSeries): from pandas import Series - values = _convert_listlike(arg._values, False, format) + values = _convert_listlike(arg._values, True, format) result = Series(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, MutableMapping)): result = _assemble_from_unit_mappings(arg, errors=errors) diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 9aa47e5c698503..9c6d01d236c576 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -99,24 +99,24 @@ def _convert_params(sql, params): return args -def _handle_date_column(col, format=None): +def _handle_date_column(col, utc=None, format=None): if isinstance(format, dict): return to_datetime(col, errors='ignore', **format) else: if format in ['D', 's', 'ms', 'us', 'ns']: - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif (issubclass(col.dtype.type, np.floating) or issubclass(col.dtype.type, np.integer)): # parse dates as timestamp format = 's' if format is None else format - return to_datetime(col, errors='coerce', unit=format, utc=True) + return to_datetime(col, errors='coerce', unit=format, utc=utc) elif is_datetime64tz_dtype(col): # coerce to UTC timezone # GH11216 return (to_datetime(col, errors='coerce') .astype('datetime64[ns, UTC]')) else: - return to_datetime(col, errors='coerce', format=format, utc=True) + return to_datetime(col, errors='coerce', format=format, utc=utc) def _parse_date_columns(data_frame, parse_dates): @@ -821,8 +821,9 @@ def _harmonize_columns(self, parse_dates=None): if (col_type is datetime or col_type is date or col_type is DatetimeTZDtype): - self.frame[col_name] = _handle_date_column(df_col) - + # Convert tz-aware Datetime SQL columns to UTC + utc = col_type is DatetimeTZDtype + self.frame[col_name] = _handle_date_column(df_col, utc=utc) elif col_type is float: # floats support NA, can always convert! 
self.frame[col_name] = df_col.astype(col_type, copy=False) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 50669ee357bbdc..089d74a1d69b8c 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -260,15 +260,53 @@ def test_to_datetime_tz_pytz(self): dtype='datetime64[ns, UTC]', freq=None) tm.assert_index_equal(result, expected) - def test_to_datetime_utc_is_true(self): - # See gh-11934 - start = pd.Timestamp('2014-01-01', tz='utc') - end = pd.Timestamp('2014-01-03', tz='utc') - date_range = pd.bdate_range(start, end) - - result = pd.to_datetime(date_range, utc=True) - expected = pd.DatetimeIndex(data=date_range) - tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("init_constructor, end_constructor, test_method", + [(Index, DatetimeIndex, tm.assert_index_equal), + (list, DatetimeIndex, tm.assert_index_equal), + (np.array, DatetimeIndex, tm.assert_index_equal), + (Series, Series, tm.assert_series_equal)]) + def test_to_datetime_utc_true(self, + init_constructor, + end_constructor, + test_method): + # See gh-11934 & gh-6415 + data = ['20100102 121314', '20100102 121315'] + expected_data = [pd.Timestamp('2010-01-02 12:13:14', tz='utc'), + pd.Timestamp('2010-01-02 12:13:15', tz='utc')] + + result = pd.to_datetime(init_constructor(data), + format='%Y%m%d %H%M%S', + utc=True) + expected = end_constructor(expected_data) + test_method(result, expected) + + # Test scalar case as well + for scalar, expected in zip(data, expected_data): + result = pd.to_datetime(scalar, format='%Y%m%d %H%M%S', utc=True) + assert result == expected + + def test_to_datetime_utc_true_with_series_single_value(self): + # GH 15760 UTC=True with Series + ts = 1.5e18 + result = pd.to_datetime(pd.Series([ts]), utc=True) + expected = pd.Series([pd.Timestamp(ts, tz='utc')]) + tm.assert_series_equal(result, expected) + + def test_to_datetime_utc_true_with_series_tzaware_string(self): + ts = '2013-01-01 00:00:00-01:00' + expected_ts = '2013-01-01 01:00:00' + data = pd.Series([ts] * 3) + result = pd.to_datetime(data, utc=True) + expected = pd.Series([pd.Timestamp(expected_ts, tz='utc')] * 3) + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize('date, dtype', + [('2013-01-01 01:00:00', 'datetime64[ns]'), + ('2013-01-01 01:00:00', 'datetime64[ns, UTC]')]) + def test_to_datetime_utc_true_with_series_datetime_ns(self, date, dtype): + expected = pd.Series([pd.Timestamp('2013-01-01 01:00:00', tz='UTC')]) + result = pd.to_datetime(pd.Series([date], dtype=dtype), utc=True) + tm.assert_series_equal(result, expected) def test_to_datetime_tz_psycopg2(self): diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index a7c42391effe66..93eb0ff0ac1f26 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -602,7 +602,7 @@ def test_execute_sql(self): tm.equalContents(row, [5.1, 3.5, 1.4, 0.2, 'Iris-setosa']) def test_date_parsing(self): - # Test date parsing in read_sq + # Test date parsing in read_sql # No Parsing df = sql.read_sql_query("SELECT * FROM types_test_data", self.conn) assert not issubclass(df.DateCol.dtype.type, np.datetime64) @@ -1271,11 +1271,13 @@ def check(col): # "2000-01-01 00:00:00-08:00" should convert to # "2000-01-01 08:00:00" - assert col[0] == Timestamp('2000-01-01 08:00:00', tz='UTC') - # "2000-06-01 00:00:00-07:00" should convert to # "2000-06-01 07:00:00" - assert col[1] == Timestamp('2000-06-01 07:00:00', tz='UTC') + # GH 6415 + 
expected_data = [Timestamp('2000-01-01 08:00:00', tz='UTC'), + Timestamp('2000-06-01 07:00:00', tz='UTC')] + expected = Series(expected_data, name=col.name) + tm.assert_series_equal(col, expected) else: raise AssertionError("DateCol loaded with incorrect type " @@ -1298,6 +1300,9 @@ def check(col): self.conn, parse_dates=['DateColWithTz']) if not hasattr(df, 'DateColWithTz'): pytest.skip("no column with datetime with time zone") + col = df.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + assert str(col.dt.tz) == 'UTC' check(df.DateColWithTz) df = pd.concat(list(pd.read_sql_query("select * from types_test_data", @@ -1307,9 +1312,9 @@ def check(col): assert is_datetime64tz_dtype(col.dtype) assert str(col.dt.tz) == 'UTC' expected = sql.read_sql_table("types_test_data", self.conn) - tm.assert_series_equal(df.DateColWithTz, - expected.DateColWithTz - .astype('datetime64[ns, UTC]')) + col = expected.DateColWithTz + assert is_datetime64tz_dtype(col.dtype) + tm.assert_series_equal(df.DateColWithTz, expected.DateColWithTz) # xref #7139 # this might or might not be converted depending on the postgres driver @@ -1388,8 +1393,10 @@ def test_datetime_date(self): df = DataFrame([date(2014, 1, 1), date(2014, 1, 2)], columns=["a"]) df.to_sql('test_date', self.conn, index=False) res = read_sql_table('test_date', self.conn) + result = res['a'] + expected = to_datetime(df['a']) # comes back as datetime64 - tm.assert_series_equal(res['a'], to_datetime(df['a'])) + tm.assert_series_equal(result, expected) def test_datetime_time(self): # test support for datetime.time diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a765e2c4ca1bf7..6976fe162c5d5c 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -2137,7 +2137,7 @@ def test_set_index_datetime(self): '2011-07-19 08:00:00', '2011-07-19 09:00:00'], 'value': range(6)}) df.index = pd.to_datetime(df.pop('datetime'), utc=True) - df.index = df.index.tz_localize('UTC').tz_convert('US/Pacific') + df.index = df.index.tz_convert('US/Pacific') expected = pd.DatetimeIndex(['2011-07-19 07:00:00', '2011-07-19 08:00:00', From 9e425d637b0c635f1ec73407e6b45d1c53cd7fca Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 1 Sep 2017 08:52:44 -0600 Subject: [PATCH 040/188] TST: Enable tests in test_tools.py (#17405) Enabled tests that currently aren't running. Small fix to make sure all tests pass. Verified that the raised messages match expectations for TestToDatetimeUnit::test_frame. 
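The small fix is the one-line change in pandas/core/tools/datetimes.py below:
the keyword argument had been attached to str.join instead of str.format, so
the old spelling raised a TypeError before the intended ValueError message
could ever be built. A minimal standalone sketch of the difference (the
`excess` value here is illustrative, not taken from the test suite):

    excess = ['foo', 'bar']

    # pre-fix spelling: str.join() accepts no keyword arguments,
    # so this raises TypeError instead of formatting the message
    try:
        "[{excess}]".format(','.join(excess=excess))
    except TypeError as exc:
        print(exc)

    # post-fix spelling: format() gets the keyword, join() gets the
    # iterable, and the message renders as intended
    print("[{excess}]".format(excess=','.join(excess)))  # -> [foo,bar]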
--- pandas/core/tools/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 24 +++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9ff0275a7c3708..9dde26f43ad337 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -605,7 +605,7 @@ def f(value): if len(excess): raise ValueError("extra keys have been passed " "to the datetime assemblage: " - "[{excess}]".format(','.join(excess=excess))) + "[{excess}]".format(excess=','.join(excess))) def coerce(values): # we allow coercion to if errors allows diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 089d74a1d69b8c..5152c1019d8de0 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -25,7 +25,7 @@ compat) -class TimeConversionFormats(object): +class TestTimeConversionFormats(object): def test_to_datetime_format(self): values = ['1/1/2000', '1/2/2000', '1/3/2000'] @@ -372,7 +372,7 @@ def test_datetime_invalid_datatype(self): pd.to_datetime(pd.to_datetime) -class ToDatetimeUnit(object): +class TestToDatetimeUnit(object): def test_unit(self): # GH 11758 @@ -566,7 +566,10 @@ def test_dataframe(self): df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) - with pytest.raises(ValueError): + + msg = ("cannot assemble the datetimes: time data .+ does not " + "match format '%Y%m%d' \(match\)") + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) result = to_datetime(df2, errors='coerce') expected = Series([Timestamp('20150204 00:00:00'), @@ -574,26 +577,31 @@ def test_dataframe(self): assert_series_equal(result, expected) # extra columns - with pytest.raises(ValueError): + msg = ("extra keys have been passed to the datetime assemblage: " + "\[foo\]") + with tm.assert_raises_regex(ValueError, msg): df2 = df.copy() df2['foo'] = 1 to_datetime(df2) # not enough + msg = ('to assemble mappings requires at least that \[year, month, ' + 'day\] be specified: \[.+\] is missing') for c in [['year'], ['year', 'month'], ['year', 'month', 'second'], ['month', 'day'], ['year', 'day', 'second']]: - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df[c]) # duplicates + msg = 'cannot assemble with duplicate keys' df2 = DataFrame({'year': [2015, 2016], 'month': [2, 20], 'day': [4, 5]}) df2.columns = ['year', 'year', 'day'] - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) df2 = DataFrame({'year': [2015, 2016], @@ -601,7 +609,7 @@ def test_dataframe(self): 'day': [4, 5], 'hour': [4, 5]}) df2.columns = ['year', 'month', 'day', 'day'] - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg): to_datetime(df2) def test_dataframe_dtypes(self): @@ -632,7 +640,7 @@ def test_dataframe_dtypes(self): to_datetime(df) -class ToDatetimeMisc(object): +class TestToDatetimeMisc(object): def test_index_to_datetime(self): idx = Index(['1/1/2000', '1/2/2000', '1/3/2000']) From f7fe4295f84937bc0fa82c9718e62ec19fc36e6a Mon Sep 17 00:00:00 2001 From: topper-123 Date: Fri, 1 Sep 2017 17:36:00 +0100 Subject: [PATCH 041/188] TST: remove tests and docs for legacy (pre 0.12) hdf5 support (#17404) --- doc/source/io.rst | 38 ------------------ doc/source/whatsnew/v0.21.0.txt | 2 + .../tests/io/data/legacy_hdf/legacy_0.10.h5 | Bin 238321 -> 0 bytes .../io/data/legacy_hdf/legacy_table_0.11.h5 | Bin 
293877 -> 0 bytes pandas/tests/io/test_pytables.py | 34 +--------------- 5 files changed, 3 insertions(+), 71 deletions(-) delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 delete mode 100644 pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 diff --git a/doc/source/io.rst b/doc/source/io.rst index e3384073617054..f55c72bae5a20e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -4419,44 +4419,6 @@ Now you can import the ``DataFrame`` into R: starting point if you have stored multiple ``DataFrame`` objects to a single HDF5 file. -Backwards Compatibility -''''''''''''''''''''''' - -0.10.1 of ``HDFStore`` can read tables created in a prior version of pandas, -however query terms using the -prior (undocumented) methodology are unsupported. ``HDFStore`` will -issue a warning if you try to use a legacy-format file. You must -read in the entire file and write it out using the new format, using the -method ``copy`` to take advantage of the updates. The group attribute -``pandas_version`` contains the version information. ``copy`` takes a -number of options, please see the docstring. - - -.. ipython:: python - :suppress: - - import os - legacy_file_path = os.path.abspath('source/_static/legacy_0.10.h5') - -.. ipython:: python - :okwarning: - - # a legacy store - legacy_store = pd.HDFStore(legacy_file_path,'r') - legacy_store - - # copy (and return the new handle) - new_store = legacy_store.copy('store_new.h5') - new_store - new_store.close() - -.. ipython:: python - :suppress: - - legacy_store.close() - import os - os.remove('store_new.h5') - Performance ''''''''''' diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index e0963a1908bbc6..81e52266f972e5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -371,6 +371,8 @@ Removal of prior version deprecations/changes - ``Categorical`` has dropped the ``.order()`` and ``.sort()`` methods in favor of ``.sort_values()`` (:issue:`12882`) - :func:`eval` and :func:`DataFrame.eval` have changed the default of ``inplace`` from ``None`` to ``False`` (:issue:`11149`) - The function ``get_offset_name`` has been dropped in favor of the ``.freqstr`` attribute for an offset (:issue:`11834`) +- pandas no longer tests for compatibility with hdf5-files created with pandas < 0.11 (:issue:`17404`). + .. 
_whatsnew_0210.performance:
diff --git a/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 b/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5
deleted file mode 100644
index b1439ef16361abbc0756fbf7d344fd65d8a1a473..0000000000000000000000000000000000000000
Binary files a/pandas/tests/io/data/legacy_hdf/legacy_0.10.h5 and /dev/null differ
diff --git a/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 b/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5
deleted file mode 100644
Binary files a/pandas/tests/io/data/legacy_hdf/legacy_table_0.11.h5 and /dev/null differ
diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index f33ba7627101e9..b5ecc4d34cd08b 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4599,41 +4599,13 @@ def test_legacy_table_read(self): expected = df2[df2.index > df2.index[2]] assert_frame_equal(expected, result) - def test_legacy_0_10_read(self): - # legacy from 0.10 - with catch_warnings(record=True): - path = tm.get_data_path('legacy_hdf/legacy_0.10.h5') - with ensure_clean_store(path, mode='r') as store: - str(store) - for k in store.keys(): - store.select(k) - - def test_legacy_0_11_read(self): - # legacy from 0.11 - path = os.path.join('legacy_hdf', 'legacy_table_0.11.h5') - with ensure_clean_store(tm.get_data_path(path), mode='r') as store: - str(store) - assert 'df' in store - assert 'df1' in store - assert 'mi' in store - df = store.select('df') - df1 = store.select('df1') - mi = store.select('mi') - assert isinstance(df, DataFrame) - assert isinstance(df1, DataFrame) - assert isinstance(mi, DataFrame) - def test_copy(self): with catch_warnings(record=True): - def do_copy(f=None, new_f=None, keys=None, + def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs): try: - if f is None: - f = tm.get_data_path(os.path.join('legacy_hdf', - 'legacy_0.10.h5')) - store = HDFStore(f, 'r') if new_f is None: @@ -4671,10 +4643,6 @@ def do_copy(f=None, new_f=None, keys=None, pass safe_remove(new_f) - do_copy() - do_copy(keys=['/a', '/b', '/df1_mixed']) - do_copy(propindexes=False) - # new table df = tm.makeDataFrame() From 8351f86a0079b6b0cb95414807a2c2248530ef2c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 1 Sep 2017 10:11:40 -0700 Subject: [PATCH 042/188] Tslib unused (#17402) --- pandas/_libs/tslib.pyx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5dd30072fb7aa0..50e0b77c6d3a0d 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -2622,8 +2622,6 @@ cdef class _Timedelta(timedelta): int ndim if isinstance(other, _Timedelta): - if isinstance(other, _NaT): - return _cmp_nat_dt(other, self, _reverse_ops[op]) ots = other elif isinstance(other, timedelta): ots = Timedelta(other) @@ -3882,7 +3880,7 @@ fields = ['year', 'quarter', 'month', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond', 'week', 'dayofyear', 'weekofyear', 'days_in_month', 'daysinmonth', 'dayofweek', 'weekday_name', 'days', 'seconds', 'microseconds', - 'nanoseconds', 'qyear', 'quarter'] + 'nanoseconds', 'qyear'] for field in fields: prop = property(fget=lambda self: np.nan) setattr(NaTType, field, prop) @@ -4620,7 +4618,6 @@ def build_field_sarray(ndarray[int64_t] dtindex): """ cdef: Py_ssize_t i, count = 0 - int isleap pandas_datetimestruct dts ndarray[int32_t] years, months, days, hours, minutes, seconds, mus @@ -5270,7 +5267,6 @@ cpdef _isleapyear_arr(ndarray years): def monthrange(int64_t year, int64_t month): cdef: int64_t days - int64_t day_of_week if month < 1 or month > 12: raise ValueError("bad month number 0; must be 1-12") From 1981b679b0619de0765c2009684ce4abd886189d Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sat, 2 Sep 2017 12:50:55 +0100 Subject: [PATCH 043/188] DOC: Cleaned references to pandas ` .. 
ipython:: python diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 3c6572229802d8..4652ccbf0ad34e 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -73,7 +73,7 @@ index is passed, one will be created having values ``[0, ..., len(data) - 1]``. .. note:: - Starting in v0.8.0, pandas supports non-unique index values. If an operation + pandas supports non-unique index values. If an operation that does not support duplicate index values is attempted, an exception will be raised at that time. The reason for being lazy is nearly all performance-based (there are many instances in computations, like parts of GroupBy, where the index @@ -698,7 +698,7 @@ DataFrame in tabular form, though it won't always fit the console width: print(baseball.iloc[-20:, :12].to_string()) -New since 0.10.0, wide DataFrames will now be printed across multiple rows by +Wide DataFrames will be printed across multiple rows by default: .. ipython:: python @@ -845,19 +845,16 @@ DataFrame objects with mixed-type columns, all of the data will get upcasted to .. note:: - Unfortunately Panel, being less commonly used than Series and DataFrame, + Panel, being less commonly used than Series and DataFrame, has been slightly neglected feature-wise. A number of methods and options - available in DataFrame are not available in Panel. This will get worked - on, of course, in future releases. And faster if you join me in working on - the codebase. + available in DataFrame are not available in Panel. .. _dsintro.to_panel: From DataFrame using ``to_panel`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This method was introduced in v0.7 to replace ``LongPanel.to_long``, and converts -a DataFrame with a two-level index to a Panel. +``to_panel`` converts a DataFrame with a two-level index to a Panel. .. ipython:: python :okwarning: diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 937d682d238b37..53c0b771555f83 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -140,7 +140,7 @@ columns: In [5]: grouped = df.groupby(get_letter_type, axis=1) -Starting with 0.8, pandas Index objects now support duplicate values. If a +pandas Index objects support duplicate values. If a non-unique index is used as the group key in a groupby operation, all values for the same index value will be considered to be in one group and thus the output of aggregation functions will only contain unique index values: @@ -288,8 +288,6 @@ chosen level: s.sum(level='second') -.. versionadded:: 0.6 - Grouping with multiple levels is supported. .. ipython:: python diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 53a259ad6eb158..4687e464905627 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -66,8 +66,6 @@ See the :ref:`cookbook` for some advanced strategies Different Choices for Indexing ------------------------------ -.. versionadded:: 0.11.0 - Object selection has had a number of user-requested additions in order to support more explicit location based indexing. Pandas now supports three types of multi-axis indexing. diff --git a/doc/source/io.rst b/doc/source/io.rst index f55c72bae5a20e..f68358764a40e0 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -364,7 +364,7 @@ warn_bad_lines : boolean, default ``True`` Specifying column data types '''''''''''''''''''''''''''' -Starting with v0.10, you can indicate the data type for the whole DataFrame or +You can indicate the data type for the whole DataFrame or individual columns: .. 
ipython:: python @@ -3346,7 +3346,7 @@ Read/Write API '''''''''''''' ``HDFStore`` supports an top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing, -similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0) +similar to how ``read_csv`` and ``to_csv`` work. .. ipython:: python @@ -3791,7 +3791,7 @@ indexed dimension as the ``where``. .. note:: - Indexes are automagically created (starting ``0.10.1``) on the indexables + Indexes are automagically created on the indexables and any data columns you specify. This behavior can be turned off by passing ``index=False`` to ``append``. @@ -3878,7 +3878,7 @@ create a new table!) Iterator ++++++++ -Starting in ``0.11.0``, you can pass, ``iterator=True`` or ``chunksize=number_in_a_chunk`` +You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk`` to ``select`` and ``select_as_multiple`` to return an iterator on the results. The default is 50,000 rows returned in a chunk. @@ -3986,8 +3986,8 @@ of rows in an object. Multiple Table Queries ++++++++++++++++++++++ -New in 0.10.1 are the methods ``append_to_multiple`` and -``select_as_multiple``, that can perform appending/selecting from +The methods ``append_to_multiple`` and +``select_as_multiple`` can perform appending/selecting from multiple tables at once. The idea is to have one table (call it the selector table) that you index most/all of the columns, and perform your queries. The other table(s) are data tables with an index matching the @@ -4291,7 +4291,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specify the minimu ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to allow all *indexables* or *data_columns* to have this min_itemsize. -Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. .. note:: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index d54288baa389b9..64a321d67a825f 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -67,9 +67,8 @@ arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: - Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "NA" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_na`` option to recover it. + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. .. _missing.isna: @@ -485,8 +484,8 @@ respectively: Replacing Generic Values ~~~~~~~~~~~~~~~~~~~~~~~~ -Often times we want to replace arbitrary values with other values. New in v0.8 -is the ``replace`` method in Series/DataFrame that provides an efficient yet +Often times we want to replace arbitrary values with other values. The +``replace`` method in Series/DataFrame provides an efficient yet flexible way to perform such replacements. For a Series, you can replace a single value or a list of values by another diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index ce4a920ad77b5f..aded5e4402df2b 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1069,8 +1069,7 @@ Offset Aliases ~~~~~~~~~~~~~~ A number of string aliases are given to useful common time series -frequencies. 
We will refer to these aliases as *offset aliases* -(referred to as *time rules* prior to v0.8.0). +frequencies. We will refer to these aliases as *offset aliases*. .. csv-table:: :header: "Alias", "Description" diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index fb799c642131d7..c637246537ca1c 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -306,8 +306,6 @@ subplots: df.diff().hist(color='k', alpha=0.5, bins=50) -.. versionadded:: 0.10.0 - The ``by`` keyword can be specified to plot grouped histograms: .. ipython:: python @@ -831,8 +829,6 @@ and take a :class:`Series` or :class:`DataFrame` as an argument. Scatter Matrix Plot ~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.7.3 - You can create a scatter plot matrix using the ``scatter_matrix`` method in ``pandas.plotting``: @@ -859,8 +855,6 @@ You can create a scatter plot matrix using the Density Plot ~~~~~~~~~~~~ -.. versionadded:: 0.8.0 - You can create density plots using the :meth:`Series.plot.kde` and :meth:`DataFrame.plot.kde` methods. .. ipython:: python From c2d048137c7288644e8276fed3c5a7071a80221e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Sep 2017 16:32:34 -0700 Subject: [PATCH 044/188] Remove unused _day and _month attrs (#17431) closes #17429 --- pandas/_libs/tslib.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 50e0b77c6d3a0d..8fbc606ccdfe24 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -829,8 +829,6 @@ class NaTType(_NaT): cdef _NaT base base = _NaT.__new__(cls, 1, 1, 1) - base._day = -1 - base._month = -1 base.value = NPY_NAT return base From 5bca6ce860f66ca6f92327086a954b9e0326a85f Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 5 Sep 2017 11:30:31 +0100 Subject: [PATCH 045/188] DOC: Clean-up references to v12 to v14 (both included) (#17420) --- doc/source/advanced.rst | 21 ++--------- doc/source/basics.rst | 10 +----- doc/source/comparison_with_r.rst | 4 --- doc/source/cookbook.rst | 2 +- doc/source/enhancingperf.rst | 36 ++++++------------- doc/source/groupby.rst | 19 ---------- doc/source/indexing.rst | 23 ++---------- doc/source/install.rst | 2 +- doc/source/io.rst | 61 +++++++++----------------------- doc/source/merging.rst | 2 -- doc/source/missing_data.rst | 9 ----- doc/source/options.rst | 2 +- doc/source/text.rst | 2 -- doc/source/timedeltas.rst | 2 -- doc/source/timeseries.rst | 10 +++--- doc/source/visualization.rst | 16 --------- 16 files changed, 43 insertions(+), 178 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 711c3e9a95d05d..4af476cd5a7e12 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -270,9 +270,6 @@ Passing a list of labels or tuples works similar to reindexing: Using slicers ~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - -In 0.14.0 we added a new way to slice multi-indexed objects. You can slice a multi-index by providing multiple indexers. You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `, @@ -384,7 +381,7 @@ selecting data at a particular level of a MultiIndex easier. .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[(slice(None),'one'),:] You can also select on the columns with :meth:`~pandas.MultiIndex.xs`, by @@ -397,7 +394,7 @@ providing the axis argument .. 
ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,(slice(None),'one')] :meth:`~pandas.MultiIndex.xs` also allows selection with multiple keys @@ -408,11 +405,9 @@ providing the axis argument .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,('bar','one')] -.. versionadded:: 0.13.0 - You can pass ``drop_level=False`` to :meth:`~pandas.MultiIndex.xs` to retain the level that was selected @@ -743,16 +738,6 @@ Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``ND Float64Index ~~~~~~~~~~~~ -.. note:: - - As of 0.14.0, ``Float64Index`` is backed by a native ``float64`` dtype - array. Prior to 0.14.0, ``Float64Index`` was backed by an ``object`` dtype - array. Using a ``float64`` dtype in the backend speeds up arithmetic - operations by about 30x and boolean indexing operations on the - ``Float64Index`` itself are about 2x as fast. - -.. versionadded:: 0.13.0 - By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation. This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same. diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 35eb14eda238fd..5880703b1d2711 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -347,7 +347,7 @@ That is because NaNs do not compare as equals: np.nan == np.nan -So, as of v0.13.1, NDFrames (such as Series, DataFrames, and Panels) +So, NDFrames (such as Series, DataFrames, and Panels) have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in corresponding locations treated as equal. @@ -1104,10 +1104,6 @@ Applying with a ``Panel`` will pass a ``Series`` to the applied function. If the function returns a ``Series``, the result of the application will be a ``Panel``. If the applied function reduces to a scalar, the result of the application will be a ``DataFrame``. -.. note:: - - Prior to 0.13.1 ``apply`` on a ``Panel`` would only work on ``ufuncs`` (e.g. ``np.sum/np.max``). - .. ipython:: python import pandas.util.testing as tm @@ -1800,8 +1796,6 @@ Series has the :meth:`~Series.searchsorted` method, which works similar to smallest / largest values ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - ``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the smallest or largest :math:`n` values. For a large ``Series`` this can be much faster than sorting the entire Series and calling ``head(n)`` on the result. @@ -2168,8 +2162,6 @@ Selecting columns based on ``dtype`` .. _basics.selectdtypes: -.. versionadded:: 0.14.1 - The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns based on their ``dtype``. diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index 194e022e34c7c0..f895cdc25e6205 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -247,8 +247,6 @@ For more details and examples see :ref:`the reshaping documentation |subset|_ ~~~~~~~~~~ -.. versionadded:: 0.13 - The :meth:`~pandas.DataFrame.query` method is similar to the base R ``subset`` function. In R you might want to get the rows of a ``data.frame`` where one column's values are less than another column's values: @@ -277,8 +275,6 @@ For more details and examples see :ref:`the query documentation |with|_ ~~~~~~~~ -.. 
versionadded:: 0.13 - An expression using a data.frame called ``df`` in R with the columns ``a`` and ``b`` would be evaluated using ``with`` like so: diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 32e7a616fe8564..f51c3e679b36f3 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -818,7 +818,7 @@ The :ref:`Concat ` docs. The :ref:`Join ` d df1 = pd.DataFrame(np.random.randn(6, 3), index=rng, columns=['A', 'B', 'C']) df2 = df1.copy() -ignore_index is needed in pandas < v0.13, and depending on df construction +Depending on df construction, ``ignore_index`` may be needed .. ipython:: python diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 685a8690a53d55..264bd1de1fc774 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -213,17 +213,18 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra .. warning:: - In 0.13.0 since ``Series`` has internaly been refactored to no longer sub-class ``ndarray`` - but instead subclass ``NDFrame``, you can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter - to a cython function. Instead pass the actual ``ndarray`` using the ``.values`` attribute of the Series. + You can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter + to a cython function. Instead pass the actual ``ndarray`` using the + ``.values`` attribute of the Series. The reason is that the cython + definition is specific to an ndarray and not the passed Series. - Prior to 0.13.0 + So, do not do this: .. code-block:: python apply_integrate_f(df['a'], df['b'], df['N']) - Use ``.values`` to get the underlying ``ndarray`` + But rather, use ``.values`` to get the underlying ``ndarray`` .. code-block:: python @@ -399,10 +400,8 @@ Read more in the `numba docs `__. .. _enhancingperf.eval: -Expression Evaluation via :func:`~pandas.eval` (Experimental) -------------------------------------------------------------- - -.. versionadded:: 0.13 +Expression Evaluation via :func:`~pandas.eval` +----------------------------------------------- The top-level function :func:`pandas.eval` implements expression evaluation of :class:`~pandas.Series` and :class:`~pandas.DataFrame` objects. @@ -539,10 +538,8 @@ Now let's do the same thing but with comparisons: of type ``bool`` or ``np.bool_``. Again, you should perform these kinds of operations in plain Python. -The ``DataFrame.eval`` method (Experimental) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 0.13 +The ``DataFrame.eval`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In addition to the top level :func:`pandas.eval` function you can also evaluate an expression in the "context" of a :class:`~pandas.DataFrame`. @@ -646,19 +643,6 @@ whether the query modifies the original frame. Local Variables ~~~~~~~~~~~~~~~ -In pandas version 0.14 the local variable API has changed. In pandas 0.13.x, -you could refer to local variables the same way you would in standard Python. -For example, - -.. code-block:: python - - df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b']) - newcol = np.random.randn(len(df)) - df.eval('b + newcol') - - UndefinedVariableError: name 'newcol' is not defined - -As you can see from the exception generated, this syntax is no longer allowed. You must *explicitly reference* any local variable that you want to use in an expression by placing the ``@`` character in front of the name. 
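A minimal sketch of the ``@`` syntax just described (the names ``df`` and ``newcol`` here are illustrative, not part of the patch):

.. code-block:: python

   import numpy as np
   import pandas as pd

   df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b'])
   newcol = np.random.randn(len(df))

   # prefix local (non-column) names with '@' so the parser can resolve them
   df.eval('b + @newcol')
   df.query('b < @newcol')
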
For example, diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 53c0b771555f83..e1231b9a4a2007 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -766,8 +766,6 @@ missing values with the ``ffill()`` method. Filtration ---------- -.. versionadded:: 0.12 - The ``filter`` method returns a subset of the original object. Suppose we want to take only elements that belong to groups with a group sum greater than 2. @@ -858,8 +856,6 @@ In this example, we chopped the collection of time series into yearly chunks then independently called :ref:`fillna ` on the groups. -.. versionadded:: 0.14.1 - The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: .. ipython:: python @@ -1048,19 +1044,6 @@ Just like for a DataFrame or Series you can call head and tail on a groupby: This shows the first or last n rows from each group. -.. warning:: - - Before 0.14.0 this was implemented with a fall-through apply, - so the result would incorrectly respect the as_index flag: - - .. code-block:: python - - >>> g.head(1): # was equivalent to g.apply(lambda x: x.head(1)) - A B - A - 1 0 1 2 - 5 2 5 6 - .. _groupby.nth: Taking the nth row of each group @@ -1113,8 +1096,6 @@ You can also select multiple rows from each group by specifying multiple nth val Enumerate group items ~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - To see the order in which each row appears within its group, use the ``cumcount`` method: diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 4687e464905627..a6e7df57be4e50 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -248,8 +248,6 @@ as an attribute: - In any of these cases, standard indexing will still work, e.g. ``s['1']``, ``s['min']``, and ``s['index']`` will access the corresponding element or column. - - The ``Series/Panel`` accesses are available starting in 0.13.0. - If you are using the IPython environment, you may also use tab-completion to see these accessible attributes. @@ -529,7 +527,6 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. .. ipython:: python # these are allowed in python/numpy. - # Only works in Pandas starting from v0.14.0. x = list('abcdef') x x[4:10] @@ -539,14 +536,8 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. s.iloc[4:10] s.iloc[8:10] -.. note:: - - Prior to v0.14.0, ``iloc`` would not accept out of bounds indexers for - slices, e.g. a value that exceeds the length of the object being indexed. - - -Note that this could result in an empty axis (e.g. an empty DataFrame being -returned) +Note that using slices that go out of bounds can result in +an empty axis (e.g. an empty DataFrame being returned) .. ipython:: python @@ -745,8 +736,6 @@ Finally, one can also set a seed for ``sample``'s random number generator using Setting With Enlargement ------------------------ -.. versionadded:: 0.13 - The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis. In the ``Series`` case this is effectively an appending operation @@ -1020,8 +1009,6 @@ partial setting via ``.loc`` (but on the contents rather than the axis labels) df2[ df2[1:4] > 0 ] = 3 df2 -.. versionadded:: 0.13 - Where can also accept ``axis`` and ``level`` parameters to align the input when performing the ``where``. @@ -1064,8 +1051,6 @@ as condition and ``other`` argument. The :meth:`~pandas.DataFrame.query` Method (Experimental) --------------------------------------------------------- -.. 
versionadded:: 0.13 - :class:`~pandas.DataFrame` objects have a :meth:`~pandas.DataFrame.query` method that allows selection using an expression. @@ -1506,8 +1491,6 @@ The name, if set, will be shown in the console display: Setting metadata ~~~~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - Indexes are "mostly immutable", but it is possible to set and change their metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and ``labels``). @@ -1790,7 +1773,7 @@ Evaluation order matters Furthermore, in chained expressions, the order may determine whether a copy is returned or not. If an expression will set values on a copy of a slice, then a ``SettingWithCopy`` -exception will be raised (this raise/warn behavior is new starting in 0.13.0) +warning will be issued. You can control the action of a chained assignment via the option ``mode.chained_assignment``, which can take the values ``['raise','warn',None]``, where showing a warning is the default. diff --git a/doc/source/install.rst b/doc/source/install.rst index f92c43839ee317..8dc8224ea6cb24 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -107,7 +107,7 @@ following command:: To install a specific pandas version:: - conda install pandas=0.13.1 + conda install pandas=0.20.3 To install other packages, IPython for example:: diff --git a/doc/source/io.rst b/doc/source/io.rst index f68358764a40e0..33523ea171f3ae 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1310,8 +1310,6 @@ column widths for contiguous columns: The parser will take care of extra white spaces around the columns so it's ok to have extra separation between the columns in the file. -.. versionadded:: 0.13.0 - By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the first 100 rows of the file. It can do it only in cases when the columns are aligned and correctly separated by the provided ``delimiter`` (default delimiter @@ -1407,8 +1405,7 @@ Reading columns with a ``MultiIndex`` By specifying list of row locations for the ``header`` argument, you can read in a ``MultiIndex`` for the columns. Specifying non-consecutive -rows will skip the intervening rows. In order to have the pre-0.13 behavior -of tupleizing columns, specify ``tupleize_cols=True``. +rows will skip the intervening rows. .. ipython:: python @@ -1418,7 +1415,7 @@ of tupleizing columns, specify ``tupleize_cols=True``. print(open('mi.csv').read()) pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1]) -Starting in 0.13.0, ``read_csv`` will be able to interpret a more common format +``read_csv`` is also able to interpret a more common format of multi-columns indices. .. ipython:: python @@ -2012,8 +2009,6 @@ The speedup is less noticeable for smaller datasets: Normalization ''''''''''''' -.. versionadded:: 0.13.0 - pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data into a flat table. @@ -2198,8 +2193,6 @@ Reading HTML Content We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas ` below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. -.. versionadded:: 0.12.0 - The top-level :func:`~pandas.io.html.read_html` function can accept an HTML string/file/URL and will parse HTML tables into list of pandas DataFrames. Let's look at a few examples. 
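To make the call pattern concrete, a minimal sketch (assuming an HTML parser such as ``lxml``, or ``beautifulsoup4`` plus ``html5lib``, is installed; the table markup is invented for the example):

.. code-block:: python

   import pandas as pd

   html = """<table>
     <tr><th>a</th><th>b</th></tr>
     <tr><td>1</td><td>x</td></tr>
     <tr><td>2</td><td>y</td></tr>
   </table>"""

   # read_html returns a list of DataFrames, one per <table> found
   dfs = pd.read_html(html)
   df = dfs[0]
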
@@ -2653,10 +2646,6 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc # equivalent using the read_excel function data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA']) -.. versionadded:: 0.12 - -``ExcelFile`` has been moved to the top level namespace. - .. versionadded:: 0.17 ``read_excel`` can take an ``ExcelFile`` object as input @@ -2716,9 +2705,6 @@ Using a list to get multiple sheets: ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. - -.. versionadded:: 0.13 - Sheets can be specified by sheet index or sheet name, using an integer or string, respectively. @@ -2866,9 +2852,9 @@ Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or ``openpyxl``. -The DataFrame will be written in a way that tries to mimic the REPL output. One -difference from 0.12.0 is that the ``index_label`` will be placed in the second -row instead of the first. You can get the previous behaviour by setting the +The DataFrame will be written in a way that tries to mimic the REPL output. +The ``index_label`` will be placed in the second +row instead of the first. You can place it in the first row by setting the ``merge_cells`` option in ``to_excel()`` to ``False``: .. code-block:: python @@ -2945,8 +2931,6 @@ Added support for Openpyxl >= 2.2 Excel writer engines '''''''''''''''''''' -.. versionadded:: 0.13 - ``pandas`` chooses an Excel writer via two methods: 1. the ``engine`` keyword argument @@ -3074,14 +3058,19 @@ any pickled pandas object (or any other pickled object) from file: Loading pickled data received from untrusted sources can be unsafe. - See: http://docs.python.org/2.7/library/pickle.html + See: https://docs.python.org/3.6/library/pickle.html .. warning:: - Several internal refactorings, 0.13 (:ref:`Series Refactoring `), and 0.15 (:ref:`Index Refactoring `), - preserve compatibility with pickles created prior to these versions. However, these must - be read with ``pd.read_pickle``, rather than the default python ``pickle.load``. - See `this question `__ + Several internal refactorings have been done while still preserving + compatibility with pickles created with older versions of pandas. However, + for such cases, pickled dataframes, series etc, must be read with + ``pd.read_pickle``, rather than ``pickle.load``. + + See `here `__ + and `here `__ + for some examples of compatibility-breaking changes. See + `this question `__ for a detailed explanation. .. _io.pickle.compression: @@ -3150,9 +3139,7 @@ The default is to 'infer msgpack ------- -.. versionadded:: 0.13.0 - -Starting in 0.13.0, pandas is supporting the ``msgpack`` format for +pandas supports the ``msgpack`` format for object serialization. This is a lightweight portable binary format, similar to binary JSON, that is highly space efficient, and provides good performance both on the writing (serialization), and reading (deserialization). @@ -3424,10 +3411,6 @@ This is also true for the major axis of a ``Panel``: Fixed Format '''''''''''' -.. note:: - - This was prior to 0.13.0 the ``Storer`` format. - The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply remove them and rewrite). 
Nor are they **queryable**; they must be @@ -3460,8 +3443,6 @@ other sessions. In addition, delete & query type operations are supported. This format is specified by ``format='table'`` or ``format='t'`` to ``append`` or ``put`` or ``to_hdf`` -.. versionadded:: 0.13 - This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to enable ``put/append/to_hdf`` to by default store in the ``table`` format. @@ -3765,9 +3746,7 @@ space. These are in terms of the total number of rows in a table. Using timedelta64[ns] +++++++++++++++++++++ -.. versionadded:: 0.13 - -Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be +You can store and query using the ``timedelta64[ns]`` type. Terms can be specified in the format: ``()``, where float may be signed (and fractional), and unit can be ``D,s,ms,us,ns`` for the timedelta. Here's an example: @@ -3889,8 +3868,6 @@ The default is 50,000 rows returned in a chunk. .. note:: - .. versionadded:: 0.12.0 - You can also use the iterator with ``read_hdf`` which will open, then automatically close the store when finished iterating. @@ -4603,8 +4580,6 @@ included in Python's standard library by default. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. -.. versionadded:: 0.14.0 - If SQLAlchemy is not installed, a fallback is only provided for sqlite (and for mysql for backwards compatibility, but this is deprecated and will be removed in a future version). @@ -4937,8 +4912,6 @@ Full documentation can be found `here `__ Stata Format ------------ -.. versionadded:: 0.12.0 - .. _io.stata_writer: Writing to Stata format diff --git a/doc/source/merging.rst b/doc/source/merging.rst index d956f1ca54e6b8..a5ee1b1a9384cc 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1053,8 +1053,6 @@ As you can see, this drops any rows where there was no match. Joining a single Index to a Multi-index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - You can join a singly-indexed ``DataFrame`` with a level of a multi-indexed ``DataFrame``. The level will match on the name of the index of the singly-indexed frame against a level name of the multi-indexed frame. diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 64a321d67a825f..65b411ccd4af26 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -263,8 +263,6 @@ and ``bfill()`` is equivalent to ``fillna(method='bfill')`` Filling with a PandasObject ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.12 - You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column. @@ -280,8 +278,6 @@ use case of this is to fill a DataFrame with the mean of that column. dff.fillna(dff.mean()) dff.fillna(dff.mean()['B':'C']) -.. versionadded:: 0.13 - Same result as above, but is aligning the 'fill' value which is a Series in this case. @@ -320,11 +316,6 @@ examined :ref:`in the API `. Interpolation ~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - - :meth:`~pandas.DataFrame.interpolate`, and :meth:`~pandas.Series.interpolate` have - revamped interpolation methods and functionality. - .. versionadded:: 0.17.0 The ``limit_direction`` keyword argument was added. 
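A minimal sketch of the aligned ``fillna`` and the ``interpolate`` behavior described in the missing-data hunks above (the frame and its labels are illustrative):

.. code-block:: python

   import numpy as np
   import pandas as pd

   dff = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC'))
   dff.iloc[3:5, 0] = np.nan
   dff.iloc[4:6, 1] = np.nan

   # a Series of column means aligns on the column labels
   dff.fillna(dff.mean())

   # restrict the fill to a subset of columns by slicing that Series
   dff.fillna(dff.mean()['B':'C'])

   # interpolate, allowing a capped fill to extend in both directions
   dff.interpolate(limit=1, limit_direction='both')
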
diff --git a/doc/source/options.rst b/doc/source/options.rst index 51d02bc89692a6..1592caf90546c7 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -306,7 +306,7 @@ display.float_format None The callable should accept a fl See core.format.EngFormatter for an example. display.large_repr truncate For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can show - a truncated table (the default from 0.13), + a truncated table (the default), or switch to the view from df.info() (the behaviour in earlier versions of pandas). allowable settings, ['truncate', 'info'] diff --git a/doc/source/text.rst b/doc/source/text.rst index e3e4b24d17f448..85b8aa6aa18578 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -211,8 +211,6 @@ Extracting Substrings Extract first match in each subject (extract) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. versionadded:: 0.13.0 - .. warning:: In version 0.18.0, ``extract`` gained the ``expand`` argument. When diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 07effcfdff33b7..daa2c262c8c860 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -242,8 +242,6 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob Frequency Conversion -------------------- -.. versionadded:: 0.13 - Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, or by astyping to a specific timedelta type. These operations yield Series and propagate ``NaT`` -> ``nan``. Note that division by the numpy scalar is true division, while astyping is equivalent of floor division. diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index aded5e4402df2b..c86c58c3183f6f 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -177,7 +177,7 @@ you can pass the ``dayfirst`` flag: .. note:: Specifying a ``format`` argument will potentially speed up the conversion - considerably and on versions later then 0.13.0 explicitly specifying + considerably and explicitly specifying a format string of '%Y%m%d' takes a faster path still. If you pass a single string to ``to_datetime``, it returns single ``Timestamp``. @@ -1946,9 +1946,11 @@ These can easily be converted to a ``PeriodIndex`` Time Zone Handling ------------------ -Pandas provides rich support for working with timestamps in different time zones using ``pytz`` and ``dateutil`` libraries. -``dateutil`` support is new in 0.14.1 and currently only supported for fixed offset and tzfile zones. The default library is ``pytz``. -Support for ``dateutil`` is provided for compatibility with other applications e.g. if you use ``dateutil`` in other python packages. +Pandas provides rich support for working with timestamps in different time +zones using ``pytz`` and ``dateutil`` libraries. ``dateutil`` currently is only +supported for fixed offset and tzfile zones. The default library is ``pytz``. +Support for ``dateutil`` is provided for compatibility with other +applications e.g. if you use ``dateutil`` in other python packages. Working with Time Zones ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index c637246537ca1c..839390c8778aa1 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -512,8 +512,6 @@ Compare to: Area Plot ~~~~~~~~~ -.. versionadded:: 0.14 - You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. 
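For context, a minimal sketch (random data and a matplotlib backend are assumed):

.. code-block:: python

   import numpy as np
   import pandas as pd

   df = pd.DataFrame(np.abs(np.random.randn(10, 4)), columns=list('abcd'))
   df.plot.area()                # stacking behavior is described just below
   df.plot.area(stacked=False)
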
Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. @@ -550,8 +548,6 @@ To produce an unstacked plot, pass ``stacked=False``. Alpha value is set to 0.5 Scatter Plot ~~~~~~~~~~~~ -.. versionadded:: 0.13 - Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. Scatter plot requires numeric columns for x and y axis. These can be specified by ``x`` and ``y`` keywords each. @@ -619,8 +615,6 @@ See the :meth:`scatter ` method and the Hexagonal Bin Plot ~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - You can create hexagonal bin plots with :meth:`DataFrame.plot.hexbin`. Hexbin plots can be a useful alternative to scatter plots if your data are too dense to plot each point individually. @@ -682,8 +676,6 @@ See the :meth:`hexbin ` method and the Pie plot ~~~~~~~~ -.. versionadded:: 0.14 - You can create a pie plot with :meth:`DataFrame.plot.pie` or :meth:`Series.plot.pie`. If your data includes any ``NaN``, they will be automatically filled with 0. A ``ValueError`` will be raised if there are any negative values in your data. @@ -1365,8 +1357,6 @@ Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a Plotting With Error Bars ~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with error bars is now supported in the :meth:`DataFrame.plot` and :meth:`Series.plot` Horizontal and vertical errorbars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats. @@ -1407,8 +1397,6 @@ Here is an example of one way to easily plot group means with standard deviation Plotting Tables ~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. .. ipython:: python @@ -1585,10 +1573,6 @@ available in matplotlib. Although this formatting does not provide the same level of refinement you would get when plotting via pandas, it can be faster when plotting a large number of points. -.. note:: - - The speed up for large data sets only applies to pandas 0.14.0 and later. - .. 
ipython:: python :suppress: From 25d529905521c4710c13b9a2c189a39479c529cb Mon Sep 17 00:00:00 2001 From: s-weigand Date: Wed, 6 Sep 2017 14:03:39 +0200 Subject: [PATCH 046/188] BUG: Plotting Timedelta on y-axis #16953 (#17430) * implemented fix for GH issue #16953 * added tests for fix of issue #16953 * changed comments for git issue to pandas style GH# * changed linelength in tests, so all lines are less than 80 characters * added whatsnew entry * swaped conversion and filtering of values, for plot to also work with object dtypes * refomated code, so len(line) < 80 * changed whatsnew with timedelta and datetime dtypes * added support for datetimetz and extended tests * added reason to pytest.mark.xfail --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/plotting/_core.py | 8 ++- pandas/tests/plotting/test_frame.py | 76 +++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 81e52266f972e5..1f3bf00c877670 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -432,7 +432,7 @@ I/O Plotting ^^^^^^^^ - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) - +- Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index e5b9497993172d..a0b7e93efd05cb 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -342,7 +342,13 @@ def _compute_plot_data(self): label = 'None' data = data.to_frame(name=label) - numeric_data = data._convert(datetime=True)._get_numeric_data() + # GH16953, _convert is needed as fallback, for ``Series`` + # with ``dtype == object`` + data = data._convert(datetime=True, timedelta=True) + numeric_data = data.select_dtypes(include=[np.number, + "datetime", + "datetimetz", + "timedelta"]) try: is_empty = numeric_data.empty diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 67098529a01119..f3b287a8889c37 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -380,6 +380,82 @@ def test_subplots_timeseries(self): self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = {"numeric": np.array([1, 2, 5]), + "timedelta": [pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h")], + "datetime_no_tz": [pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00")], + "datetime_all_tz": [pd.to_datetime("2017-08-01 00:00:00", + utc=True), + pd.to_datetime("2017-08-01 02:00:00", + utc=True), + pd.to_datetime("2017-08-02 00:00:00", + utc=True)], + "text": ["This", "should", "fail"]} + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert (ax_numeric.get_lines()[0].get_data()[1] == + testdata["numeric"].values).all() + ax_timedelta = testdata.plot(y="timedelta") + assert (ax_timedelta.get_lines()[0].get_data()[1] == + testdata["timedelta"].values).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert (ax_datetime_no_tz.get_lines()[0].get_data()[1] == + testdata["datetime_no_tz"].values).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + assert (ax_datetime_all_tz.get_lines()[0].get_data()[1] == + testdata["datetime_all_tz"].values).all() + 
with pytest.raises(TypeError): + testdata.plot(y="text") + + @pytest.mark.xfail(reason='not support for period, categorical, ' + 'datetime_mixed_tz') + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formater (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formater (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handels ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = {"numeric": np.array([1, 2, 5]), + "period": [pd.Period('2017-08-01 00:00:00', freq='H'), + pd.Period('2017-08-01 02:00', freq='H'), + pd.Period('2017-08-02 00:00:00', freq='H')], + "categorical": pd.Categorical(["c", "b", "a"], + categories=["a", "b", "c"], + ordered=False), + "datetime_mixed_tz": [pd.to_datetime("2017-08-01 00:00:00", + utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00")]} + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert (ax_period.get_lines()[0].get_data()[1] == + testdata["period"].values).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert (ax_categorical.get_lines()[0].get_data()[1] == + testdata["categorical"].values).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", + y="datetime_mixed_tz") + assert (ax_datetime_mixed_tz.get_lines()[0].get_data()[1] == + testdata["datetime_mixed_tz"].values).all() + @pytest.mark.slow def test_subplots_layout(self): # GH 6667 From 84a39f99013f238a2e1df9ba63bdaa8a3fd00c08 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 6 Sep 2017 08:23:06 -0400 Subject: [PATCH 047/188] COMPAT: handle pyarrow deprecation of timestamps_to_ms in .from_pandas with pyarrow < 0.6.0 (#17447) closes #17438 --- ci/requirements-3.5.sh | 2 +- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/io/parquet.py | 18 ++++++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index 33db9c28c78a9c..d694ad3679ac12 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -8,4 +8,4 @@ echo "install 35" conda remove -n pandas python-dateutil --force pip install python-dateutil -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 +conda install -n pandas -c conda-forge feather-format pyarrow=0.5.0 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1f3bf00c877670..b24a6f067cee4c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -125,7 +125,7 @@ Other Enhancements - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) -- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. 
(:issue:`15838`, :issue:`17438`) - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 09603fd6fdcce7..4b507b7f5df6f7 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -58,13 +58,23 @@ def __init__(self): "\nor via pip\n" "pip install -U pyarrow\n") + self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0' + self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0' self.api = pyarrow - def write(self, df, path, compression='snappy', **kwargs): + def write(self, df, path, compression='snappy', + coerce_timestamps='ms', **kwargs): path, _, _ = get_filepath_or_buffer(path) - table = self.api.Table.from_pandas(df, timestamps_to_ms=True) - self.api.parquet.write_table( - table, path, compression=compression, **kwargs) + if self._pyarrow_lt_060: + table = self.api.Table.from_pandas(df, timestamps_to_ms=True) + self.api.parquet.write_table( + table, path, compression=compression, **kwargs) + + else: + table = self.api.Table.from_pandas(df) + self.api.parquet.write_table( + table, path, compression=compression, + coerce_timestamps=coerce_timestamps, **kwargs) def read(self, path): path, _, _ = get_filepath_or_buffer(path) From d4577911c750f2f48f760ce451d413116bed72da Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 6 Sep 2017 15:55:12 +0100 Subject: [PATCH 048/188] DOC/TST: Add examples to MultiIndex.get_level_values + related changes (#17414) --- pandas/core/indexes/base.py | 12 ++++++++++-- pandas/core/indexes/multi.py | 23 +++++++++++++++++++++-- pandas/tests/indexes/test_base.py | 6 ++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a30eaefaaae76..a9098126a38e3d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2529,15 +2529,23 @@ def set_value(self, arr, key, value): def _get_level_values(self, level): """ Return an Index of values for requested level, equal to the length - of the index + of the index. Parameters ---------- - level : int + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``self``, as there is only one level in the Index. + + See also + --------- + pandas.MultiIndex.get_level_values : get values for a level of a + MultiIndex """ self._validate_index_level(level) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d7d5b6d128a2c1..8b2cf0e7c0b407 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -882,15 +882,34 @@ def _get_level_values(self, level): def get_level_values(self, level): """ Return vector of label values for requested level, - equal to the length of the index + equal to the length of the index. Parameters ---------- - level : int or level name + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``values`` is a level of this MultiIndex converted to + a single :class:`Index` (or subclass thereof). 
+ + Examples + --------- + + Create a MultiIndex: + + >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) + >>> mi.names = ['level_1', 'level_2'] + + Get level values by supplying level as either integer or name: + + >>> mi.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object', name='level_1') + >>> mi.get_level_values('level_2') + Index(['d', 'e', 'f'], dtype='object', name='level_2') """ level = self._get_level_number(level) values = self._get_level_values(level) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index aa32e75ba0d585..f96dbdcfb8acfe 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1438,6 +1438,12 @@ def test_get_level_values(self): result = self.strIndex.get_level_values(0) tm.assert_index_equal(result, self.strIndex) + # test for name (GH 17414) + index_with_name = self.strIndex.copy() + index_with_name.name = 'a' + result = index_with_name.get_level_values('a') + tm.assert_index_equal(result, index_with_name) + def test_slice_keep_name(self): idx = Index(['a', 'b'], name='asdf') assert idx.name == idx[1:].name From b8694460dbe3d4d82adb757a37e5f515356b1cde Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Sep 2017 17:14:05 -0700 Subject: [PATCH 049/188] Dont re-pin total_seconds as it is already implemented (#17432) --- pandas/_libs/tslib.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8fbc606ccdfe24..962c2ef3956a10 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -856,6 +856,9 @@ class NaTType(_NaT): return (__nat_unpickle, (None, )) def total_seconds(self): + """ + Total duration of timedelta in seconds (to ns precision) + """ # GH 10939 return np.nan @@ -3890,8 +3893,9 @@ for field in fields: _nat_methods = ['date', 'now', 'replace', 'to_pydatetime', 'today', 'round', 'floor', 'ceil', 'tz_convert', 'tz_localize'] -_nan_methods = ['weekday', 'isoweekday', 'total_seconds'] -_implemented_methods = ['to_datetime', 'to_datetime64', 'isoformat'] +_nan_methods = ['weekday', 'isoweekday'] +_implemented_methods = [ + 'to_datetime', 'to_datetime64', 'isoformat', 'total_seconds'] _implemented_methods.extend(_nat_methods) _implemented_methods.extend(_nan_methods) From 3a12687c4e91501d805fc71c37e9ce0a496b48bf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 6 Sep 2017 17:46:50 -0700 Subject: [PATCH 050/188] BUG: Return local Timestamp.weekday_name attribute (#17354) (#17377) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 22 ++++++++++++++++++---- pandas/tests/scalar/test_timestamp.py | 8 ++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b24a6f067cee4c..553e622b8560eb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -399,6 +399,7 @@ Conversion - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. 
(:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) +- Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 962c2ef3956a10..f31be9502499f1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -532,9 +532,7 @@ class Timestamp(_Timestamp): @property def weekday_name(self): - out = get_date_name_field( - np.array([self.value], dtype=np.int64), 'weekday_name') - return out[0] + return self._get_named_field('weekday_name') @property def dayofyear(self): @@ -1269,13 +1267,29 @@ cdef class _Timestamp(datetime): # same timezone if specified) return datetime.__sub__(self, other) - cpdef _get_field(self, field): + cdef int64_t _maybe_convert_value_to_local(self): + """Convert UTC i8 value to local i8 value if tz exists""" + cdef: + int64_t val val = self.value if self.tz is not None and not _is_utc(self.tz): val = tz_convert_single(self.value, 'UTC', self.tz) + return val + + cpdef _get_field(self, field): + cdef: + int64_t val + val = self._maybe_convert_value_to_local() out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) + cpdef _get_named_field(self, field): + cdef: + int64_t val + val = self._maybe_convert_value_to_local() + out = get_date_name_field(np.array([val], dtype=np.int64), field) + return out[0] + cpdef _get_start_end_field(self, field): month_kw = self.freq.kwds.get( 'startingMonth', self.freq.kwds.get( diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 7cd1a7db0f9fe9..8d47ce4802ac65 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -555,6 +555,14 @@ def check(value, equal): for end in ends: assert getattr(ts, end) + @pytest.mark.parametrize('data, expected', + [(Timestamp('2017-08-28 23:00:00'), 'Monday'), + (Timestamp('2017-08-28 23:00:00', tz='EST'), + 'Monday')]) + def test_weekday_name(self, data, expected): + # GH 17354 + assert data.weekday_name == expected + def test_pprint(self): # GH12622 import pprint From fd137f537051ad98ca9a9c069827c72a8b9e7543 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 7 Sep 2017 02:47:43 +0200 Subject: [PATCH 051/188] BUG: intersection of decreasing RangeIndexes (#17374) closes #17296 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/range.py | 22 +++++++++++++--------- pandas/tests/indexes/test_range.py | 15 +++++++++++++++ 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 553e622b8560eb..f7cd8230c8b9b5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -417,6 +417,7 @@ Indexing - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) +- Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) I/O ^^^ diff --git a/pandas/core/indexes/range.py 
b/pandas/core/indexes/range.py index 82412d3a7ef57a..b759abaed4e564 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -324,12 +324,13 @@ def intersection(self, other): if not len(self) or not len(other): return RangeIndex._simple_new(None) + first = self[::-1] if self._step < 0 else self + second = other[::-1] if other._step < 0 else other + # check whether intervals intersect # deals with in- and decreasing ranges - int_low = max(min(self._start, self._stop + 1), - min(other._start, other._stop + 1)) - int_high = min(max(self._stop, self._start + 1), - max(other._stop, other._start + 1)) + int_low = max(first._start, second._start) + int_high = min(first._stop, second._stop) if int_high <= int_low: return RangeIndex._simple_new(None) @@ -337,21 +338,24 @@ def intersection(self, other): # solve intersection problem # performance hint: for identical step sizes, could use # cheaper alternative - gcd, s, t = self._extended_gcd(self._step, other._step) + gcd, s, t = first._extended_gcd(first._step, second._step) # check whether element sets intersect - if (self._start - other._start) % gcd: + if (first._start - second._start) % gcd: return RangeIndex._simple_new(None) # calculate parameters for the RangeIndex describing the # intersection disregarding the lower bounds - tmp_start = self._start + (other._start - self._start) * \ - self._step // gcd * s - new_step = self._step * other._step // gcd + tmp_start = first._start + (second._start - first._start) * \ + first._step // gcd * s + new_step = first._step * second._step // gcd new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) # adjust index to limiting interval new_index._start = new_index._min_fitting_element(int_low) + + if (self._step < 0 and other._step < 0) is not (new_index._step < 0): + new_index = new_index[::-1] return new_index def _min_fitting_element(self, lower_limit): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 5ecf467b57fc5c..06c8f0ee392c77 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -610,6 +610,21 @@ def test_intersection(self): other.values))) tm.assert_index_equal(result, expected) + # reversed (GH 17296) + result = other.intersection(self.index) + tm.assert_index_equal(result, expected) + + # GH 17296: intersect two decreasing RangeIndexes + first = RangeIndex(10, -2, -2) + other = RangeIndex(5, -4, -1) + expected = first.astype(int).intersection(other.astype(int)) + result = first.intersection(other).astype(int) + tm.assert_index_equal(result, expected) + + # reversed + result = other.intersection(first).astype(int) + tm.assert_index_equal(result, expected) + index = RangeIndex(5) # intersect of non-overlapping indices From 93e23a71f583920b46b4bb20e99a9a5e73685c47 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Sep 2017 17:51:50 -0700 Subject: [PATCH 052/188] Remove property that re-computed microsecond (#17331) --- asv_bench/benchmarks/timestamp.py | 60 +++++++++++++++++++++++++++++++ doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/_libs/period.pyx | 1 + pandas/_libs/tslib.pyx | 4 --- 4 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 asv_bench/benchmarks/timestamp.py diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py new file mode 100644 index 00000000000000..066479b22739a8 --- /dev/null +++ b/asv_bench/benchmarks/timestamp.py @@ -0,0 +1,60 @@ +from .pandas_vb_common import * +from pandas import to_timedelta, Timestamp + + 
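+# asv discovers benchmark classes in this module and repeatedly calls each
+# time_* method, so each body should contain only the attribute access
+# being measured.
+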
+class TimestampProperties(object): + goal_time = 0.2 + + def setup(self): + self.ts = Timestamp('2017-08-25 08:16:14') + + def time_tz(self): + self.ts.tz + + def time_offset(self): + self.ts.offset + + def time_dayofweek(self): + self.ts.dayofweek + + def time_weekday_name(self): + self.ts.weekday_name + + def time_dayofyear(self): + self.ts.dayofyear + + def time_week(self): + self.ts.week + + def time_quarter(self): + self.ts.quarter + + def time_days_in_month(self): + self.ts.days_in_month + + def time_freqstr(self): + self.ts.freqstr + + def time_is_month_start(self): + self.ts.is_month_start + + def time_is_month_end(self): + self.ts.is_month_end + + def time_is_quarter_start(self): + self.ts.is_quarter_start + + def time_is_quarter_end(self): + self.ts.is_quarter_end + + def time_is_year_start(self): + self.ts.is_quarter_end + + def time_is_year_end(self): + self.ts.is_quarter_end + + def time_is_leap_year(self): + self.ts.is_quarter_end + + def time_microsecond(self): + self.ts.microsecond diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f7cd8230c8b9b5..33a6db18db3cad 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -382,7 +382,7 @@ Performance Improvements - Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) - :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) - +- :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 816b7ebfff86de..0ade8f9a6dde5b 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from datetime import datetime, date, timedelta import operator diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index f31be9502499f1..a7b33c669a8b8c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -544,10 +544,6 @@ class Timestamp(_Timestamp): weekofyear = week - @property - def microsecond(self): - return self._get_field('us') - @property def quarter(self): return self._get_field('q') From 20fee85ede7f2052f855b8f3445cd1ffc17ee0c3 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Thu, 7 Sep 2017 02:00:49 +0100 Subject: [PATCH 053/188] cleaned references to pandas v0.15 and v0.16 in docs (#17442) --- doc/source/10min.rst | 2 +- doc/source/advanced.rst | 15 ++------ doc/source/basics.rst | 8 ++-- doc/source/categorical.rst | 64 +++++--------------------------- doc/source/comparison_with_r.rst | 2 - doc/source/computation.rst | 7 +--- doc/source/cookbook.rst | 6 --- doc/source/dsintro.rst | 2 - doc/source/gotchas.rst | 4 +- doc/source/indexing.rst | 14 ------- doc/source/install.rst | 20 ++++------ doc/source/io.rst | 41 ++++---------------- doc/source/remote_data.rst | 11 +++--- doc/source/reshaping.rst | 4 +- doc/source/sparse.rst | 2 - doc/source/timedeltas.rst | 26 +++++-------- doc/source/visualization.rst | 4 -- doc/source/whatsnew/v0.21.0.txt | 1 + 18 files changed, 53 insertions(+), 180 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index def49a641a0ff8..ef6b2d6ef2c904 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -655,7 +655,7 @@ the quarter end: Categoricals ------------ -Since version 0.15, pandas can include categorical data in a ``DataFrame``. For full docs, see the +pandas can include categorical data in a ``DataFrame``. 
For full docs, see the
 :ref:`categorical introduction ` and the :ref:`API documentation `.
 
 .. ipython:: python
 
diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 4af476cd5a7e12..3f145cf9556645 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -26,12 +26,6 @@ See the :ref:`Indexing and Selecting Data ` for general indexing docum
    should be avoided. See :ref:`Returning a View versus Copy `
 
-.. warning::
-
-   In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray``
-   but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be
-   a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `)
-
 See the :ref:`cookbook` for some advanced strategies
 
 .. _advanced.hierarchical:
 
@@ -638,12 +632,9 @@ In the following sub-sections we will highlight some other index types.
 
 CategoricalIndex
 ~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.16.1
-
-We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting
-indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0)
-and allows efficient indexing and storage of an index with a large number of duplicated elements. Prior to 0.16.1,
-setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``.
+``CategoricalIndex`` is a type of index that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical``
+and allows efficient indexing and storage of an index with a large number of duplicated elements.
 
 .. ipython:: python
 
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index 5880703b1d2711..42c28df3a6030f 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -719,8 +719,6 @@ on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise.
 Tablewise Function Application
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.16.2
-
 ``DataFrames`` and ``Series`` can of course just be passed into functions.
 However, if the function needs to be called in a chain, consider using
 the :meth:`~DataFrame.pipe` method. Compare the following
 
@@ -1860,8 +1858,10 @@ dtypes
 ------
 
 The main types stored in pandas objects are ``float``, ``int``, ``bool``,
-``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``, ``category`` (in >= 0.15.0), and ``object``. In addition these dtypes
-have item sizes, e.g. ``int64`` and ``int32``. See :ref:`Series with TZ ` for more detail on ``datetime64[ns, tz]`` dtypes.
+``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``,
+``category`` and ``object``. In addition these dtypes have item sizes, e.g.
+``int64`` and ``int32``. See :ref:`Series with TZ `
+for more detail on ``datetime64[ns, tz]`` dtypes.
 
 A convenient :attr:`~DataFrame.dtypes` attribute for DataFrames returns a Series
 with the data type of each column.
 
diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst
index 02d7920bc4a84e..8835c4a1533d0c 100644
--- a/doc/source/categorical.rst
+++ b/doc/source/categorical.rst
@@ -16,13 +16,6 @@ Categorical Data
 ****************
 
-.. versionadded:: 0.15
-
-.. note::
-    While there was `pandas.Categorical` in earlier versions, the ability to use
-    categorical data in `Series` and `DataFrame` is new.
-
-
 This is an introduction to pandas categorical data type, including a short comparison
 with R's ``factor``.
 
@@ -295,10 +288,6 @@ Sorting and Order
 
 .. _categorical.sort:
 
-.. warning::
-
-    The default for construction has changed in v0.16.0 to ``ordered=False``, from the prior implicit ``ordered=True``
-
 If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a
 meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a `TypeError`.
 
@@ -803,13 +792,11 @@ Following table summarizes the results of ``Categoricals`` related concatenation
 Getting Data In/Out
 -------------------
 
-.. versionadded:: 0.15.2
+You can write data that contains ``category`` dtypes to a ``HDFStore``.
+See :ref:`here ` for an example and caveats.
 
-Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype was implemented
-in 0.15.2. See :ref:`here ` for an example and caveats.
-
-Writing data to and reading data from *Stata* format files was implemented in
-0.15.2. See :ref:`here ` for an example and caveats.
+It is also possible to write data to and read data from *Stata* format files.
+See :ref:`here ` for an example and caveats.
 
 Writing to a CSV file will convert the data, effectively removing any
 information about the categorical (categories and ordering). So if you read back the CSV file you have to convert the
 
@@ -928,32 +915,6 @@ an ``object`` dtype is a constant times the length of the data.
 
    s.astype('category').nbytes
 
-Old style constructor usage
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-In earlier versions than pandas 0.15, a `Categorical` could be constructed by passing in precomputed
-`codes` (called then `labels`) instead of values with categories. The `codes` were interpreted as
-pointers to the categories with `-1` as `NaN`. This type of constructor usage is replaced by
-the special constructor :func:`Categorical.from_codes`.
-
-Unfortunately, in some special cases, using code which assumes the old style constructor usage
-will work with the current pandas version, resulting in subtle bugs:
-
-.. code-block:: python
-
-    >>> cat = pd.Categorical([1,2], [1,2,3])
-    >>> # old version
-    >>> cat.get_values()
-    array([2, 3], dtype=int64)
-    >>> # new version
-    >>> cat.get_values()
-    array([1, 2], dtype=int64)
-
-.. warning::
-    If you used `Categoricals` with older versions of pandas, please audit your code before
-    upgrading and change your code to use the :func:`~pandas.Categorical.from_codes`
-    constructor.
-
 `Categorical` is not a `numpy` array
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -982,8 +943,7 @@ Dtype comparisons work:
 
    dtype == np.str_
    np.str_ == dtype
 
-To check if a Series contains Categorical data, with pandas 0.16 or later, use
-``hasattr(s, 'cat')``:
+To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``:
 
 .. ipython:: python
 
@@ -1023,13 +983,13 @@ basic type) and applying along columns will also convert to object.
 Categorical Index
 ~~~~~~~~~~~~~~~~~
 
-.. versionadded:: 0.16.1
-
-A new ``CategoricalIndex`` index type is introduced in version 0.16.1. See the
-:ref:`advanced indexing docs ` for a more detailed
+``CategoricalIndex`` is a type of index that is useful for supporting
+indexing with duplicates. This is a container around a ``Categorical``
+and allows efficient indexing and storage of an index with a large number of duplicated elements.
+See the :ref:`advanced indexing docs ` for a more detailed
 explanation.
 
-Setting the index, will create create a ``CategoricalIndex``
+Setting the index will create a ``CategoricalIndex``
 
 .. 
ipython:: python @@ -1041,10 +1001,6 @@ Setting the index, will create create a ``CategoricalIndex`` # This now sorts by the categories order df.sort_index() -In previous versions (<0.16.1) there is no index of type ``category``, so -setting the index to categorical column will convert the categorical data to a -"normal" dtype first and therefore remove any custom ordering of the categories. - Side Effects ~~~~~~~~~~~~ diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index f895cdc25e6205..eb97aeeb7e6962 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -505,8 +505,6 @@ For more details and examples see :ref:`the reshaping documentation |factor|_ ~~~~~~~~~ -.. versionadded:: 0.15 - pandas has a data type for categorical data. .. code-block:: r diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 76a030d355e332..23699393958cfe 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -924,15 +924,12 @@ EWM has a ``min_periods`` argument, which has the same meaning it does for all the ``.expanding`` and ``.rolling`` methods: no output values will be set until at least ``min_periods`` non-null values are encountered in the (expanding) window. -(This is a change from versions prior to 0.15.0, in which the ``min_periods`` -argument affected only the ``min_periods`` consecutive entries starting at the -first non-null value.) -EWM also has an ``ignore_na`` argument, which deterines how +EWM also has an ``ignore_na`` argument, which determines how intermediate null values affect the calculation of the weights. When ``ignore_na=False`` (the default), weights are calculated based on absolute positions, so that intermediate null values affect the result. -When ``ignore_na=True`` (which reproduces the behavior in versions prior to 0.15.0), +When ``ignore_na=True``, weights are calculated by ignoring intermediate null values. For example, assuming ``adjust=True``, if ``ignore_na=False``, the weighted average of ``3, NaN, 5`` would be calculated as diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index f51c3e679b36f3..5bb3ba75fe51bc 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -256,12 +256,6 @@ Panels pf = pd.Panel({'df1':df1,'df2':df2,'df3':df3});pf - #Assignment using Transpose (pandas < 0.15) - pf = pf.transpose(2,0,1) - pf['E'] = pd.DataFrame(data, rng, cols) - pf = pf.transpose(1,2,0);pf - - #Direct assignment (pandas > 0.15) pf.loc[:,:,'F'] = pd.DataFrame(data, rng, cols);pf `Mask a panel by using np.where and then reconstructing the panel with the new masked values diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 4652ccbf0ad34e..ec0a1c7a00bf74 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -453,8 +453,6 @@ available to insert at a particular location in the columns: Assigning New Columns in Method Chains ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.16.0 - Inspired by `dplyr's `__ ``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index a3062b4086673b..9e6f98923fca6c 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -22,8 +22,8 @@ Frequently Asked Questions (FAQ) DataFrame memory usage ---------------------- -As of pandas version 0.15.0, the memory usage of a dataframe (including -the index) is shown when accessing the ``info`` method of a dataframe. 
A +The memory usage of a dataframe (including the index) +is shown when accessing the ``info`` method of a dataframe. A configuration option, ``display.memory_usage`` (see :ref:`options`), specifies if the dataframe's memory usage will be displayed when invoking the ``df.info()`` method. diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index a6e7df57be4e50..88e62b5d301a38 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -47,12 +47,6 @@ advanced indexing. should be avoided. See :ref:`Returning a View versus Copy ` -.. warning:: - - In 0.15.0 ``Index`` has internally been refactored to no longer subclass ``ndarray`` - but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be - a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `) - .. warning:: Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here `. @@ -660,7 +654,6 @@ For getting *multiple* indexers, using ``.get_indexer`` Selecting Random Samples ------------------------ -.. versionadded::0.16.1 A random selection of rows or columns from a Series, DataFrame, or Panel with the :meth:`~DataFrame.sample` method. The method will sample rows by default, and accepts a specific number of rows/columns to return, or a fraction of rows. @@ -1510,8 +1503,6 @@ See :ref:`Advanced Indexing ` for usage of MultiIndexes. ind.name = "bob" ind -.. versionadded:: 0.15.0 - ``set_names``, ``set_levels``, and ``set_labels`` also take an optional `level`` argument @@ -1527,11 +1518,6 @@ Set operations on Index objects .. _indexing.set_ops: -.. warning:: - - In 0.15.0. the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain - index types. ``+`` can be replace by ``.union()`` or ``|``, and ``-`` by ``.difference()``. - The two main operations are ``union (|)``, ``intersection (&)`` These can be directly called as instance methods or used via overloaded operators. Difference is provided via the ``.difference()`` method. diff --git a/doc/source/install.rst b/doc/source/install.rst index 8dc8224ea6cb24..c805f84d0faaa7 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -18,7 +18,7 @@ Instructions for installing from source, Python version support ---------------------- -Officially Python 2.7, 3.4, 3.5, and 3.6 +Officially Python 2.7, 3.5, and 3.6. Installing pandas ----------------- @@ -183,21 +183,17 @@ installed), make sure you have `pytest >>> import pandas as pd >>> pd.test() - Running unit tests for pandas - pandas version 0.18.0 - numpy version 1.10.2 - pandas is installed in pandas - Python version 2.7.11 |Continuum Analytics, Inc.| - (default, Dec 6 2015, 18:57:58) [GCC 4.2.1 (Apple Inc. build 5577)] - nose version 1.3.7 + running: pytest --skip-slow --skip-network C:\Users\TP\Anaconda3\envs\py36\lib\site-packages\pandas + ============================= test session starts ============================= + platform win32 -- Python 3.6.2, pytest-3.2.1, py-1.4.34, pluggy-0.4.0 + rootdir: C:\Users\TP\Documents\Python\pandasdev\pandas, inifile: setup.cfg + collected 12145 items / 3 skipped + ..................................................................S...... ........S................................................................ ......................................................................... 
-
-    ----------------------------------------------------------------------
-    Ran 9252 tests in 368.339s
-
-    OK (SKIP=117)
+    ==================== 12130 passed, 12 skipped in 368.339 seconds =====================
 
 Dependencies
 ------------
 
diff --git a/doc/source/io.rst b/doc/source/io.rst
index 33523ea171f3ae..de3150035c446b 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -592,8 +592,7 @@ Ignoring line comments and empty lines
 ++++++++++++++++++++++++++++++++++++++
 
 If the ``comment`` parameter is specified, then completely commented lines will
-be ignored. By default, completely blank lines will be ignored as well. Both of
-these are API changes introduced in version 0.15.
+be ignored. By default, completely blank lines will be ignored as well.
 
 .. ipython:: python
 
@@ -2701,8 +2700,6 @@ Using a list to get multiple sheets:
    # Returns the 1st and 4th sheet, as a dictionary of DataFrames.
    read_excel('path_to_file.xls',sheet_name=['Sheet1',3])
 
-.. versionadded:: 0.16
-
 ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either
 a list of sheet names, a list of sheet positions, or ``None`` to read all sheets.
 Sheets can be specified by sheet index or sheet name, using an integer or string,
 
@@ -3241,11 +3238,10 @@ for some advanced strategies
 
 .. warning::
 
-   As of version 0.15.0, pandas requires ``PyTables`` >= 3.0.0. Stores written with prior versions of pandas / ``PyTables`` >= 2.3 are fully compatible (this was the previous minimum ``PyTables`` required version).
-
-.. warning::
-
-   There is a ``PyTables`` indexing bug which may appear when querying stores using an index. If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. Stores created previously will need to be rewritten using the updated version.
+   pandas requires ``PyTables`` >= 3.0.0.
+   There is an indexing bug in ``PyTables`` < 3.2 which may appear when querying stores using an index.
+   If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2.
+   Stores created previously will need to be rewritten using the updated version.
 
 .. warning::
 
@@ -4210,10 +4206,8 @@ object : ``strings`` ``np.nan``
 Categorical Data
 ++++++++++++++++
 
-.. versionadded:: 0.15.2
-
-Writing data to a ``HDFStore`` that contains a ``category`` dtype was implemented
-in 0.15.2. Queries work the same as if it was an object array. However, the ``category`` dtyped data is
+You can write data that contains ``category`` dtypes to a ``HDFStore``.
+Queries work the same as if it were an object array. However, the ``category`` dtyped data is
 stored in a more efficient manner.
 
 .. ipython:: python
 
@@ -4228,21 +4222,6 @@ stored in a more efficient manner.
    result
    result.dtypes
 
-.. warning::
-
-   The format of the ``Categorical`` is readable by prior versions of pandas (< 0.15.2), but will retrieve
-   the data as an integer based column (e.g. the ``codes``). However, the ``categories`` *can* be retrieved
-   but require the user to select them manually using the explicit meta path.
-
-   The data is stored like so:
-
-   .. ipython:: python
-
-      cstore
-
-      # to get the categories
-      cstore.select('dfcat/meta/A/meta')
-
 .. ipython:: python
    :suppress:
    :okexcept:
 
@@ -4746,8 +4725,6 @@ You can check if a table exists using :func:`~pandas.io.sql.has_table`
 Schema support
 ''''''''''''''
 
-.. versionadded:: 0.15.0
-
 Reading from and writing to different schema's is supported through the ``schema``
 keyword in the :func:`~pandas.read_sql_table` and :func:`~pandas.DataFrame.to_sql`
 functions. 
Note however that this depends on the database flavor (sqlite does not
 have schema's). For example:
 
 .. code-block:: python
 
diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst
index 7980133582125e..9af66058a7aaa3 100644
--- a/doc/source/remote_data.rst
+++ b/doc/source/remote_data.rst
@@ -11,14 +11,13 @@ Remote Data Access
 DataReader
 ----------
 
-The sub-package ``pandas.io.data`` is removed in favor of a separately
-installable `pandas-datareader package
+The sub-package ``pandas.io.data`` was deprecated in v.0.17 and removed in
+`v.0.19 `__.
+ Instead, use the separately installable `pandas-datareader package
 `_. This will allow the data
-modules to be independently updated to your pandas installation. The API for
-``pandas-datareader v0.1.1`` is the same as in ``pandas v0.16.1``.
-(:issue:`8961`)
+modules to be updated independently of your pandas installation.
 
- You should replace the imports of the following:
+ For pandas versions older than 0.19, you should replace the following imports:
 
 .. code-block:: python
 
diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst
index 3dce73b302c7ca..fab83222b313f1 100644
--- a/doc/source/reshaping.rst
+++ b/doc/source/reshaping.rst
@@ -569,8 +569,6 @@ This function is often used along with discretization functions like ``cut``:
 
 See also :func:`Series.str.get_dummies `.
 
-.. versionadded:: 0.15.0
-
 :func:`get_dummies` also accepts a DataFrame. By default all categorical
 variables (categorical in the statistical sense, those with `object` or
 `categorical` dtype) are encoded as dummy variables.
 
@@ -675,4 +673,4 @@ handling of NaN:
    you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or
    ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`,
    see the :ref:`Categorical introduction ` and the
-   :ref:`API documentation `. This feature was introduced in version 0.15.
+   :ref:`API documentation `.
 
diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst
index b4884cf1c4141b..cf16cee501a3e5 100644
--- a/doc/source/sparse.rst
+++ b/doc/source/sparse.rst
@@ -216,8 +216,6 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you
 SparseSeries
 ~~~~~~~~~~~~
 
-.. versionadded:: 0.16.0
-
 A :meth:`SparseSeries.to_coo` method is implemented for transforming a
 ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``.
 
 The method requires a ``MultiIndex`` with two or more levels.
 
diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst
index daa2c262c8c860..d055c49dc4721e 100644
--- a/doc/source/timedeltas.rst
+++ b/doc/source/timedeltas.rst
@@ -23,13 +23,12 @@ Time Deltas
 ***********
 
-.. 
note::
-
-    Starting in v0.15.0, we introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner,
-    but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes.
+Timedeltas are differences in times, expressed in units such as days, hours, minutes, and
+seconds. They can be both positive and negative.
 
-Timedeltas are differences in times, expressed in difference units, e.g. days, hours, minutes, seconds.
-They can be both positive and negative.
+``Timedelta`` is a subclass of ``datetime.timedelta``, and behaves in a similar manner,
+but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation,
+parsing, and attributes.
 
 Parsing
 -------
 
@@ -78,15 +77,10 @@ Further, operations among the scalars yield another scalar ``Timedelta``.
 to_timedelta
 ~~~~~~~~~~~~
 
-.. warning::
-
-    Prior to 0.15.0 ``pd.to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input.
-    It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input.
-
-    The arguments to ``pd.to_timedelta`` are now ``(arg, unit='ns', box=True)``, previously were ``(arg, box=True, unit='ns')`` as these are more logical.
-
-Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, or Series from a recognized timedelta format / value into a ``Timedelta`` type.
-It will construct Series if the input is a Series, a scalar if the input is scalar-like, otherwise will output a ``TimedeltaIndex``.
+Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list,
+or Series from a recognized timedelta format / value into a ``Timedelta`` type.
+It will construct Series if the input is a Series, a scalar if the input is
+scalar-like, otherwise it will output a ``TimedeltaIndex``.
 
 You can parse a single string to a Timedelta:
 
@@ -328,8 +322,6 @@ You can convert a ``Timedelta`` to an `ISO 8601 Duration`_ string with the
 TimedeltaIndex
 --------------
 
-.. versionadded:: 0.15.0
-
 To generate an index with time delta, you can use either the ``TimedeltaIndex`` or
 the ``timedelta_range`` constructor.
 
diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst
index 839390c8778aa1..b5a261e3acac5d 100644
--- a/doc/source/visualization.rst
+++ b/doc/source/visualization.rst
@@ -229,8 +229,6 @@ To get horizontal bar plots, use the ``barh`` method:
 Histograms
 ~~~~~~~~~~
 
-.. versionadded:: 0.15.0
-
 Histogram can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods.
 
 .. ipython:: python
 
@@ -328,8 +326,6 @@ The ``by`` keyword can be specified to plot grouped histograms:
 Box Plots
 ~~~~~~~~~
 
-.. versionadded:: 0.15.0
-
 Boxplot can be drawn calling :meth:`Series.plot.box` and :meth:`DataFrame.plot.box`,
 or :meth:`DataFrame.boxplot` to visualize the distribution of values within each column.
 
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 33a6db18db3cad..636bb2dc3e60ea 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -485,3 +485,4 @@ Other
 ^^^^^
 - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
 - Several ``NaT`` method docstrings (e.g. 
:func:`NaT.ctime`) were incorrect (:issue:`17327`)
+- The documentation has had references to versions < v0.16 removed and cleaned up (:issue:`17442` & :issue:`17404`)

From 24b440e67abb3b14856f0fd920141f5a6dcf83fd Mon Sep 17 00:00:00 2001
From: Dillon Niederhut
Date: Thu, 7 Sep 2017 05:52:11 -0500
Subject: [PATCH 054/188] BUG: revert collision warning (#17298)

---
 doc/source/indexing.rst             | 15 ---------------
 doc/source/whatsnew/v0.21.0.txt     | 24 +++---------------------
 pandas/core/generic.py              |  8 ++------
 pandas/tests/dtypes/test_generic.py |  5 -----
 pandas/tests/io/test_pytables.py    |  4 ++--
 5 files changed, 7 insertions(+), 49 deletions(-)

diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
index 88e62b5d301a38..8474116c380825 100644
--- a/doc/source/indexing.rst
+++ b/doc/source/indexing.rst
@@ -269,21 +269,6 @@ new column. In 0.21.0 and later, this will raise a ``UserWarning``:
     1  2.0
     2  3.0
 
-Similarly, it is possible to create a column with a name which collides with one of Pandas's
-built-in methods or attributes, which can cause confusion later when attempting to access
-that column as an attribute. This behavior now warns:
-
-.. code-block:: ipython
-
-    In[4]: df['sum'] = [5., 7., 9.]
-    UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior
-    In[5]: df.sum
-    Out[5]:
-    
-
 Slicing ranges
 --------------
 
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 636bb2dc3e60ea..fa00140fb4abda 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -67,8 +67,8 @@ Improved warnings when attempting to create columns
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 New users are often flummoxed by the relationship between column operations and attribute
-access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances
-of this confusion include attempting to create a new column by setting into an attribute:
+access on ``DataFrame`` instances (:issue:`7175`). One specific instance
+of this confusion is attempting to create a new column by setting into an attribute:
 
 .. code-block:: ipython
 
@@ -86,25 +86,7 @@ This does not raise any obvious exceptions, but also does not create a new colum
     1  2.0
     2  3.0
 
-The second source of confusion is creating a column whose name collides with a method or
-attribute already in the instance namespace:
-
-.. code-block:: ipython
-
-    In[4]: df['sum'] = [5., 7., 9.]
-
-This does not permit that column to be accessed as an attribute:
-
-.. code-block:: ipython
-
-    In[5]: df.sum
-    Out[5]:
-    
-
-Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `.
+Setting a list-like data structure into a new attribute now raises a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `.
 
 .. 
_whatsnew_0210.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdb08d8887e05b..df5f1a8326acd3 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1905,10 +1905,6 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - if isinstance(key, str) and callable(getattr(self, key, None)): - warnings.warn("Column name '{key}' collides with a built-in " - "method, which will cause unexpected attribute " - "behavior".format(key=key), stacklevel=3) self._data.set(key, value) self._clear_item_cache() @@ -3441,8 +3437,8 @@ def __setattr__(self, name, value): object.__setattr__(self, name, value) except (AttributeError, TypeError): if isinstance(self, ABCDataFrame) and (is_list_like(value)): - warnings.warn("Pandas doesn't allow Series to be assigned " - "into nonexistent columns - see " + warnings.warn("Pandas doesn't allow columns to be " + "created via a new attribute name - see " "https://pandas.pydata.org/pandas-docs/" "stable/indexing.html#attribute-access", stacklevel=2) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 82444d6c941576..bd365f9c3281f8 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -48,7 +48,6 @@ def test_abc_types(self): def test_setattr_warnings(): - # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash # GH7175 - GOTCHA: You can't use dot notation to add a column... d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} @@ -78,7 +77,3 @@ def test_setattr_warnings(): # warn when setting column to nonexistent name df.four = df.two + 2 assert df.four.sum() > df.two.sum() - - with tm.assert_produces_warning(UserWarning): - # warn when column has same name as method - df['sum'] = df.two diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b5ecc4d34cd08b..9c488cb2389bed 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2011,7 +2011,7 @@ def check(obj, comparator): df['string'] = 'foo' df['float322'] = 1. df['float322'] = df['float322'].astype('float32') - df['boolean'] = df['float322'] > 0 + df['bool'] = df['float322'] > 0 df['time1'] = Timestamp('20130101') df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) @@ -2141,7 +2141,7 @@ def test_table_values_dtypes_roundtrip(self): df1['string'] = 'foo' df1['float322'] = 1. 
df1['float322'] = df1['float322'].astype('float32') - df1['boolean'] = df1['float32'] > 0 + df1['bool'] = df1['float32'] > 0 df1['time1'] = Timestamp('20130101') df1['time2'] = Timestamp('20130102') From 8a8a4fd74dc1dd2804d5f605fcad47e6f0fd4b60 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 7 Sep 2017 04:28:12 -0700 Subject: [PATCH 055/188] cdef out dtype for _Timestamp._get_field (#17457) --- pandas/_libs/tslib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a7b33c669a8b8c..7e009652f7f0c6 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1275,6 +1275,7 @@ cdef class _Timestamp(datetime): cpdef _get_field(self, field): cdef: int64_t val + ndarray[int32_t] out val = self._maybe_convert_value_to_local() out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) @@ -1282,6 +1283,7 @@ cdef class _Timestamp(datetime): cpdef _get_named_field(self, field): cdef: int64_t val + ndarray[object] out val = self._maybe_convert_value_to_local() out = get_date_name_field(np.array([val], dtype=np.int64), field) return out[0] @@ -1291,9 +1293,7 @@ cdef class _Timestamp(datetime): 'startingMonth', self.freq.kwds.get( 'month', 12)) if self.freq else 12 freqstr = self.freqstr if self.freq else None - val = self.value - if self.tz is not None and not _is_utc(self.tz): - val = tz_convert_single(self.value, 'UTC', self.tz) + val = self._maybe_convert_value_to_local() out = get_start_end_field( np.array([val], dtype=np.int64), field, freqstr, month_kw) return out[0] From 9dc01c4f9142908c4a7db5a3a0300685f6d43308 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Thu, 7 Sep 2017 07:35:40 -0400 Subject: [PATCH 056/188] DOC: Add Timestamp, Period, Timedelta, and Interval to api.rst (#17424) --- doc/source/api.rst | 195 ++++++++++++++++++++++++++++++++++++++++ pandas/_libs/period.pyx | 2 +- 2 files changed, 196 insertions(+), 1 deletion(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 12e6c7ad7f6305..d34cec86638fba 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1599,6 +1599,201 @@ Conversion TimedeltaIndex.floor TimedeltaIndex.ceil +.. currentmodule:: pandas + +Scalars +------- + +Period +~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period + +Attributes +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.day + Period.dayofweek + Period.dayofyear + Period.days_in_month + Period.daysinmonth + Period.end_time + Period.freq + Period.freqstr + Period.hour + Period.is_leap_year + Period.minute + Period.month + Period.now + Period.ordinal + Period.quarter + Period.qyear + Period.second + Period.start_time + Period.strftime + Period.week + Period.weekday + Period.weekofyear + Period.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.asfreq + Period.strftime + Period.to_timestamp + +Timestamp +~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timestamp + +Properties +~~~~~~~~~~ +.. 
autosummary::
+   :toctree: generated/
+
+   Timestamp.asm8
+   Timestamp.day
+   Timestamp.dayofweek
+   Timestamp.dayofyear
+   Timestamp.days_in_month
+   Timestamp.daysinmonth
+   Timestamp.hour
+   Timestamp.is_leap_year
+   Timestamp.is_month_end
+   Timestamp.is_month_start
+   Timestamp.is_quarter_end
+   Timestamp.is_quarter_start
+   Timestamp.is_year_end
+   Timestamp.is_year_start
+   Timestamp.max
+   Timestamp.microsecond
+   Timestamp.min
+   Timestamp.month
+   Timestamp.nanosecond
+   Timestamp.quarter
+   Timestamp.resolution
+   Timestamp.second
+   Timestamp.tz
+   Timestamp.tzinfo
+   Timestamp.value
+   Timestamp.weekday_name
+   Timestamp.weekofyear
+   Timestamp.year
+
+Methods
+~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timestamp.astimezone
+   Timestamp.ceil
+   Timestamp.combine
+   Timestamp.ctime
+   Timestamp.date
+   Timestamp.dst
+   Timestamp.floor
+   Timestamp.freq
+   Timestamp.freqstr
+   Timestamp.from_ordinal
+   Timestamp.fromtimestamp
+   Timestamp.isocalendar
+   Timestamp.isoformat
+   Timestamp.isoweekday
+   Timestamp.normalize
+   Timestamp.now
+   Timestamp.replace
+   Timestamp.round
+   Timestamp.strftime
+   Timestamp.strptime
+   Timestamp.time
+   Timestamp.timetuple
+   Timestamp.timetz
+   Timestamp.to_datetime64
+   Timestamp.to_julian_date
+   Timestamp.to_period
+   Timestamp.to_pydatetime
+   Timestamp.today
+   Timestamp.toordinal
+   Timestamp.tz_convert
+   Timestamp.tz_localize
+   Timestamp.tzname
+   Timestamp.utcfromtimestamp
+   Timestamp.utcnow
+   Timestamp.utcoffset
+   Timestamp.utctimetuple
+   Timestamp.weekday
+
+Interval
+~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Interval
+
+Properties
+~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Interval.closed
+   Interval.closed_left
+   Interval.closed_right
+   Interval.left
+   Interval.mid
+   Interval.open_left
+   Interval.open_right
+   Interval.right
+
+Timedelta
+~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timedelta
+
+Properties
+~~~~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timedelta.asm8
+   Timedelta.components
+   Timedelta.days
+   Timedelta.delta
+   Timedelta.freq
+   Timedelta.is_populated
+   Timedelta.max
+   Timedelta.microseconds
+   Timedelta.min
+   Timedelta.nanoseconds
+   Timedelta.resolution
+   Timedelta.seconds
+   Timedelta.value
+
+Methods
+~~~~~~~
+.. autosummary::
+   :toctree: generated/
+
+   Timedelta.ceil
+   Timedelta.floor
+   Timedelta.isoformat
+   Timedelta.round
+   Timdelta.to_pytimedelta
+   Timedelta.to_timedelta64
+   Timedelta.total_seconds
+   Timedelta.view
+
 Window
 ------
 .. 
currentmodule:: pandas.core.window diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 0ade8f9a6dde5b..8f89b812fec04f 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -1102,7 +1102,7 @@ cdef class _Period(object): class Period(_Period): """ - Represents an period of time + Represents a period of time Parameters ---------- From aee2ae086e0972aabcb43d05fa2a404153e3b3b5 Mon Sep 17 00:00:00 2001 From: majiang Date: Thu, 7 Sep 2017 20:41:24 +0900 Subject: [PATCH 057/188] DOC: to_json (#17461) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index df5f1a8326acd3..8d16b079ba2c8d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1265,7 +1265,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, Parameters ---------- path_or_buf : the path or buffer to write the result string - if this is None, return a StringIO of the converted string + if this is None, return the converted string orient : string * Series From 3a291bb7170ca900cb1b886a3c0b39976a9870ef Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 7 Sep 2017 05:49:27 -0600 Subject: [PATCH 058/188] BUG: Index._searchsorted_monotonic(..., side='right') returns the left side position for monotonic decreasing indexes (#17272) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/common.py | 59 +++++++++++++++++-- .../indexes/datetimes/test_datetimelike.py | 4 +- pandas/tests/indexes/period/test_period.py | 4 +- pandas/tests/indexes/test_base.py | 3 +- pandas/tests/indexes/test_numeric.py | 12 ++-- pandas/tests/indexes/test_range.py | 3 +- pandas/tests/indexing/test_interval.py | 56 +++++++++++------- 9 files changed, 111 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fa00140fb4abda..d3c61adccc7a61 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -400,6 +400,7 @@ Indexing - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) +- Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a9098126a38e3d..ef5f68936044a8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3465,7 +3465,7 @@ def _searchsorted_monotonic(self, label, side='left'): # everything for it to work (element ordering, search side and # resulting value). 
pos = self[::-1].searchsorted(label, side='right' if side == 'left' - else 'right') + else 'left') return len(self) - pos raise ValueError('index must be monotonic increasing or decreasing') diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1fdc08d68eb268..90618cd6e235f6 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -11,6 +11,7 @@ RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, notna, isna) +from pandas.core.indexes.base import InvalidIndexError from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion from pandas._libs.tslib import iNaT @@ -138,9 +139,14 @@ def test_get_indexer_consistency(self): if isinstance(index, IntervalIndex): continue - indexer = index.get_indexer(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp + if index.is_unique or isinstance(index, CategoricalIndex): + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = index.get_indexer(index[0:2]) indexer, _ = index.get_indexer_non_unique(index[0:2]) assert isinstance(indexer, np.ndarray) @@ -632,7 +638,8 @@ def test_difference_base(self): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ - tm.assert_numpy_array_equal(result.asi8, answer.asi8) + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) else: result = first.difference(case) assert tm.equalContents(result, answer) @@ -954,3 +961,47 @@ def test_join_self_unique(self, how): if index.is_unique: joined = index.join(index, how=how) assert (index == joined).all() + + def test_searchsorted_monotonic(self): + # GH17271 + for index in self.indices.values(): + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(index, (MultiIndex, IntervalIndex)): + continue + + # nothing to test if the index is empty + if index.empty: + continue + value = index[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (index == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(index) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if index.is_monotonic_increasing: + ssm_left = index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = index.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = index.searchsorted(value, side='right') + assert expected_right == ss_right + elif index.is_monotonic_decreasing: + ssm_left = index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + else: + # non-monotonic should raise. 
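+                # (bisection assumes a sorted sequence; an index that is
+                # neither increasing nor decreasing has no well-defined
+                # insertion point, so ValueError is expected here)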
+ with pytest.raises(ValueError): + index._searchsorted_monotonic(value, side='left') diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 3b970ee3825212..538e10e6011ec4 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -12,7 +12,9 @@ class TestDatetimeIndex(DatetimeLike): _holder = DatetimeIndex def setup_method(self, method): - self.indices = dict(index=tm.makeDateIndex(10)) + self.indices = dict(index=tm.makeDateIndex(10), + index_dec=date_range('20130110', periods=10, + freq='-1D')) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e24e2ad936e2c2..51f7d13cb0638f 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -18,7 +18,9 @@ class TestPeriodIndex(DatetimeLike): _multiprocess_can_split_ = True def setup_method(self, method): - self.indices = dict(index=tm.makePeriodIndex(10)) + self.indices = dict(index=tm.makePeriodIndex(10), + index_dec=period_range('20130101', periods=10, + freq='D')[::-1]) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f96dbdcfb8acfe..d69fbbcdf4bf60 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -46,7 +46,8 @@ def setup_method(self, method): catIndex=tm.makeCategoricalIndex(100), empty=Index([]), tuples=MultiIndex.from_tuples(lzip( - ['foo', 'bar', 'baz'], [1, 2, 3]))) + ['foo', 'bar', 'baz'], [1, 2, 3])), + repeats=Index([0, 0, 1, 1, 2, 2])) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1a0a38c1732843..7e7e10e4aeabee 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -181,7 +181,9 @@ class TestFloat64Index(Numeric): def setup_method(self, method): self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), - float=Float64Index(np.arange(5) * 2.5)) + float=Float64Index(np.arange(5) * 2.5), + mixed_dec=Float64Index([5, 4, 3, 2, 1.5]), + float_dec=Float64Index(np.arange(4, -1, -1) * 2.5)) self.setup_indices() def create_index(self): @@ -654,7 +656,8 @@ class TestInt64Index(NumericInt): _holder = Int64Index def setup_method(self, method): - self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) + self.indices = dict(index=Int64Index(np.arange(0, 20, 2)), + index_dec=Int64Index(np.arange(19, -1, -1))) self.setup_indices() def create_index(self): @@ -949,8 +952,9 @@ class TestUInt64Index(NumericInt): _holder = UInt64Index def setup_method(self, method): - self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, - 2**63 + 20, 2**63 + 25])) + vals = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + self.indices = dict(index=UInt64Index(vals), + index_dec=UInt64Index(reversed(vals))) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 06c8f0ee392c77..d206c36ee51c95 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -25,7 +25,8 @@ class TestRangeIndex(Numeric): _compat_props = ['shape', 'ndim', 'size', 'itemsize'] def setup_method(self, method): - self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) + self.indices = dict(index=RangeIndex(0, 20, 2, 
name='foo'), + index_dec=RangeIndex(18, -1, -2, name='bar')) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index be6e5e1cffb2e5..31a94abcd99a59 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -3,6 +3,7 @@ import pandas as pd from pandas import Series, DataFrame, IntervalIndex, Interval +from pandas.compat import product import pandas.util.testing as tm @@ -14,16 +15,6 @@ def setup_method(self, method): def test_loc_with_scalar(self): s = self.s - expected = 0 - - result = s.loc[0.5] - assert result == expected - - result = s.loc[1] - assert result == expected - - with pytest.raises(KeyError): - s.loc[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s.loc[:3]) @@ -42,16 +33,6 @@ def test_loc_with_scalar(self): def test_getitem_with_scalar(self): s = self.s - expected = 0 - - result = s[0.5] - assert result == expected - - result = s[1] - assert result == expected - - with pytest.raises(KeyError): - s[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s[:3]) @@ -67,6 +48,41 @@ def test_getitem_with_scalar(self): expected = s.iloc[2:5] tm.assert_series_equal(expected, s[s >= 2]) + @pytest.mark.parametrize('direction, closed', + product(('increasing', 'decreasing'), + ('left', 'right', 'neither', 'both'))) + def test_nonoverlapping_monotonic(self, direction, closed): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == 'decreasing': + tpls = reversed(tpls) + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + s = Series(list('abc'), idx) + + for key, expected in zip(idx.left, s): + if idx.closed_left: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.right, s): + if idx.closed_right: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.mid, s): + assert s[key] == expected + assert s.loc[key] == expected + def test_with_interval(self): s = self.s From ee6185e2fb9461632949f3ba52a28b37a1f7296e Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Thu, 7 Sep 2017 14:56:33 +0300 Subject: [PATCH 059/188] COMPAT: Pypy tweaks (#17351) --- doc/source/whatsnew/v0.21.0.txt | 11 ++++- pandas/_libs/src/ujson/python/JSONtoObj.c | 16 +++---- pandas/io/parsers.py | 1 + pandas/tests/indexes/test_base.py | 16 +++++-- pandas/tests/indexes/test_multi.py | 13 +++++- pandas/tests/io/parser/test_parsers.py | 52 ++++++++++++++++++++++- 6 files changed, 92 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d3c61adccc7a61..f50052347cfb56 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -371,13 +371,11 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Conversion ^^^^^^^^^^ - Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) -- Fix :func:`DataFrame.memory_usage` to support PyPy. 
Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) @@ -463,6 +461,15 @@ Categorical the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) +PyPy +^^^^ + +- Compatibility with PyPy in :func:`read_csv` with ``usecols=[]`` and + :func:`read_json` (:issue:`17351`) +- Split tests into cases for CPython and PyPy where needed, which highlights the fragility + of index matching with ``float('nan')``, ``np.nan`` and ``NAT`` (:issue:`17351`) +- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, + so an approximation is used instead (:issue:`17228`) Other ^^^^^ diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index b0132532c16af7..85cf1d5e5e7a1a 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -409,7 +409,7 @@ JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) { } int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyObject *label; + PyObject *label, *labels; npy_intp labelidx; // add key to label array, value to values array NpyArrContext *npyarr = (NpyArrContext *)obj; @@ -424,11 +424,11 @@ int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { if (!npyarr->labels[labelidx]) { npyarr->labels[labelidx] = PyList_New(0); } - + labels = npyarr->labels[labelidx]; // only fill label array once, assumes all column labels are the same // for 2-dimensional arrays. - if (PyList_GET_SIZE(npyarr->labels[labelidx]) <= npyarr->elcount) { - PyList_Append(npyarr->labels[labelidx], label); + if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) { + PyList_Append(labels, label); } if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) { @@ -439,16 +439,16 @@ int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { } int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyDict_SetItem(obj, name, value); + int ret = PyDict_SetItem(obj, name, value); Py_DECREF((PyObject *)name); Py_DECREF((PyObject *)value); - return 1; + return ret == 0 ? 1 : 0; } int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { - PyList_Append(obj, value); + int ret = PyList_Append(obj, value); Py_DECREF((PyObject *)value); - return 1; + return ret == 0 ? 1 : 0; } JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8b1a921536a1dd..6adf154aabba7f 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1716,6 +1716,7 @@ def _set_noconvert_columns(self): # A set of integers will be converted to a list in # the correct order every single time. 
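+            # (CPython happens to iterate small integer sets in sorted
+            # order, but other implementations such as PyPy do not, hence
+            # the explicit sort added below.)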
usecols = list(self.usecols) + usecols.sort() elif (callable(self.usecols) or self.usecols_dtype not in ('empty', None)): # The names attribute should have the correct columns diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d69fbbcdf4bf60..fa73c9fc7b7225 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -9,7 +9,7 @@ from pandas.tests.indexes.common import Base from pandas.compat import (range, lrange, lzip, u, - text_type, zip, PY3, PY36) + text_type, zip, PY3, PY36, PYPY) import operator import numpy as np @@ -1370,13 +1370,21 @@ def test_isin(self): assert len(result) == 0 assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_not_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, False])) + + @pytest.mark.skipif(not PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, True])) + + def test_isin_nan_common(self): tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal(Index(['a', pd.NaT]).isin([pd.NaT]), np.array([False, True])) - tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), - np.array([False, False])) tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([pd.NaT]), np.array([False, False])) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 798d2444689615..86308192c91665 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -14,7 +14,7 @@ from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, compat, date_range, period_range) -from pandas.compat import PY3, long, lrange, lzip, range, u +from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.indexes.base import InvalidIndexError from pandas._libs import lib @@ -2571,13 +2571,22 @@ def test_isin(self): assert len(result) == 0 assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_not_pypy(self): idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), np.array([False, False])) tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), np.array([False, False])) + @pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_pypy(self): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, True])) + def test_isin_level_kwarg(self): idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( 4)]) diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 2fee2451c5e36f..0ea4757b10e942 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -3,8 +3,10 @@ import os import pandas.util.testing as tm -from pandas import read_csv, read_table +from pandas import read_csv, read_table, DataFrame from pandas.core.common import AbstractMethodError +from pandas._libs.lib import Timestamp +from 
pandas.compat import StringIO
 
 from .common import ParserTests
 from .header import HeaderTests
 
@@ -100,3 +102,51 @@ def read_table(self, *args, **kwds):
         kwds = kwds.copy()
         kwds['engine'] = self.engine
         return read_table(*args, **kwds)
+
+
+class TestUnsortedUsecols(object):
+    def test_override__set_noconvert_columns(self):
+        # GH 17351 - usecols needs to be sorted in _set_noconvert_columns
+        # based on the test_usecols_with_parse_dates test from usecols.py
+        from pandas.io.parsers import CParserWrapper, TextFileReader
+
+        s = """a,b,c,d,e
+        0,1,20140101,0900,4
+        0,1,20140102,1000,4"""
+
+        parse_dates = [[1, 2]]
+        cols = {
+            'a': [0, 0],
+            'c_d': [
+                Timestamp('2014-01-01 09:00:00'),
+                Timestamp('2014-01-02 10:00:00')
+            ]
+        }
+        expected = DataFrame(cols, columns=['c_d', 'a'])
+
+        class MyTextFileReader(TextFileReader):
+            def __init__(self):
+                self._currow = 0
+                self.squeeze = False
+
+        class MyCParserWrapper(CParserWrapper):
+            def _set_noconvert_columns(self):
+                if self.usecols_dtype == 'integer':
+                    # self.usecols is a set, which is documented as unordered
+                    # but in practice, a CPython set of integers is sorted.
+                    # In other implementations this assumption does not hold.
+                    # The following code simulates a different order, which
+                    # before GH 17351 would cause the wrong columns to be
+                    # converted via the parse_dates parameter
+                    self.usecols = list(self.usecols)
+                    self.usecols.reverse()
+                return CParserWrapper._set_noconvert_columns(self)
+
+        parser = MyTextFileReader()
+        parser.options = {'usecols': [0, 2, 3],
+                          'parse_dates': parse_dates,
+                          'delimiter': ','}
+        parser._engine = MyCParserWrapper(StringIO(s), **parser.options)
+        df = parser.read()
+
+        tm.assert_frame_equal(df, expected)

From 46832ac8f465aa911ba79ebc1b1a4d0f6baf46f9 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Thu, 7 Sep 2017 17:46:12 -0700
Subject: [PATCH 060/188] Replace * imports with explicit imports; remove
 unused declared constants (#17470)

---
 pandas/_libs/src/skiplist.pyx |  1 -
 pandas/_libs/window.pyx       | 38 ++++++-----------------------------
 2 files changed, 6 insertions(+), 33 deletions(-)

diff --git a/pandas/_libs/src/skiplist.pyx b/pandas/_libs/src/skiplist.pyx
index 559b529822a69f..1524dca38d0e07 100644
--- a/pandas/_libs/src/skiplist.pyx
+++ b/pandas/_libs/src/skiplist.pyx
@@ -15,7 +15,6 @@ cdef double Log2(double x):
     return log(x) / log(2.)
 
cimport numpy as np -from numpy cimport * import numpy as np from random import random diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 9fb3d0662eb4f2..b6bd6f92f61998 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1,55 +1,29 @@ # cython: profile=False # cython: boundscheck=False, wraparound=False, cdivision=True -from numpy cimport * +from cython cimport Py_ssize_t + cimport numpy as np import numpy as np cimport cython -import_array() +np.import_array() cimport util from libc.stdlib cimport malloc, free -from numpy cimport NPY_INT8 as NPY_int8 -from numpy cimport NPY_INT16 as NPY_int16 -from numpy cimport NPY_INT32 as NPY_int32 -from numpy cimport NPY_INT64 as NPY_int64 -from numpy cimport NPY_FLOAT16 as NPY_float16 -from numpy cimport NPY_FLOAT32 as NPY_float32 -from numpy cimport NPY_FLOAT64 as NPY_float64 - -from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float16_t, float32_t, float64_t) - -int8 = np.dtype(np.int8) -int16 = np.dtype(np.int16) -int32 = np.dtype(np.int32) -int64 = np.dtype(np.int64) -float16 = np.dtype(np.float16) -float32 = np.dtype(np.float32) -float64 = np.dtype(np.float64) - -cdef np.int8_t MINint8 = np.iinfo(np.int8).min -cdef np.int16_t MINint16 = np.iinfo(np.int16).min -cdef np.int32_t MINint32 = np.iinfo(np.int32).min -cdef np.int64_t MINint64 = np.iinfo(np.int64).min -cdef np.float16_t MINfloat16 = np.NINF + +from numpy cimport ndarray, double_t, int64_t, float64_t + cdef np.float32_t MINfloat32 = np.NINF cdef np.float64_t MINfloat64 = np.NINF -cdef np.int8_t MAXint8 = np.iinfo(np.int8).max -cdef np.int16_t MAXint16 = np.iinfo(np.int16).max -cdef np.int32_t MAXint32 = np.iinfo(np.int32).max -cdef np.int64_t MAXint64 = np.iinfo(np.int64).max -cdef np.float16_t MAXfloat16 = np.inf cdef np.float32_t MAXfloat32 = np.inf cdef np.float64_t MAXfloat64 = np.inf cdef double NaN = np.NaN -cdef double nan = NaN cdef inline int int_max(int a, int b): return a if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b From 9c4e4c8959853c7cda554d8e9b530efdd8ef9cb1 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Thu, 7 Sep 2017 20:47:52 -0400 Subject: [PATCH 061/188] Removed Timedelta.is_populated and fixed spelling errors (#17469) --- doc/source/api.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index d34cec86638fba..c32a541d196057 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1704,7 +1704,7 @@ Methods Timestamp.floor Timestamp.freq Timestamp.freqstr - Timestamp.from_ordinal + Timestamp.fromordinal Timestamp.fromtimestamp Timestamp.isocalendar Timestamp.isoformat @@ -1769,9 +1769,7 @@ Properties Timedelta.asm8 Timedelta.components Timedelta.days - Timedelta.delta Timedelta.freq - Timedelta.is_populated Timedelta.max Timedelta.microseconds Timedelta.min @@ -1789,10 +1787,9 @@ Methods Timedelta.floor Timedelta.isoformat Timedelta.round - Timdelta.to_pytimedelta + Timedelta.to_pytimedelta Timedelta.to_timedelta64 Timedelta.total_seconds - Timedelta.view Window ------ From 7e4e8acf5b5d68b3dfadecd3ba816d4f0b9be0ce Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Sep 2017 18:00:05 -0700 Subject: [PATCH 062/188] PERF: Implement get_freq_code in cython frequencies (#17422) --- asv_bench/benchmarks/period.py | 29 ++++ pandas/_libs/tslibs/__init__.py | 0 pandas/_libs/tslibs/frequencies.pyx | 201 ++++++++++++++++++++++++++++ pandas/tseries/frequencies.py | 79 +---------- setup.py | 4 + 
 5 files changed, 235 insertions(+), 78 deletions(-)
 create mode 100644 pandas/_libs/tslibs/__init__.py
 create mode 100644 pandas/_libs/tslibs/frequencies.pyx

diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py
index f9837191a7bae8..78d66295f28cc6 100644
--- a/asv_bench/benchmarks/period.py
+++ b/asv_bench/benchmarks/period.py
@@ -2,6 +2,35 @@
 from pandas import Series, Period, PeriodIndex, date_range
 
+
+class PeriodProperties(object):
+    def setup(self):
+        self.per = Period('2012-06-01', freq='M')
+
+    def time_year(self):
+        self.per.year
+
+    def time_month(self):
+        self.per.month
+
+    def time_quarter(self):
+        self.per.quarter
+
+    def time_day(self):
+        self.per.day
+
+    def time_hour(self):
+        self.per.hour
+
+    def time_minute(self):
+        self.per.minute
+
+    def time_second(self):
+        self.per.second
+
+    def time_leap_year(self):
+        self.per.is_leapyear
+
+
 class Constructor(object):
     goal_time = 0.2

diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx
new file mode 100644
index 00000000000000..35429e8ae87f00
--- /dev/null
+++ b/pandas/_libs/tslibs/frequencies.pyx
@@ -0,0 +1,201 @@
+# -*- coding: utf-8 -*-
+# cython: profile=False
+import re
+
+cimport cython
+
+import numpy as np
+cimport numpy as np
+np.import_array()
+
+from util cimport is_integer_object
+
+
+cpdef get_freq_code(freqstr):
+    """
+    Return freq str or tuple to freq code and stride (mult)
+
+    Parameters
+    ----------
+    freqstr : str or tuple
+
+    Returns
+    -------
+    return : tuple of base frequency code and stride (mult)
+
+    Example
+    -------
+    >>> get_freq_code('3D')
+    (6000, 3)
+
+    >>> get_freq_code('D')
+    (6000, 1)
+
+    >>> get_freq_code(('D', 3))
+    (6000, 3)
+    """
+    if getattr(freqstr, '_typ', None) == 'dateoffset':
+        freqstr = (freqstr.rule_code, freqstr.n)
+
+    if isinstance(freqstr, tuple):
+        if (is_integer_object(freqstr[0]) and
+                is_integer_object(freqstr[1])):
+            # e.g., freqstr = (2000, 1)
+            return freqstr
+        else:
+            # e.g., freqstr = ('T', 5)
+            try:
+                code = _period_str_to_code(freqstr[0])
+                stride = freqstr[1]
+            except:
+                if is_integer_object(freqstr[1]):
+                    raise
+                code = _period_str_to_code(freqstr[1])
+                stride = freqstr[0]
+            return code, stride
+
+    if is_integer_object(freqstr):
+        return (freqstr, 1)
+
+    base, stride = _base_and_stride(freqstr)
+    code = _period_str_to_code(base)
+
+    return code, stride
+
+
+# hack to handle WOM-1MON
+opattern = re.compile(
+    r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)'
+)
+
+
+cpdef _base_and_stride(freqstr):
+    """
+    Return base freq and stride info from string representation
+
+    Examples
+    --------
+    _freq_and_stride('5Min') -> 'Min', 5
+    """
+    groups = opattern.match(freqstr)
+
+    if not groups:
+        raise ValueError("Could not evaluate {freq}".format(freq=freqstr))
+
+    stride = groups.group(1)
+
+    if len(stride):
+        stride = int(stride)
+    else:
+        stride = 1
+
+    base = groups.group(2)
+
+    return (base, stride)
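To see what ``_base_and_stride`` above extracts, here is a minimal sketch that reuses the same regex from plain Python; only the scaffolding around the pattern is invented here::

    import re

    opattern = re.compile(
        r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)')

    m = opattern.match('3D')
    assert (m.group(1), m.group(2)) == ('3', 'D')

    # the "WOM-1MON" hack: the stride group matches empty and the
    # digit stays embedded in the base alias
    m = opattern.match('WOM-1MON')
    assert (m.group(1), m.group(2)) == ('', 'WOM-1MON')

+
+
+# ---------------------------------------------------------------------
+# Period codes
+
+# period frequency constants corresponding to scikits timeseries
+# originals
+_period_code_map = {
+    # Annual freqs with various fiscal year ends.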
+ # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. + # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000, # Nanosecondly +} + +# Yearly aliases; careful not to put these in _reverse_period_code_map +_period_code_map.update({'Y' + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith('A-')}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day + }) + +_dont_uppercase = set(('MS', 'ms')) + +_lite_rule_alias = { + 'W': 'W-SUN', + 'Q': 'Q-DEC', + + 'A': 'A-DEC', # YearEnd(month=12), + 'Y': 'A-DEC', + 'AS': 'AS-JAN', # YearBegin(month=1), + 'YS': 'AS-JAN', + 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BY': 'BA-DEC', + 'BAS': 'BAS-JAN', # BYearBegin(month=1), + 'BYS': 'BAS-JAN', + + 'Min': 'T', + 'min': 'T', + 'ms': 'L', + 'us': 'U', + 'ns': 'N'} + +_INVALID_FREQ_ERROR = "Invalid frequency: {0}" + + +cpdef _period_str_to_code(freqstr): + freqstr = _lite_rule_alias.get(freqstr, freqstr) + + if freqstr not in _dont_uppercase: + lower = freqstr.lower() + freqstr = _lite_rule_alias.get(lower, freqstr) + + if freqstr not in _dont_uppercase: + freqstr = freqstr.upper() + try: + return _period_code_map[freqstr] + except KeyError: + raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 7f34bcaf52926e..6644a33245a849 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( - is_integer, is_period_arraylike, is_timedelta64_dtype, is_datetime64_dtype) @@ -21,6 +20,7 @@ from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta +from 
pandas._libs.tslibs.frequencies import get_freq_code, _base_and_stride from pytz import AmbiguousTimeError @@ -298,58 +298,6 @@ def get_freq(freq): return freq -def get_freq_code(freqstr): - """ - Return freq str or tuple to freq code and stride (mult) - - Parameters - ---------- - freqstr : str or tuple - - Returns - ------- - return : tuple of base frequency code and stride (mult) - - Example - ------- - >>> get_freq_code('3D') - (6000, 3) - - >>> get_freq_code('D') - (6000, 1) - - >>> get_freq_code(('D', 3)) - (6000, 3) - """ - if isinstance(freqstr, DateOffset): - freqstr = (freqstr.rule_code, freqstr.n) - - if isinstance(freqstr, tuple): - if (is_integer(freqstr[0]) and - is_integer(freqstr[1])): - # e.g., freqstr = (2000, 1) - return freqstr - else: - # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - stride = freqstr[1] - except: - if is_integer(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] - return code, stride - - if is_integer(freqstr): - return (freqstr, 1) - - base, stride = _base_and_stride(freqstr) - code = _period_str_to_code(base) - - return code, stride - - def _get_freq_str(base, mult=1): code = _reverse_period_code_map.get(base) if mult == 1: @@ -577,31 +525,6 @@ def to_offset(freq): ) -def _base_and_stride(freqstr): - """ - Return base freq and stride info from string representation - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - return (base, stride) - - def get_base_alias(freqstr): """ Returns the base frequency alias, e.g., '5D' -> 'D' diff --git a/setup.py b/setup.py index 444db5bc4d275e..4e326beefa9081 100755 --- a/setup.py +++ b/setup.py @@ -341,6 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'panads/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -492,6 +493,8 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies', + 'pxdfiles': ['_libs/src/util']}, '_libs.index': {'pyxfile': '_libs/index', 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'], @@ -653,6 +656,7 @@ def pxd(name): 'pandas.io.formats', 'pandas.io.clipboard', 'pandas._libs', + 'pandas._libs.tslibs', 'pandas.plotting', 'pandas.stats', 'pandas.types', From 3ccb88c912d898b2fd8decd3d988aca264e4e820 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 Sep 2017 03:05:05 -0700 Subject: [PATCH 063/188] Fix typo in setup.py introduced by 17422 (#17473) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 4e326beefa9081..3269fe7972cf0f 100755 --- a/setup.py +++ b/setup.py @@ -341,7 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', - 'panads/_libs/tslibs/frequencies.pyx', + 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): From d6df8ea99f2574480e934aae01a1e142f935145e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 8 Sep 2017 03:16:13 -0700 Subject: [PATCH 064/188] 
Follow up to #17422 (#17472) --- pandas/_libs/period.pyx | 55 ++++++------ pandas/_libs/tslibs/frequencies.pxd | 4 + pandas/_libs/tslibs/frequencies.pyx | 3 + pandas/tseries/frequencies.py | 128 +--------------------------- 4 files changed, 38 insertions(+), 152 deletions(-) create mode 100644 pandas/_libs/tslibs/frequencies.pxd diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 8f89b812fec04f..e2a3baa8d6e8ba 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -10,17 +10,16 @@ from cpython cimport ( from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np +import_array() from libc.stdlib cimport free -from pandas import compat from pandas.compat import PY2 cimport cython from datetime cimport ( is_leapyear, - PyDateTime_IMPORT, pandas_datetimestruct, pandas_datetimestruct_to_datetime, pandas_datetime_to_datetimestruct, @@ -29,6 +28,7 @@ from datetime cimport ( cimport util, lib +from util cimport is_period_object, is_string_object from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib @@ -41,6 +41,8 @@ from tslib cimport ( _get_dst_info, _nat_scalar_rules) +from tslibs.frequencies cimport get_freq_code + from pandas.tseries import offsets from pandas.core.tools.datetimes import parse_time_string from pandas.tseries import frequencies @@ -329,8 +331,6 @@ cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] cdef object _period_strftime(int64_t value, int freq, object fmt): - import sys - cdef: Py_ssize_t i date_info dinfo @@ -683,7 +683,7 @@ cdef class _Period(object): def _maybe_convert_freq(cls, object freq): if isinstance(freq, (int, tuple)): - code, stride = frequencies.get_freq_code(freq) + code, stride = get_freq_code(freq) freq = frequencies._get_freq_str(code, stride) freq = frequencies.to_offset(freq) @@ -707,7 +707,7 @@ cdef class _Period(object): return self def __richcmp__(self, other, op): - if isinstance(other, Period): + if is_period_object(other): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -753,7 +753,7 @@ cdef class _Period(object): return NotImplemented def __add__(self, other): - if isinstance(self, Period): + if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, offsets.DateOffset, Timedelta)): @@ -765,13 +765,13 @@ cdef class _Period(object): return Period(ordinal=ordinal, freq=self.freq) else: # pragma: no cover return NotImplemented - elif isinstance(other, Period): + elif is_period_object(other): return other + self else: return NotImplemented def __sub__(self, other): - if isinstance(self, Period): + if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, offsets.DateOffset, Timedelta)): @@ -780,7 +780,7 @@ cdef class _Period(object): elif lib.is_integer(other): ordinal = self.ordinal - other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) - elif isinstance(other, Period): + elif is_period_object(other): if other.freq != self.freq: msg = _DIFFERENT_FREQ.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) @@ -789,7 +789,7 @@ cdef class _Period(object): return -other.__sub__(self) else: # pragma: no cover return NotImplemented - elif isinstance(other, Period): + elif is_period_object(other): if self is NaT: return NaT return NotImplemented @@ -813,8 +813,8 @@ cdef class _Period(object): """ freq = self._maybe_convert_freq(freq) how = 
_validate_end_alias(how) - base1, mult1 = frequencies.get_freq_code(self.freq) - base2, mult2 = frequencies.get_freq_code(freq) + base1, mult1 = get_freq_code(self.freq) + base2, mult2 = get_freq_code(freq) # mult1 can't be negative or 0 end = how == 'E' @@ -860,17 +860,17 @@ cdef class _Period(object): how = _validate_end_alias(how) if freq is None: - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) freq = frequencies.get_to_timestamp_base(base) - base, mult = frequencies.get_freq_code(freq) + base, mult = get_freq_code(freq) val = self.asfreq(freq, how) dt64 = period_ordinal_to_dt64(val.ordinal, base) return Timestamp(dt64, tz=tz) cdef _field(self, alias): - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) return get_period_field(alias, self.ordinal, base) property year: @@ -935,7 +935,7 @@ cdef class _Period(object): return self.freq.freqstr def __repr__(self): - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) formatted = period_format(self.ordinal, base) return "Period('%s', '%s')" % (formatted, self.freqstr) @@ -946,7 +946,7 @@ cdef class _Period(object): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) formatted = period_format(self.ordinal, base) value = ("%s" % formatted) return value @@ -1096,7 +1096,7 @@ cdef class _Period(object): >>> a.strftime('%b. %d, %Y was a %A') 'Jan. 01, 2001 was a Monday' """ - base, mult = frequencies.get_freq_code(self.freq) + base, mult = get_freq_code(self.freq) return period_format(self.ordinal, base, fmt) @@ -1161,10 +1161,10 @@ class Period(_Period): ordinal = _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq) - elif isinstance(value, Period): + elif is_period_object(value): other = value - if freq is None or frequencies.get_freq_code( - freq) == frequencies.get_freq_code(other.freq): + if freq is None or get_freq_code( + freq) == get_freq_code(other.freq): ordinal = other.ordinal freq = other.freq else: @@ -1174,7 +1174,7 @@ class Period(_Period): elif is_null_datetimelike(value) or value in tslib._nat_strings: ordinal = iNaT - elif isinstance(value, compat.string_types) or lib.is_integer(value): + elif is_string_object(value) or lib.is_integer(value): if lib.is_integer(value): value = str(value) value = value.upper() @@ -1191,7 +1191,7 @@ class Period(_Period): dt = value if freq is None: raise ValueError('Must supply freq for datetime value') - elif isinstance(value, np.datetime64): + elif util.is_datetime64_object(value): dt = Timestamp(value) if freq is None: raise ValueError('Must supply freq for datetime value') @@ -1204,7 +1204,7 @@ class Period(_Period): raise ValueError(msg) if ordinal is None: - base, mult = frequencies.get_freq_code(freq) + base, mult = get_freq_code(freq) ordinal = get_period_ordinal(dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond, 0, base) @@ -1214,7 +1214,7 @@ class Period(_Period): def _ordinal_from_fields(year, month, quarter, day, hour, minute, second, freq): - base, mult = frequencies.get_freq_code(freq) + base, mult = get_freq_code(freq) if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) @@ -1227,8 +1227,7 @@ def _quarter_to_myear(year, quarter, freq): if quarter <= 0 or quarter > 4: raise ValueError('Quarter must be 1 <= q <= 4') - mnum = frequencies._month_numbers[ - 
frequencies._get_rule_month(freq)] + 1 + mnum = tslib._MONTH_NUMBERS[tslib._get_rule_month(freq)] + 1 month = (mnum + (quarter - 1) * 3) % 12 + 1 if month > mnum: year -= 1 diff --git a/pandas/_libs/tslibs/frequencies.pxd b/pandas/_libs/tslibs/frequencies.pxd new file mode 100644 index 00000000000000..974eb4ab45df0c --- /dev/null +++ b/pandas/_libs/tslibs/frequencies.pxd @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +cpdef get_freq_code(freqstr) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx index 35429e8ae87f00..f7889d76abbc71 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -150,6 +150,9 @@ _period_code_map = { "N": 12000, # Nanosecondly } +_reverse_period_code_map = { + _period_code_map[key]: key for key in _period_code_map} + # Yearly aliases; careful not to put these in _reverse_period_code_map _period_code_map.update({'Y' + key[1:]: _period_code_map[key] for key in _period_code_map diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 6644a33245a849..085a3a784557ba 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -20,7 +20,10 @@ from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta -from pandas._libs.tslibs.frequencies import get_freq_code, _base_and_stride +from pandas._libs.tslibs.frequencies import ( # noqa + get_freq_code, _base_and_stride, _period_str_to_code, + _INVALID_FREQ_ERROR, opattern, _lite_rule_alias, _dont_uppercase, + _period_code_map, _reverse_period_code_map) from pytz import AmbiguousTimeError @@ -375,27 +378,6 @@ def get_period_alias(offset_str): return _offset_to_period_map.get(offset_str, None) -_lite_rule_alias = { - 'W': 'W-SUN', - 'Q': 'Q-DEC', - - 'A': 'A-DEC', # YearEnd(month=12), - 'Y': 'A-DEC', - 'AS': 'AS-JAN', # YearBegin(month=1), - 'YS': 'AS-JAN', - 'BA': 'BA-DEC', # BYearEnd(month=12), - 'BY': 'BA-DEC', - 'BAS': 'BAS-JAN', # BYearBegin(month=1), - 'BYS': 'BAS-JAN', - - 'Min': 'T', - 'min': 'T', - 'ms': 'L', - 'us': 'U', - 'ns': 'N' -} - - _name_to_offset_map = {'days': Day(1), 'hours': Hour(1), 'minutes': Minute(1), @@ -405,9 +387,6 @@ def get_period_alias(offset_str): 'nanoseconds': Nano(1)} -_INVALID_FREQ_ERROR = "Invalid frequency: {0}" - - @deprecate_kwarg(old_arg_name='freqstr', new_arg_name='freq') def to_offset(freq): """ @@ -519,12 +498,6 @@ def to_offset(freq): return delta -# hack to handle WOM-1MON -opattern = re.compile( - r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' -) - - def get_base_alias(freqstr): """ Returns the base frequency alias, e.g., '5D' -> 'D' @@ -532,9 +505,6 @@ def get_base_alias(freqstr): return _base_and_stride(freqstr)[0] -_dont_uppercase = set(('MS', 'ms')) - - def get_offset(name): """ Return DateOffset object associated with rule name @@ -583,96 +553,6 @@ def get_standard_freq(freq): # --------------------------------------------------------------------- # Period codes -# period frequency constants corresponding to scikits timeseries -# originals -_period_code_map = { - # Annual freqs with various fiscal year ends. 
-    # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005
-    "A-DEC": 1000,  # Annual - December year end
-    "A-JAN": 1001,  # Annual - January year end
-    "A-FEB": 1002,  # Annual - February year end
-    "A-MAR": 1003,  # Annual - March year end
-    "A-APR": 1004,  # Annual - April year end
-    "A-MAY": 1005,  # Annual - May year end
-    "A-JUN": 1006,  # Annual - June year end
-    "A-JUL": 1007,  # Annual - July year end
-    "A-AUG": 1008,  # Annual - August year end
-    "A-SEP": 1009,  # Annual - September year end
-    "A-OCT": 1010,  # Annual - October year end
-    "A-NOV": 1011,  # Annual - November year end
-
-    # Quarterly frequencies with various fiscal year ends.
-    # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005
-    "Q-DEC": 2000,  # Quarterly - December year end
-    "Q-JAN": 2001,  # Quarterly - January year end
-    "Q-FEB": 2002,  # Quarterly - February year end
-    "Q-MAR": 2003,  # Quarterly - March year end
-    "Q-APR": 2004,  # Quarterly - April year end
-    "Q-MAY": 2005,  # Quarterly - May year end
-    "Q-JUN": 2006,  # Quarterly - June year end
-    "Q-JUL": 2007,  # Quarterly - July year end
-    "Q-AUG": 2008,  # Quarterly - August year end
-    "Q-SEP": 2009,  # Quarterly - September year end
-    "Q-OCT": 2010,  # Quarterly - October year end
-    "Q-NOV": 2011,  # Quarterly - November year end
-
-    "M": 3000,  # Monthly
-
-    "W-SUN": 4000,  # Weekly - Sunday end of week
-    "W-MON": 4001,  # Weekly - Monday end of week
-    "W-TUE": 4002,  # Weekly - Tuesday end of week
-    "W-WED": 4003,  # Weekly - Wednesday end of week
-    "W-THU": 4004,  # Weekly - Thursday end of week
-    "W-FRI": 4005,  # Weekly - Friday end of week
-    "W-SAT": 4006,  # Weekly - Saturday end of week
-
-    "B": 5000,  # Business days
-    "D": 6000,  # Daily
-    "H": 7000,  # Hourly
-    "T": 8000,  # Minutely
-    "S": 9000,  # Secondly
-    "L": 10000,  # Millisecondly
-    "U": 11000,  # Microsecondly
-    "N": 12000,  # Nanosecondly
-}
-
-_reverse_period_code_map = {}
-for _k, _v in compat.iteritems(_period_code_map):
-    _reverse_period_code_map[_v] = _k
-
-# Yearly aliases
-year_aliases = {}
-
-for k, v in compat.iteritems(_period_code_map):
-    if k.startswith("A-"):
-        alias = "Y" + k[1:]
-        year_aliases[alias] = v
-
-_period_code_map.update(**year_aliases)
-del year_aliases
-
-_period_code_map.update({
-    "Q": 2000,  # Quarterly - December year end (default quarterly)
-    "A": 1000,  # Annual
-    "W": 4000,  # Weekly
-    "C": 5000,  # Custom Business Day
-})
-
-
-def _period_str_to_code(freqstr):
-    freqstr = _lite_rule_alias.get(freqstr, freqstr)
-
-    if freqstr not in _dont_uppercase:
-        lower = freqstr.lower()
-        freqstr = _lite_rule_alias.get(lower, freqstr)
-
-    if freqstr not in _dont_uppercase:
-        freqstr = freqstr.upper()
-    try:
-        return _period_code_map[freqstr]
-    except KeyError:
-        raise ValueError(_INVALID_FREQ_ERROR.format(freqstr))
-
 
 def infer_freq(index, warn=True):
     """

From fdbc6b8f4b36f07da62fc901b19754f922ae3952 Mon Sep 17 00:00:00 2001
From: gfyoung
Date: Sat, 9 Sep 2017 12:09:08 -0700
Subject: [PATCH 065/188] MAINT: calcurate --> calculate in _doctools.py

---
 pandas/util/_doctools.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/pandas/util/_doctools.py b/pandas/util/_doctools.py
index cbc9518b96416f..d654c78b8b13fe 100644
--- a/pandas/util/_doctools.py
+++ b/pandas/util/_doctools.py
@@ -15,12 +15,18 @@ def __init__(self, cell_width=0.37, cell_height=0.25, font_size=7.5):
         self.font_size = font_size
 
     def _shape(self, df):
-        """Calcurate table chape considering index levels"""
+        """
+        Calculate table shape considering index levels.
+ """ + row, col = df.shape return row + df.columns.nlevels, col + df.index.nlevels def _get_cells(self, left, right, vertical): - """Calcurate appropriate figure size based on left and right data""" + """ + Calculate appropriate figure size based on left and right data. + """ + if vertical: # calcurate required number of cells vcells = max(sum([self._shape(l)[0] for l in left]), From 23050dca1b404d23527132c0277f3d40dc41cab8 Mon Sep 17 00:00:00 2001 From: Matt Bark Date: Sun, 10 Sep 2017 03:30:48 -0400 Subject: [PATCH 066/188] BUG: Fix TypeError caused by GH13374 (#17465) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 4 +++- pandas/tests/io/parser/python_parser_only.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f50052347cfb56..bfe7d974a60972 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -411,6 +411,7 @@ I/O - Bug in :func:`read_csv` when called with a single-element list ``header`` would return a ``DataFrame`` of all NaN values (:issue:`7757`) - Bug in :func:`read_stata` where value labels could not be read when using an iterator (:issue:`16923`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) +- Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) Plotting ^^^^^^^^ diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6adf154aabba7f..d9e83176d0d6e3 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2836,7 +2836,9 @@ def _rows_to_cols(self, content): for row_num, actual_len in bad_lines: msg = ('Expected %d fields in line %d, saw %d' % (col_len, row_num + 1, actual_len)) - if len(self.delimiter) > 1 and self.quoting != csv.QUOTE_NONE: + if (self.delimiter and + len(self.delimiter) > 1 and + self.quoting != csv.QUOTE_NONE): # see gh-13374 reason = ('Error could possibly be due to quotes being ' 'ignored when a multi-char delimiter is used.') diff --git a/pandas/tests/io/parser/python_parser_only.py b/pandas/tests/io/parser/python_parser_only.py index a0784d3aeae2d0..c3dc91b3f188c4 100644 --- a/pandas/tests/io/parser/python_parser_only.py +++ b/pandas/tests/io/parser/python_parser_only.py @@ -218,6 +218,25 @@ def test_multi_char_sep_quotes(self): self.read_csv(StringIO(data), sep=',,', quoting=csv.QUOTE_NONE) + def test_none_delimiter(self): + # see gh-13374 and gh-17465 + + data = "a,b,c\n0,1,2\n3,4,5,6\n7,8,9" + expected = DataFrame({'a': [0, 7], + 'b': [1, 8], + 'c': [2, 9]}) + + # We expect the third line in the data to be + # skipped because it is malformed, + # but we do not expect any errors to occur. 
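As an aside on the ``_rows_to_cols`` guard above: with ``sep=None``, delimiter sniffing leaves the delimiter as ``None``, which is exactly the case the fix protects against. A tiny sketch (the ``delimiter`` variable here is hypothetical, standing in for ``self.delimiter``)::

    delimiter = None
    # without the added `self.delimiter and` short-circuit, evaluating
    # len(delimiter) raised:
    #     TypeError: object of type 'NoneType' has no len()
    # instead of reporting the malformed line
    assert not (delimiter and len(delimiter) > 1)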
+ result = self.read_csv(StringIO(data), header=0, + sep=None, + error_bad_lines=False, + warn_bad_lines=True, + engine='python', + tupleize_cols=True) + tm.assert_frame_equal(result, expected) + def test_skipfooter_bad_row(self): # see gh-13879 # see gh-15910 From c3ad501ed31e2e71ab91a201ed72779fdd597698 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 10 Sep 2017 07:19:52 -0700 Subject: [PATCH 067/188] Remove incorrect kwds from DateOffset tests (#17486) --- pandas/tests/tseries/test_offsets.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index e03b3e0a85e5e6..7e6e85f322fe0f 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -111,7 +111,10 @@ def offset_types(self): def _get_offset(self, klass, value=1, normalize=False): # create instance from offset class - if klass is FY5253 or klass is FY5253Quarter: + if klass is FY5253: + klass = klass(n=value, startingMonth=1, weekday=1, + variation='last', normalize=normalize) + elif klass is FY5253Quarter: klass = klass(n=value, startingMonth=1, weekday=1, qtr_with_extra_week=1, variation='last', normalize=normalize) @@ -2629,7 +2632,7 @@ def test_offset(self): def test_day_of_month(self): dt = datetime(2007, 1, 1) - offset = MonthEnd(day=20) + offset = MonthEnd() result = dt + offset assert result == Timestamp(2007, 1, 31) @@ -3678,7 +3681,7 @@ def test_onOffset(self): 1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4) offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, - variation="nearest", qtr_with_extra_week=4) + variation="nearest") tests = [ # From Wikipedia From e6aed2ebb7374ed2a6a7c284750d47728aec285e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 10 Sep 2017 07:43:37 -0700 Subject: [PATCH 068/188] Remove pyx dependencies from setup (#17478) --- setup.py | 31 +++++++++---------------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index 3269fe7972cf0f..d64a78db7500ac 100755 --- a/setup.py +++ b/setup.py @@ -347,14 +347,6 @@ class CheckSDist(sdist_class): def initialize_options(self): sdist_class.initialize_options(self) - ''' - self._pyxfiles = [] - for root, dirs, files in os.walk('pandas'): - for f in files: - if f.endswith('.pyx'): - self._pyxfiles.append(pjoin(root, f)) - ''' - def run(self): if 'cython' in cmdclass: self.run_command('cython') @@ -479,11 +471,10 @@ def pxd(name): '_libs.lib': {'pyxfile': '_libs/lib', 'depends': lib_depends + tseries_depends}, '_libs.hashtable': {'pyxfile': '_libs/hashtable', - 'pxdfiles': ['_libs/hashtable'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.tslib': {'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/src/util', '_libs/lib'], + 'pxdfiles': ['_libs/src/util'], 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', @@ -498,21 +489,20 @@ def pxd(name): '_libs.index': {'pyxfile': '_libs/index', 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'], - 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], + 'pxdfiles': ['_libs/src/util'], 'depends': _pxi_dep['index']}, '_libs.algos': {'pyxfile': '_libs/algos', - 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], + 'pxdfiles': ['_libs/src/util'], 'depends': _pxi_dep['algos']}, '_libs.groupby': {'pyxfile': '_libs/groupby', - 'pxdfiles': 
['_libs/src/util', '_libs/algos'],
-                      'depends': _pxi_dep['groupby']},
+                      'pxdfiles': ['_libs/src/util'],
+                      'depends': _pxi_dep['groupby']},
     '_libs.join': {'pyxfile': '_libs/join',
-                   'pxdfiles': ['_libs/src/util', '_libs/hashtable'],
+                   'pxdfiles': ['_libs/src/util'],
                    'depends': _pxi_dep['join']},
     '_libs.reshape': {'pyxfile': '_libs/reshape',
                       'depends': _pxi_dep['reshape']},
     '_libs.interval': {'pyxfile': '_libs/interval',
-                       'pxdfiles': ['_libs/hashtable'],
                        'depends': _pxi_dep['interval']},
     '_libs.window': {'pyxfile': '_libs/window',
                      'pxdfiles': ['_libs/src/skiplist', '_libs/src/util'],
@@ -525,12 +515,9 @@ def pxd(name):
                      'sources': ['pandas/_libs/src/parser/tokenizer.c',
                                  'pandas/_libs/src/parser/io.c']},
     '_libs.sparse': {'pyxfile': '_libs/sparse',
-                     'depends': (['pandas/_libs/sparse.pyx'] +
-                                 _pxi_dep['sparse'])},
-    '_libs.testing': {'pyxfile': '_libs/testing',
-                      'depends': ['pandas/_libs/testing.pyx']},
-    '_libs.hashing': {'pyxfile': '_libs/hashing',
-                      'depends': ['pandas/_libs/hashing.pyx']},
+                     'depends': _pxi_dep['sparse']},
+    '_libs.testing': {'pyxfile': '_libs/testing'},
+    '_libs.hashing': {'pyxfile': '_libs/hashing'},
     'io.sas._sas': {'pyxfile': 'io/sas/sas'},
 }

From 42ed4f143f8b0b386c90df9fa8a55d0f2e5a857c Mon Sep 17 00:00:00 2001
From: Licht Takeuchi
Date: Mon, 11 Sep 2017 09:01:41 +0900
Subject: [PATCH 069/188] ENH: Add Styler.where (#17474)

---
 doc/source/api.rst                    |  1 +
 doc/source/whatsnew/v0.21.0.txt       |  1 +
 pandas/io/formats/style.py            | 42 +++++++++++++++++++
 pandas/tests/io/formats/test_style.py | 58 +++++++++++++++++++++++++++
 4 files changed, 102 insertions(+)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index c32a541d196057..27a4ab9cc6cbc4 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -2062,6 +2062,7 @@ Style Application
 
    Styler.apply
    Styler.applymap
+   Styler.where
    Styler.format
    Styler.set_precision
    Styler.set_table_styles

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index bfe7d974a60972..eccd71f45ec276 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -112,6 +112,7 @@ Other Enhancements
 - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
 - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).
 - :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`)
+- :func:`Styler.where` has been implemented. It is a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`).

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index 87d672197be300..d7677e3642c26e 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -618,11 +618,53 @@ def applymap(self, func, subset=None, **kwargs):
         -------
         self : Styler
 
+        See Also
+        --------
+        Styler.where
+
         """
         self._todo.append((lambda instance: getattr(instance, '_applymap'),
                            (func, subset), kwargs))
         return self
 
+    def where(self, cond, value, other=None, subset=None, **kwargs):
+        """
+        Apply a function elementwise, updating the HTML
+        representation with a style selected according to the
+        return value of ``cond``.
+
+        ..
versionadded:: 0.21.0 + + Parameters + ---------- + cond : callable + ``cond`` should take a scalar and return a boolean + value : str + applied when ``cond`` returns true + other : str + applied when ``cond`` returns false + subset : IndexSlice + a valid indexer to limit ``data`` to *before* applying the + function. Consider using a pandas.IndexSlice + kwargs : dict + pass along to ``cond`` + + Returns + ------- + self : Styler + + See Also + -------- + Styler.applymap + + """ + + if other is None: + other = '' + + return self.applymap(lambda val: value if cond(val) else other, + subset=subset, **kwargs) + def set_precision(self, precision): """ Set the precision used to render. diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index 59d9f938734abf..811381e4cbd2ad 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -265,6 +265,64 @@ def f(x): col in self.df.loc[slice_].columns) assert result == expected + def test_where_with_one_style(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = 'foo: bar' + + result = self.df.style.where(f, style1)._compute().ctx + expected = dict(((r, c), + [style1 if f(self.df.loc[row, col]) else '']) + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns)) + assert result == expected + + def test_where_subset(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = 'foo: bar' + style2 = 'baz: foo' + + slices = [pd.IndexSlice[:], pd.IndexSlice[:, ['A']], + pd.IndexSlice[[1], :], pd.IndexSlice[[1], ['A']], + pd.IndexSlice[:2, ['A', 'B']]] + + for slice_ in slices: + result = self.df.style.where(f, style1, style2, + subset=slice_)._compute().ctx + expected = dict(((r, c), + [style1 if f(self.df.loc[row, col]) else style2]) + for r, row in enumerate(self.df.index) + for c, col in enumerate(self.df.columns) + if row in self.df.loc[slice_].index and + col in self.df.loc[slice_].columns) + assert result == expected + + def test_where_subset_compare_with_applymap(self): + # GH 17474 + def f(x): + return x > 0.5 + + style1 = 'foo: bar' + style2 = 'baz: foo' + + def g(x): + return style1 if f(x) else style2 + + slices = [pd.IndexSlice[:], pd.IndexSlice[:, ['A']], + pd.IndexSlice[[1], :], pd.IndexSlice[[1], ['A']], + pd.IndexSlice[:2, ['A', 'B']]] + + for slice_ in slices: + result = self.df.style.where(f, style1, style2, + subset=slice_)._compute().ctx + expected = self.df.style.applymap(g, subset=slice_)._compute().ctx + assert result == expected + def test_empty(self): df = pd.DataFrame({'A': [1, 0]}) s = df.style From f3b6d1f91643d245d6b43b41e7c9fd1349fb8de5 Mon Sep 17 00:00:00 2001 From: rvernica Date: Mon, 11 Sep 2017 04:03:18 -0700 Subject: [PATCH 070/188] Add file-like object to docs (#17492) --- pandas/io/feather_format.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 87a4931421d7d4..b2bf4ab7ff7f1d 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -41,8 +41,7 @@ def to_feather(df, path): Parameters ---------- df : DataFrame - path : string - File path + path : string file path, or file-like object """ path = _stringify_path(path) @@ -92,8 +91,7 @@ def read_feather(path, nthreads=1): Parameters ---------- - path : string - File path + path : string file path, or file-like object nthreads : int, default 1 Number of CPU threads to use when reading to pandas.DataFrame From 46856c3936540a47df719d10a7699eb35673e4a4 Mon Sep 17 
00:00:00 2001 From: jbrockmendel Date: Mon, 11 Sep 2017 04:22:56 -0700 Subject: [PATCH 071/188] Implement _is_utc in timezones (#17419) --- pandas/_libs/index.pyx | 7 +------ pandas/_libs/period.pyx | 2 +- pandas/_libs/tslib.pxd | 1 - pandas/_libs/tslib.pyx | 4 ++-- pandas/_libs/tslibs/__init__.py | 2 ++ pandas/_libs/tslibs/timezones.pxd | 4 ++++ pandas/_libs/tslibs/timezones.pyx | 12 ++++++++++++ setup.py | 2 ++ 8 files changed, 24 insertions(+), 10 deletions(-) create mode 100644 pandas/_libs/tslibs/timezones.pxd create mode 100644 pandas/_libs/tslibs/timezones.pyx diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 42ba0c1cadaec1..bf4d53683c9b71 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,6 +17,7 @@ cimport tslib from hashtable cimport HashTable +from tslibs.timezones cimport _is_utc from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta @@ -32,9 +33,6 @@ cdef extern from "datetime.h": cdef int64_t iNaT = util.get_nat() -from dateutil.tz import tzutc as _du_utc -import pytz -UTC = pytz.utc PyDateTime_IMPORT @@ -559,9 +557,6 @@ cdef inline _to_i8(object val): return ival return val -cdef inline bint _is_utc(object tz): - return tz is UTC or isinstance(tz, _du_utc) - cdef class MultiIndexObjectEngine(ObjectEngine): """ diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index e2a3baa8d6e8ba..08962bca824cac 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -34,9 +34,9 @@ from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, NaT, _get_utcoffset) +from tslibs.timezones cimport _is_utc from tslib cimport ( maybe_get_tz, - _is_utc, _is_tzlocal, _get_dst_info, _nat_scalar_rules) diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index aa8cbcb2cedc72..1d81c3cc15cd89 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -3,7 +3,6 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) cpdef object maybe_get_tz(object) -cdef bint _is_utc(object) cdef bint _is_tzlocal(object) cdef object _get_dst_info(object) cdef bint _nat_scalar_rules[6] diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7e009652f7f0c6..b1f794a0030d12 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -107,6 +107,8 @@ cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT +from tslibs.timezones cimport _is_utc + cdef inline object create_timestamp_from_ts( int64_t value, pandas_datetimestruct dts, object tz, object freq): @@ -1713,8 +1715,6 @@ def _localize_pydatetime(object dt, object tz): def get_timezone(tz): return _get_zone(tz) -cdef inline bint _is_utc(object tz): - return tz is UTC or isinstance(tz, _dateutil_tzutc) cdef inline object _get_zone(object tz): """ diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py index e69de29bb2d1d6..f3aa0424f03769 100644 --- a/pandas/_libs/tslibs/__init__.py +++ b/pandas/_libs/tslibs/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +# cython: profile=False diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd new file mode 100644 index 00000000000000..0708282abe1d0a --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pxd @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +cdef bint _is_utc(object tz) diff --git 
a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx new file mode 100644 index 00000000000000..43709e77b70d56 --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pyx @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +# dateutil compat +from dateutil.tz import tzutc as _dateutil_tzutc + +import pytz +UTC = pytz.utc + + +cdef inline bint _is_utc(object tz): + return tz is UTC or isinstance(tz, _dateutil_tzutc) diff --git a/setup.py b/setup.py index d64a78db7500ac..434ca644739165 100755 --- a/setup.py +++ b/setup.py @@ -341,6 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/timezones.pyx', 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] @@ -479,6 +480,7 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.period': {'pyxfile': '_libs/period', 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', From 34cc2e812f60687d2a4417ff26fc180f7c042674 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 12 Sep 2017 03:09:50 -0700 Subject: [PATCH 072/188] Follow-up to #17419 (#17497) --- pandas/_libs/period.pyx | 5 +-- pandas/_libs/src/inference.pyx | 7 ++-- pandas/_libs/tslib.pxd | 1 - pandas/_libs/tslib.pyx | 66 ++++-------------------------- pandas/_libs/tslibs/timezones.pxd | 8 ++++ pandas/_libs/tslibs/timezones.pyx | 68 ++++++++++++++++++++++++++++++- 6 files changed, 88 insertions(+), 67 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 08962bca824cac..2b0734f5cf2e7a 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -33,11 +33,10 @@ from util cimport is_period_object, is_string_object from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, - NaT, _get_utcoffset) -from tslibs.timezones cimport _is_utc + NaT) +from tslibs.timezones cimport _is_utc, _is_tzlocal, _get_utcoffset from tslib cimport ( maybe_get_tz, - _is_tzlocal, _get_dst_info, _nat_scalar_rules) diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 6b5a8f20f00671..95145ff49b02fd 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -2,7 +2,8 @@ import sys from decimal import Decimal cimport util cimport cython -from tslib import NaT, get_timezone +from tslib import NaT +from tslibs.timezones cimport _get_zone from datetime import datetime, timedelta iNaT = util.get_nat() @@ -900,13 +901,13 @@ cpdef bint is_datetime_with_singletz_array(ndarray[object] values): for i in range(n): base_val = values[i] if base_val is not NaT: - base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) + base_tz = _get_zone(getattr(base_val, 'tzinfo', None)) for j in range(i, n): val = values[j] if val is not NaT: tz = getattr(val, 'tzinfo', None) - if base_tz != tz and base_tz != get_timezone(tz): + if base_tz != tz and base_tz != _get_zone(tz): return False break diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index 1d81c3cc15cd89..c1b25963a62571 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -3,7 +3,6 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) cpdef object 
maybe_get_tz(object) -cdef bint _is_tzlocal(object) cdef object _get_dst_info(object) cdef bint _nat_scalar_rules[6] cdef bint _check_all_nulls(obj) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b1f794a0030d12..a8ae0fcd733d6c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -107,7 +107,13 @@ cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT -from tslibs.timezones cimport _is_utc +from tslibs.timezones cimport ( + _is_utc, _is_tzlocal, + _treat_tz_as_dateutil, _treat_tz_as_pytz, + _get_zone, + _get_utcoffset) +from tslibs.timezones import get_timezone, _get_utcoffset # noqa + cdef inline object create_timestamp_from_ts( int64_t value, pandas_datetimestruct dts, @@ -235,10 +241,6 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): return result -cdef inline bint _is_tzlocal(object tz): - return isinstance(tz, _dateutil_tzlocal) - - cdef inline bint _is_fixed_offset(object tz): if _treat_tz_as_dateutil(tz): if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: @@ -1443,11 +1445,6 @@ cdef class _TSObject: def __get__(self): return self.value -cpdef _get_utcoffset(tzinfo, obj): - try: - return tzinfo._utcoffset - except AttributeError: - return tzinfo.utcoffset(obj) # helper to extract datetime and int64 from several different possibilities cdef convert_to_tsobject(object ts, object tz, object unit, @@ -1712,48 +1709,6 @@ def _localize_pydatetime(object dt, object tz): return dt.replace(tzinfo=tz) -def get_timezone(tz): - return _get_zone(tz) - - -cdef inline object _get_zone(object tz): - """ - We need to do several things here: - 1) Distinguish between pytz and dateutil timezones - 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* - but a different tz object) - 3) Provide something to serialize when we're storing a datetime object - in pytables. - - We return a string prefaced with dateutil if it's a dateutil tz, else just - the tz name. It needs to be a string so that we can serialize it with - UJSON/pytables. maybe_get_tz (below) is the inverse of this process. - """ - if _is_utc(tz): - return 'UTC' - else: - if _treat_tz_as_dateutil(tz): - if '.tar.gz' in tz._filename: - raise ValueError( - 'Bad tz filename. Dateutil on python 3 on windows has a ' - 'bug which causes tzfile._filename to be the same for all ' - 'timezone files. Please construct dateutil timezones ' - 'implicitly by passing a string like "dateutil/Europe' - '/London" when you construct your pandas objects instead ' - 'of passing a timezone object. See ' - 'https://github.com/pandas-dev/pandas/pull/7362') - return 'dateutil/' + tz._filename - else: - # tz is a pytz timezone or unknown. - try: - zone = tz.zone - if zone is None: - return tz - return zone - except AttributeError: - return tz - - cpdef inline object maybe_get_tz(object tz): """ (Maybe) Construct a timezone object from a string. If tz is a string, use @@ -4285,13 +4240,6 @@ def tz_convert_single(int64_t val, object tz1, object tz2): # Timezone data caches, key is the pytz string or dateutil file name. 
dst_cache = {} -cdef inline bint _treat_tz_as_pytz(object tz): - return hasattr(tz, '_utc_transition_times') and hasattr( - tz, '_transition_info') - -cdef inline bint _treat_tz_as_dateutil(object tz): - return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') - def _p_tz_cache_key(tz): """ Python interface for cache function to facilitate testing.""" diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 0708282abe1d0a..897bd8af7e2deb 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -2,3 +2,11 @@ # cython: profile=False cdef bint _is_utc(object tz) +cdef bint _is_tzlocal(object tz) + +cdef bint _treat_tz_as_pytz(object tz) +cdef bint _treat_tz_as_dateutil(object tz) + +cdef object _get_zone(object tz) + +cpdef _get_utcoffset(tzinfo, obj) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 43709e77b70d56..249eedef4bb098 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -2,7 +2,9 @@ # cython: profile=False # dateutil compat -from dateutil.tz import tzutc as _dateutil_tzutc +from dateutil.tz import ( + tzutc as _dateutil_tzutc, + tzlocal as _dateutil_tzlocal) import pytz UTC = pytz.utc @@ -10,3 +12,67 @@ UTC = pytz.utc cdef inline bint _is_utc(object tz): return tz is UTC or isinstance(tz, _dateutil_tzutc) + + +cdef inline bint _is_tzlocal(object tz): + return isinstance(tz, _dateutil_tzlocal) + + +cdef inline bint _treat_tz_as_pytz(object tz): + return hasattr(tz, '_utc_transition_times') and hasattr( + tz, '_transition_info') + + +cdef inline bint _treat_tz_as_dateutil(object tz): + return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') + + +cdef inline object _get_zone(object tz): + """ + We need to do several things here: + 1) Distinguish between pytz and dateutil timezones + 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone* + but a different tz object) + 3) Provide something to serialize when we're storing a datetime object + in pytables. + + We return a string prefaced with dateutil if it's a dateutil tz, else just + the tz name. It needs to be a string so that we can serialize it with + UJSON/pytables. maybe_get_tz (below) is the inverse of this process. + """ + if _is_utc(tz): + return 'UTC' + else: + if _treat_tz_as_dateutil(tz): + if '.tar.gz' in tz._filename: + raise ValueError( + 'Bad tz filename. Dateutil on python 3 on windows has a ' + 'bug which causes tzfile._filename to be the same for all ' + 'timezone files. Please construct dateutil timezones ' + 'implicitly by passing a string like "dateutil/Europe' + '/London" when you construct your pandas objects instead ' + 'of passing a timezone object. See ' + 'https://github.com/pandas-dev/pandas/pull/7362') + return 'dateutil/' + tz._filename + else: + # tz is a pytz timezone or unknown. 
+            try:
+                zone = tz.zone
+                if zone is None:
+                    return tz
+                return zone
+            except AttributeError:
+                return tz
+
+
+def get_timezone(tz):
+    return _get_zone(tz)
+
+#----------------------------------------------------------------------
+# UTC Offsets
+
+cpdef _get_utcoffset(tzinfo, obj):
+    try:
+        return tzinfo._utcoffset
+    except AttributeError:
+        return tzinfo.utcoffset(obj)

From 9a8427404efb3df5deda12f76352725d628adf5e Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Tue, 12 Sep 2017 06:26:02 -0400
Subject: [PATCH 073/188] DOC: fix parquet example to not use ns

---
 doc/source/io.rst | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/doc/source/io.rst b/doc/source/io.rst
index de3150035c446b..8fbb23769492e4 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -4515,8 +4515,7 @@ See the documentation for `pyarrow `__ and
              'd': np.arange(4.0, 7.0, dtype='float64'),
              'e': [True, False, True],
              'f': pd.date_range('20130101', periods=3),
-             'g': pd.date_range('20130101', periods=3, tz='US/Eastern'),
-             'h': pd.date_range('20130101', periods=3, freq='ns')})
+             'g': pd.date_range('20130101', periods=3, tz='US/Eastern')})
    df
    df.dtypes

From d46b027e793e0f7b03a9372b82ac68cd35c1f35f Mon Sep 17 00:00:00 2001
From: Michael Penkov
Date: Tue, 12 Sep 2017 19:31:32 +0900
Subject: [PATCH 074/188] Prevent UnicodeDecodeError in pivot_table under Py2
 (#17489)

---
 doc/source/whatsnew/v0.20.0.txt    |  1 +
 pandas/core/reshape/pivot.py       |  2 +-
 pandas/tests/reshape/test_pivot.py | 10 ++++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 9d475390175b28..fe24f8f4991727 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -1705,6 +1705,7 @@ Reshaping
 - Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
 - Bug with ``sort=True`` in ``DataFrame.join`` and ``pd.merge`` when joining on indexes (:issue:`15582`)
 - Bug in ``DataFrame.nsmallest`` and ``DataFrame.nlargest`` where identical values resulted in duplicated rows (:issue:`15297`)
+- Bug in :func:`pandas.pivot_table` incorrectly raising ``UnicodeError`` when passing unicode input for the ``margins`` keyword (:issue:`13292`)
 
 Numeric
 ^^^^^^^

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
index f07123ca184895..d19de6030d4736 100644
--- a/pandas/core/reshape/pivot.py
+++ b/pandas/core/reshape/pivot.py
@@ -145,7 +145,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc,
     if not isinstance(margins_name, compat.string_types):
         raise ValueError('margins_name argument must be a string')
 
-    msg = 'Conflicting name "{name}" in margins'.format(name=margins_name)
+    msg = u'Conflicting name "{name}" in margins'.format(name=margins_name)
     for level in table.index.names:
         if margins_name in table.index.get_level_values(level):
             raise ValueError(msg)

diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py
index 879ac96680fbb5..bd8a999ce23304 100644
--- a/pandas/tests/reshape/test_pivot.py
+++ b/pandas/tests/reshape/test_pivot.py
@@ -1625,3 +1625,13 @@ def test_isleapyear_deprecate(self):
         with tm.assert_produces_warning(FutureWarning,
                                         check_stacklevel=False):
             assert isleapyear(2004)
+
+    def test_pivot_margins_name_unicode(self):
+        # issue #13292
+        greek = u'\u0394\u03bf\u03ba\u03b9\u03bc\u03ae'
+        frame = pd.DataFrame({'foo': [1, 2, 3]})
+        table = pd.pivot_table(frame, index=['foo'], aggfunc=len, margins=True,
margins_name=greek) + index = pd.Index([1, 2, 3, greek], dtype='object', name='foo') + expected = pd.DataFrame(index=index) + tm.assert_frame_equal(table, expected) From e682902327bd883a207b291b0326f277b3dcdd12 Mon Sep 17 00:00:00 2001 From: T N Date: Tue, 12 Sep 2017 19:35:55 +0900 Subject: [PATCH 075/188] DEPR: Add warning for True for dropna of SeriesGroupBy.nth (#17493) --- doc/source/whatsnew/v0.21.0.txt | 2 ++ pandas/core/groupby.py | 21 +++++++++++++++------ pandas/tests/groupby/test_nth.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index eccd71f45ec276..33232d2b09416c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -341,6 +341,8 @@ Deprecations - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). +- :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). + .. _whatsnew_0210.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 248f3b2095a785..f14ed08a27fae8 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1393,12 +1393,21 @@ def nth(self, n, dropna=None): return out.sort_index() if self.sort else out - if isinstance(self._selected_obj, DataFrame) and \ - dropna not in ['any', 'all']: - # Note: when agg-ing picker doesn't raise this, just returns NaN - raise ValueError("For a DataFrame groupby, dropna must be " - "either None, 'any' or 'all', " - "(was passed %s)." % (dropna),) + if dropna not in ['any', 'all']: + if isinstance(self._selected_obj, Series) and dropna is True: + warnings.warn("the dropna='%s' keyword is deprecated," + "use dropna='all' instead. " + "For a Series groupby, dropna must be " + "either None, 'any' or 'all'." % (dropna), + FutureWarning, + stacklevel=2) + dropna = 'all' + else: + # Note: when agg-ing picker doesn't raise this, + # just returns NaN + raise ValueError("For a DataFrame groupby, dropna must be " + "either None, 'any' or 'all', " + "(was passed %s)." % (dropna),) # old behaviour, but with all and any support for DataFrames. 
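# ---------------------------------------------------------------------
# NOTE: illustrative example of the deprecation above, not part of the
# patch. A minimal sketch assuming pandas 0.21-era behavior; the exact
# warning text may differ.
import warnings
import numpy as np
import pandas as pd
from pandas.util.testing import assert_series_equal

df = pd.DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B'])
g = df.groupby('A')

with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter('always')
    result = g.B.nth(0, dropna=True)  # still works, but warns ...
assert any(issubclass(x.category, FutureWarning) for x in w)

# ... and the non-deprecated spelling gives the same answer:
assert_series_equal(result, g.B.nth(0, dropna='all'))
# ---------------------------------------------------------------------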
# modified in GH 7559 to have better perf diff --git a/pandas/tests/groupby/test_nth.py b/pandas/tests/groupby/test_nth.py index 28392537be3c66..ffbede0eb208f3 100644 --- a/pandas/tests/groupby/test_nth.py +++ b/pandas/tests/groupby/test_nth.py @@ -2,7 +2,10 @@ import pandas as pd from pandas import DataFrame, MultiIndex, Index, Series, isna from pandas.compat import lrange -from pandas.util.testing import assert_frame_equal, assert_series_equal +from pandas.util.testing import ( + assert_frame_equal, + assert_produces_warning, + assert_series_equal) from .common import MixIn @@ -171,7 +174,10 @@ def test_nth(self): # doc example df = DataFrame([[1, np.nan], [1, 4], [5, 6]], columns=['A', 'B']) g = df.groupby('A') - result = g.B.nth(0, dropna=True) + # PR 17493, related to issue 11038 + # test Series.nth with True for dropna produces DeprecationWarning + with assert_produces_warning(FutureWarning): + result = g.B.nth(0, dropna=True) expected = g.B.first() assert_series_equal(result, expected) From 83436af8ae1ccad49b7ceac7471c060d823d10ab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 12 Sep 2017 08:54:53 -0400 Subject: [PATCH 076/188] COMPAT: Iteration should always yield a python scalar (#17491) xref #10904 closes #13236 closes #13256 xref #14216 --- doc/source/whatsnew/v0.21.0.txt | 47 ++++++++++++++++ pandas/core/base.py | 25 ++++++++- pandas/core/categorical.py | 6 ++ pandas/core/indexes/base.py | 9 --- pandas/core/indexes/category.py | 4 ++ pandas/core/series.py | 13 ----- pandas/core/sparse/array.py | 12 +++- pandas/tests/frame/test_api.py | 11 ++-- pandas/tests/frame/test_convert_to.py | 13 +++++ pandas/tests/series/test_io.py | 36 +----------- pandas/tests/test_base.py | 79 +++++++++++++++++++++++++-- 11 files changed, 187 insertions(+), 68 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 33232d2b09416c..89da897f6c5292 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -188,6 +188,53 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in ... ValueError: Cannot operate inplace if there is no assignment +.. _whatsnew_0210.api_breaking.iteration_scalars: + +Iteration of Series/Index will now return python scalars +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affect int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). + +.. ipython:: python + + s = Series([1, 2, 3]) + s + +Previously: + +.. code-block:: python + + In [2]: type(list(s)[0]) + Out[2]: numpy.int64 + +New Behaviour: + +.. ipython:: python + + type(list(s)[0]) + +Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well. + +.. ipython:: python + + d = {'a':[1], 'b':['b']} + df = DataFrame(d) + +Previously: + +.. code-block:: python + + In [8]: type(df.to_dict()['a'][0]) + Out[8]: numpy.int64 + +New Behaviour: + +.. ipython:: python + + type(df.to_dict()['a'][0]) + +.. 
_whatsnew_0210.api_breaking.dtype_conversions: + Dtype Conversions ^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/base.py b/pandas/core/base.py index d60a8515dc920f..62d89eac4b3548 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,7 +8,12 @@ from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass -from pandas.core.dtypes.common import is_object_dtype, is_list_like, is_scalar +from pandas.core.dtypes.common import ( + is_object_dtype, + is_list_like, + is_scalar, + is_datetimelike) + from pandas.util._validators import validate_bool_kwarg from pandas.core import common as com @@ -18,7 +23,8 @@ from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) -from pandas.core.common import AbstractMethodError +from pandas.core.common import AbstractMethodError, _maybe_box_datetimelike + from pandas.core.accessor import DirNamesMixin _shared_docs = dict() @@ -884,6 +890,21 @@ def argmin(self, axis=None): """ return nanops.nanargmin(self.values) + def tolist(self): + """ + return a list of the values; box to scalars + """ + return list(self.__iter__()) + + def __iter__(self): + """ + provide iteration over the values; box to scalars + """ + if is_datetimelike(self): + return (_maybe_box_datetimelike(x) for x in self._values) + else: + return iter(self._values.tolist()) + @cache_readonly def hasnans(self): """ return if I have any nans; enables various perf speedups """ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 1c2a29333001ca..dbd2a79b7e46d9 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -399,6 +399,12 @@ def itemsize(self): """ return the size of a single category """ return self.categories.itemsize + def tolist(self): + """ + return a list of my values + """ + return np.array(self).tolist() + def reshape(self, new_shape, *args, **kwargs): """ .. 
deprecated:: 0.19.0 diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ef5f68936044a8..008828cf4f309a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -585,12 +585,6 @@ def memory_usage(self, deep=False): return result # ops compat - def tolist(self): - """ - return a list of the Index values - """ - return list(self.values) - @deprecate_kwarg(old_arg_name='n', new_arg_name='repeats') def repeat(self, repeats, *args, **kwargs): """ @@ -1601,9 +1595,6 @@ def is_all_dates(self): return False return is_datetime_array(_ensure_object(self.values)) - def __iter__(self): - return iter(self.values) - def __reduce__(self): d = dict(data=self._data) d.update(self._get_attributes_dict()) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 0681202289311e..c8044b14e4e57e 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -253,6 +253,10 @@ def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() + def __iter__(self): + """ iterate like Categorical """ + return self._data.__iter__() + @property def codes(self): return self._data.codes diff --git a/pandas/core/series.py b/pandas/core/series.py index 6905fc1aced742..ac11c5f908fdcf 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -19,7 +19,6 @@ is_integer, is_integer_dtype, is_float_dtype, is_extension_type, is_datetimetz, - is_datetimelike, is_datetime64tz_dtype, is_timedelta64_dtype, is_list_like, @@ -1095,14 +1094,6 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, with open(buf, 'w') as f: f.write(result) - def __iter__(self): - """ provide iteration over the values of the Series - box values if necessary """ - if is_datetimelike(self): - return (_maybe_box_datetimelike(x) for x in self._values) - else: - return iter(self._values) - def iteritems(self): """ Lazily iterate over (index, value) tuples @@ -1118,10 +1109,6 @@ def keys(self): """Alias for index""" return self.index - def tolist(self): - """ Convert Series to a nested list """ - return list(self.asobject) - def to_dict(self, into=dict): """ Convert Series to {label -> value} dict or dict-like object. 
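NOTE: illustrative example, not part of the patch. The net effect of hoisting
``tolist`` and ``__iter__`` out of ``Series``/``Index`` and into the shared
base class is a single boxing path for both types. A minimal sketch, assuming
pandas 0.21 semantics:

    import pandas as pd

    s = pd.Series([1, 2, 3])
    type(list(s)[0])      # int -- was numpy.int64 before this change
    type(s.tolist()[0])   # int

    # datetime-like values box to pandas scalars instead:
    t = pd.Series(pd.date_range('2017-01-01', periods=2, tz='US/Eastern'))
    type(next(iter(t)))   # Timestamp, with the timezone preserved

    # Index gains the identical behavior through the same base class:
    type(list(pd.Index([1, 2]))[0])   # int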
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index 2f830a98db6497..f965c91999a03d 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -407,8 +407,18 @@ def to_dense(self, fill=None): return self.values def __iter__(self): + if np.issubdtype(self.dtype, np.floating): + boxer = float + elif np.issubdtype(self.dtype, np.integer): + boxer = int + else: + boxer = lambda x: x + for i in range(len(self)): - yield self._get_val_at(i) + r = self._get_val_at(i) + + # box em + yield boxer(r) def __getitem__(self, key): """ diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index a62fcb506a34bc..b3209da6449d6a 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -9,7 +9,7 @@ import sys from distutils.version import LooseVersion -from pandas.compat import range, lrange +from pandas.compat import range, lrange, long from pandas import compat from numpy.random import randn @@ -205,15 +205,18 @@ def test_itertuples(self): 'ints': lrange(5)}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): - assert isinstance(tup[1], np.integer) + assert isinstance(tup[1], (int, long)) df = self.klass(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[['a', 'a']] assert (list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]) - assert (repr(list(df.itertuples(name=None))) == - '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') + + # repr with be int/long on windows + if not compat.is_platform_windows(): + assert (repr(list(df.itertuples(name=None))) == + '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') tup = next(df.itertuples(name='TestName')) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index 629c695b702fe2..99e5630ce6a43c 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -5,6 +5,7 @@ import numpy as np from pandas import compat +from pandas.compat import long from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range) @@ -236,3 +237,15 @@ def test_to_records_datetimeindex_with_tz(self, tz): # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected) + + def test_to_dict_box_scalars(self): + # 14216 + # make sure that we are boxing properly + d = {'a': [1], 'b': ['b']} + + result = DataFrame(d).to_dict() + assert isinstance(list(result['a'])[0], (int, long)) + assert isinstance(list(result['b'])[0], (int, long)) + + result = DataFrame(d).to_dict(orient='records') + assert isinstance(result[0]['a'], (int, long)) diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 503185de427f16..5b7fd1ec94a90b 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -10,7 +10,7 @@ from pandas import Series, DataFrame -from pandas.compat import StringIO, u, long +from pandas.compat import StringIO, u from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal, ensure_clean) import pandas.util.testing as tm @@ -178,37 +178,3 @@ def test_to_dict(self, mapping): from_method = Series(ts.to_dict(collections.Counter)) from_constructor = Series(collections.Counter(ts.iteritems())) tm.assert_series_equal(from_method, from_constructor) - - -class TestSeriesToList(TestData): - - def test_tolist(self): - rs = self.ts.tolist() - xp = self.ts.values.tolist() - assert_almost_equal(rs, xp) - - # datetime64 - s = Series(self.ts.index) - rs = s.tolist() - assert self.ts.index[0] == rs[0] - - def test_tolist_np_int(self): - # 
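# ---------------------------------------------------------------------
# NOTE: illustrative example, not part of the patch. The SparseArray
# change above picks a boxing function from the dtype, so iteration no
# longer leaks numpy scalars. A minimal sketch assuming pandas 0.21:
import numpy as np
import pandas as pd

arr = pd.SparseArray(np.array([1.0, np.nan, 2.0]))
[type(x) for x in arr]   # [float, float, float], not numpy.float64
# integer dtypes box to int the same way; any other dtype passes through
# the identity lambda unchanged
# ---------------------------------------------------------------------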
GH10904 - for t in ['int8', 'int16', 'int32', 'int64']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], (int, long)) - - def test_tolist_np_uint(self): - # GH10904 - for t in ['uint8', 'uint16']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], int) - for t in ['uint32', 'uint64']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], long) - - def test_tolist_np_float(self): - # GH10904 - for t in ['float16', 'float32', 'float64']: - s = pd.Series([1], dtype=t) - assert isinstance(s.tolist()[0], float) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 9e92c7cf1a9b81..210d0260b8d95b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -13,9 +13,10 @@ is_object_dtype, is_datetimetz, needs_i8_conversion) import pandas.util.testing as tm -from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, PeriodIndex, - Timedelta, IntervalIndex, Interval) -from pandas.compat import StringIO, PYPY +from pandas import (Series, Index, DatetimeIndex, TimedeltaIndex, + PeriodIndex, Timedelta, IntervalIndex, Interval, + CategoricalIndex, Timestamp) +from pandas.compat import StringIO, PYPY, long from pandas.compat.numpy import np_array_datetime64_compat from pandas.core.base import PandasDelegate, NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin @@ -433,7 +434,7 @@ def test_value_counts_unique_nunique(self): # datetimetz Series returns array of Timestamp assert result[0] == orig[0] for r in result: - assert isinstance(r, pd.Timestamp) + assert isinstance(r, Timestamp) tm.assert_numpy_array_equal(result, orig._values.asobject.values) else: @@ -1031,3 +1032,73 @@ def f(): pytest.raises(AttributeError, f) assert not hasattr(t, "b") + + +class TestToIterable(object): + # test that we convert an iterable to python types + + dtypes = [ + ('int8', (int, long)), + ('int16', (int, long)), + ('int32', (int, long)), + ('int64', (int, long)), + ('uint8', (int, long)), + ('uint16', (int, long)), + ('uint32', (int, long)), + ('uint64', (int, long)), + ('float16', float), + ('float32', float), + ('float64', float), + ('datetime64[ns]', Timestamp), + ('datetime64[ns, US/Eastern]', Timestamp), + ('timedelta64[ns]', Timedelta)] + + @pytest.mark.parametrize( + 'dtype, rdtype', + dtypes + [ + ('object', object), + ('category', object)]) + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable(self, typ, method, dtype, rdtype): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + s = typ([1], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + 'dtype, rdtype', + dtypes + [ + ('object', (int, long)), + ('category', (int, long))]) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable_map(self, typ, dtype, rdtype): + # gh-13236 + # coerce iteration to underlying python / pandas types + s = typ([1], dtype=dtype) + result = s.map(type)[0] + if not isinstance(rdtype, tuple): + rdtype = tuple([rdtype]) + assert result in rdtype + + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + def test_categorial_datetimelike(self, method): + i = CategoricalIndex([Timestamp('1999-12-31'), + Timestamp('2000-12-31')]) + + result = method(i)[0] 
+ assert isinstance(result, Timestamp) From 633be31adcd43fc8bfe9a9fd9e7621ff3fc8ccbd Mon Sep 17 00:00:00 2001 From: Giftlin <31629119+Giftlin@users.noreply.github.com> Date: Wed, 13 Sep 2017 15:33:30 +0530 Subject: [PATCH 077/188] DOC: grammatical mistake (#17511) --- pandas/plotting/_tools.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_tools.py b/pandas/plotting/_tools.py index 389e238ccb96ec..6deddc97915f1f 100644 --- a/pandas/plotting/_tools.py +++ b/pandas/plotting/_tools.py @@ -141,7 +141,7 @@ def _subplots(naxes=None, sharex=False, sharey=False, squeeze=True, array of Axis objects are returned as numpy 1-d arrays. - for NxM subplots with N>1 and M>1 are returned as a 2d array. - If False, no squeezing at all is done: the returned axis object is always + If False, no squeezing is done: the returned axis object is always a 2-d array containing Axis instances, even if it ends up being 1x1. subplot_kw : dict From f6d4d7078d49503adf990f0c159eb603ca1f0c1a Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 13 Sep 2017 11:04:32 +0100 Subject: [PATCH 078/188] removed versionadded <0.17 (#17504) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/frame.py | 6 ------ pandas/core/generic.py | 6 ------ pandas/core/indexes/category.py | 2 -- pandas/core/indexes/datetimes.py | 2 +- pandas/core/reshape/reshape.py | 2 -- pandas/core/sparse/series.py | 4 ---- pandas/core/strings.py | 5 ----- 8 files changed, 2 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 89da897f6c5292..6ffa903c741500 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -526,4 +526,4 @@ Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - Several ``NaT`` method docstrings (e.g. :func:`NaT.ctime`) were incorrect (:issue:`17327`) -- The documentation has had references to versions < v0.16 removed and cleaned up (:issue:`17442`, :issue:`17442` & :issue:`#17404`) +- The documentation has had references to versions < v0.17 removed and cleaned up (:issue:`17442`, :issue:`17442`, :issue:`17404` & :issue:`17504`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5991ec825c8417..dd5d490ea66a8f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1479,8 +1479,6 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, Character recognized as decimal separator. E.g. use ',' for European data - .. versionadded:: 0.16.0 - """ formatter = fmt.CSVFormatter(self, path_or_buf, line_terminator=line_terminator, sep=sep, @@ -2165,8 +2163,6 @@ def _getitem_frame(self, key): def query(self, expr, inplace=False, **kwargs): """Query the columns of a frame with a boolean expression. - .. versionadded:: 0.13 - Parameters ---------- expr : string @@ -2561,8 +2557,6 @@ def assign(self, **kwargs): Assign new columns to a DataFrame, returning a new object (a copy) with all the original columns in addition to the new ones. - .. versionadded:: 0.16.0 - Parameters ---------- kwargs : keyword, value pairs diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d16b079ba2c8d..a71bf7be1bc753 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2348,8 +2348,6 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): errors : {'ignore', 'raise'}, default 'raise' If 'ignore', suppress error and existing labels are dropped. - .. 
versionadded:: 0.16.1 - Returns ------- dropped : type of caller @@ -3070,8 +3068,6 @@ def sample(self, n=None, frac=None, replace=False, weights=None, """ Returns a random sample of items from an axis of object. - .. versionadded:: 0.16.1 - Parameters ---------- n : int, optional @@ -3228,8 +3224,6 @@ def sample(self, n=None, frac=None, replace=False, weights=None, _shared_docs['pipe'] = (""" Apply func(self, \*args, \*\*kwargs) - .. versionadded:: 0.16.2 - Parameters ---------- func : function diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index c8044b14e4e57e..baa3ebce6abbcc 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -33,8 +33,6 @@ class CategoricalIndex(Index, base.PandasDelegate): Immutable Index implementing an ordered, sliceable set. CategoricalIndex represents a sparsely populated Index with an underlying Categorical. - .. versionadded:: 0.16.1 - Parameters ---------- data : array-like or Categorical, (1-dimensional) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 5a04c550f4502b..4cfb7547e7d0ac 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1577,7 +1577,7 @@ def _set_freq(self, value): days_in_month = _field_accessor( 'days_in_month', 'dim', - "The number of days in the month\n\n.. versionadded:: 0.16.0") + "The number of days in the month") daysinmonth = days_in_month is_month_start = _field_accessor( 'is_month_start', diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b4abba8026b35b..7260bc9a8b7a14 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -1110,8 +1110,6 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, Whether the dummy columns should be sparse or not. Returns SparseDataFrame if `data` is a Series or if all columns are included. Otherwise returns a DataFrame with some SparseBlocks. - - .. versionadded:: 0.16.1 drop_first : bool, default False Whether to get k-1 dummies out of k categorical levels by removing the first level. diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 99aec2dd115697..2aecb9d7c4ffbd 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -732,8 +732,6 @@ def to_coo(self, row_levels=(0, ), column_levels=(1, ), sort_labels=False): (labels) or numbers of the levels. {row_levels, column_levels} must be a partition of the MultiIndex level names (or numbers). - .. versionadded:: 0.16.0 - Parameters ---------- row_levels : tuple/list @@ -784,8 +782,6 @@ def from_coo(cls, A, dense_index=False): """ Create a SparseSeries from a scipy.sparse.coo_matrix. - .. versionadded:: 0.16.0 - Parameters ---------- A : scipy.sparse.coo_matrix diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 48bc2ee05dd680..021f88d1aec002 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -602,8 +602,6 @@ def str_extract(arr, pat, flags=0, expand=None): For each subject string in the Series, extract groups from the first match of regular expression pat. - .. versionadded:: 0.13.0 - Parameters ---------- pat : string @@ -1016,7 +1014,6 @@ def str_split(arr, pat=None, n=None): * If True, return DataFrame/MultiIndex expanding dimensionality. * If False, return Series/Index. - .. 
versionadded:: 0.16.1 return_type : deprecated, use `expand` Returns @@ -1047,8 +1044,6 @@ def str_rsplit(arr, pat=None, n=None): string, starting at the end of the string and working to the front. Equivalent to :meth:`str.rsplit`. - .. versionadded:: 0.16.2 - Parameters ---------- pat : string, default None From f11bbf2f505d81900cc83ce387a6a1b1d2a2f866 Mon Sep 17 00:00:00 2001 From: Giftlin <31629119+Giftlin@users.noreply.github.com> Date: Wed, 13 Sep 2017 17:54:57 +0530 Subject: [PATCH 079/188] DOC: grammatical mistakes (#17512) --- pandas/io/stata.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 253ed03c25db94..92f180506a8b71 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -57,7 +57,7 @@ identifier of column that should be used as index of the DataFrame convert_missing : boolean, defaults to False Flag indicating whether to convert missing values to their Stata - representations. If False, missing values are replaced with nans. + representations. If False, missing values are replaced with nan. If True, columns containing missing values are returned with object data types and missing values are represented by StataMissingValue objects. @@ -248,8 +248,9 @@ def _stata_elapsed_date_to_datetime_vec(dates, fmt): def convert_year_month_safe(year, month): """ Convert year and month to datetimes, using pandas vectorized versions - when the date range falls within the range supported by pandas. Other - wise it falls back to a slower but more robust method using datetime. + when the date range falls within the range supported by pandas. + Otherwise it falls back to a slower but more robust method + using datetime. """ if year.max() < MAX_YEAR and year.min() > MIN_YEAR: return to_datetime(100 * year + month, format='%Y%m') From eef810ef2c64be00943696b33e8bab0b4dd66e9e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 13 Sep 2017 19:18:56 -0400 Subject: [PATCH 080/188] COMPAT: followup to #17491 (#17503) --- doc/source/whatsnew/v0.21.0.txt | 14 ++--- pandas/core/base.py | 27 ++++++--- pandas/core/categorical.py | 10 +++- pandas/core/indexes/category.py | 5 +- pandas/tests/indexes/test_category.py | 13 +++-- pandas/tests/series/test_api.py | 37 ------------- pandas/tests/test_base.py | 79 +++++++++++++++++++++++++-- 7 files changed, 119 insertions(+), 66 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6ffa903c741500..9da1f321ef5740 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -190,19 +190,19 @@ the target. Now, a ``ValueError`` will be raised when such an input is passed in .. _whatsnew_0210.api_breaking.iteration_scalars: -Iteration of Series/Index will now return python scalars +Iteration of Series/Index will now return Python scalars ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. a ``np.int64``, rather than a python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affect int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). +Previously, when using certain iteration methods for a ``Series`` with dtype ``int`` or ``float``, you would receive a ``numpy`` scalar, e.g. 
a ``np.int64``, rather than a Python ``int``. Issue (:issue:`10904`) corrected this for ``Series.tolist()`` and ``list(Series)``. This change makes all iteration methods consistent, in particular, for ``__iter__()`` and ``.map()``; note that this only affects int/float dtypes. (:issue:`13236`, :issue:`13258`, :issue:`14216`). .. ipython:: python - s = Series([1, 2, 3]) + s = pd.Series([1, 2, 3]) s Previously: -.. code-block:: python +.. code-block:: ipython In [2]: type(list(s)[0]) Out[2]: numpy.int64 @@ -215,14 +215,14 @@ New Behaviour: Furthermore this will now correctly box the results of iteration for :func:`DataFrame.to_dict` as well. -.. ipython:: python +.. ipython:: ipython d = {'a':[1], 'b':['b']} - df = DataFrame(d) + df = pd,DataFrame(d) Previously: -.. code-block:: python +.. code-block:: ipython In [8]: type(df.to_dict()['a'][0]) Out[8]: numpy.int64 diff --git a/pandas/core/base.py b/pandas/core/base.py index 62d89eac4b3548..f0e8d8a16661bb 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -892,18 +892,31 @@ def argmin(self, axis=None): def tolist(self): """ - return a list of the values; box to scalars + Return a list of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) + + See Also + -------- + numpy.tolist """ - return list(self.__iter__()) + + if is_datetimelike(self): + return [_maybe_box_datetimelike(x) for x in self._values] + else: + return self._values.tolist() def __iter__(self): """ - provide iteration over the values; box to scalars + Return an iterator of the values. + + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) """ - if is_datetimelike(self): - return (_maybe_box_datetimelike(x) for x in self._values) - else: - return iter(self._values.tolist()) + return iter(self.tolist()) @cache_readonly def hasnans(self): diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index dbd2a79b7e46d9..97df72900428c4 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -26,7 +26,7 @@ is_integer_dtype, is_bool, is_list_like, is_sequence, is_scalar) -from pandas.core.common import is_null_slice +from pandas.core.common import is_null_slice, _maybe_box_datetimelike from pandas.core.algorithms import factorize, take_1d, unique1d from pandas.core.base import (PandasObject, PandasDelegate, @@ -401,8 +401,14 @@ def itemsize(self): def tolist(self): """ - return a list of my values + Return a list of the values. 
+ + These are each a scalar type, which is a Python scalar + (for str, int, float) or a pandas scalar + (for Timestamp/Timedelta/Interval/Period) """ + if is_datetimelike(self.categories): + return [_maybe_box_datetimelike(x) for x in self] return np.array(self).tolist() def reshape(self, new_shape, *args, **kwargs): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index baa3ebce6abbcc..71cd4790ac3648 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -251,9 +251,8 @@ def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() - def __iter__(self): - """ iterate like Categorical """ - return self._data.__iter__() + def tolist(self): + return self._data.tolist() @property def codes(self): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 05d31af57b36c5..aac68ebd6abede 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -576,12 +576,13 @@ def test_isin(self): ci.isin(['c', 'a', 'b', np.nan]), np.array([True] * 6)) # mismatched categorical -> coerced to ndarray so doesn't matter - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('abcdefghi'))), np.array([True] * - 6)) - tm.assert_numpy_array_equal( - ci.isin(ci.set_categories(list('defghi'))), - np.array([False] * 5 + [True])) + result = ci.isin(ci.set_categories(list('abcdefghi'))) + expected = np.array([True] * 6) + tm.assert_numpy_array_equal(result, expected) + + result = ci.isin(ci.set_categories(list('defghi'))) + expected = np.array([False] * 5 + [True]) + tm.assert_numpy_array_equal(result, expected) def test_identical(self): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index b7fbe803f8d3b9..d0805e2bb54d25 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -245,43 +245,6 @@ def test_iter(self): for i, val in enumerate(self.ts): assert val == self.ts[i] - def test_iter_box(self): - vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] - s = pd.Series(vals) - assert s.dtype == 'datetime64[ns]' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Timestamp) - assert res.tz is None - assert res == exp - - vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), - pd.Timestamp('2011-01-02', tz='US/Eastern')] - s = pd.Series(vals) - - assert s.dtype == 'datetime64[ns, US/Eastern]' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Timestamp) - assert res.tz == exp.tz - assert res == exp - - # timedelta - vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] - s = pd.Series(vals) - assert s.dtype == 'timedelta64[ns]' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Timedelta) - assert res == exp - - # period (object dtype, not boxed) - vals = [pd.Period('2011-01-01', freq='M'), - pd.Period('2011-01-02', freq='M')] - s = pd.Series(vals) - assert s.dtype == 'object' - for res, exp in zip(s, vals): - assert isinstance(res, pd.Period) - assert res.freq == 'M' - assert res == exp - def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 210d0260b8d95b..38d78b12b31aa5 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1054,10 +1054,7 @@ class TestToIterable(object): ('timedelta64[ns]', Timedelta)] @pytest.mark.parametrize( - 'dtype, rdtype', - dtypes + [ - ('object', object), - 
('category', object)]) + 'dtype, rdtype', dtypes) @pytest.mark.parametrize( 'method', [ @@ -1074,6 +1071,43 @@ def test_iterable(self, typ, method, dtype, rdtype): result = method(s)[0] assert isinstance(result, rdtype) + @pytest.mark.parametrize( + 'dtype, rdtype, obj', + [ + ('object', object, 'a'), + ('object', (int, long), 1), + ('category', object, 'a'), + ('category', (int, long), 1)]) + @pytest.mark.parametrize( + 'method', + [ + lambda x: x.tolist(), + lambda x: list(x), + lambda x: list(x.__iter__()), + ], ids=['tolist', 'list', 'iter']) + @pytest.mark.parametrize('typ', [Series, Index]) + def test_iterable_object_and_category(self, typ, method, + dtype, rdtype, obj): + # gh-10904 + # gh-13258 + # coerce iteration to underlying python / pandas types + s = typ([obj], dtype=dtype) + result = method(s)[0] + assert isinstance(result, rdtype) + + @pytest.mark.parametrize( + 'dtype, rdtype', dtypes) + def test_iterable_items(self, dtype, rdtype): + # gh-13258 + # test items / iteritems yields the correct boxed scalars + # this only applies to series + s = Series([1], dtype=dtype) + _, result = list(s.items())[0] + assert isinstance(result, rdtype) + + _, result = list(s.iteritems())[0] + assert isinstance(result, rdtype) + @pytest.mark.parametrize( 'dtype, rdtype', dtypes + [ @@ -1102,3 +1136,40 @@ def test_categorial_datetimelike(self, method): result = method(i)[0] assert isinstance(result, Timestamp) + + def test_iter_box(self): + vals = [pd.Timestamp('2011-01-01'), pd.Timestamp('2011-01-02')] + s = pd.Series(vals) + assert s.dtype == 'datetime64[ns]' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Timestamp) + assert res.tz is None + assert res == exp + + vals = [pd.Timestamp('2011-01-01', tz='US/Eastern'), + pd.Timestamp('2011-01-02', tz='US/Eastern')] + s = pd.Series(vals) + + assert s.dtype == 'datetime64[ns, US/Eastern]' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Timestamp) + assert res.tz == exp.tz + assert res == exp + + # timedelta + vals = [pd.Timedelta('1 days'), pd.Timedelta('2 days')] + s = pd.Series(vals) + assert s.dtype == 'timedelta64[ns]' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Timedelta) + assert res == exp + + # period (object dtype, not boxed) + vals = [pd.Period('2011-01-01', freq='M'), + pd.Period('2011-01-02', freq='M')] + s = pd.Series(vals) + assert s.dtype == 'object' + for res, exp in zip(s, vals): + assert isinstance(res, pd.Period) + assert res.freq == 'M' + assert res == exp From fa557f7391589f351b1260f46b3b3db22492f50b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 13 Sep 2017 16:20:53 -0700 Subject: [PATCH 081/188] De-privatize timezone funcs (#17502) --- pandas/_libs/index.pyx | 6 +- pandas/_libs/period.pyx | 14 ++--- pandas/_libs/src/inference.pyx | 6 +- pandas/_libs/tslib.pyx | 96 +++++++++++++++---------------- pandas/_libs/tslibs/timezones.pxd | 12 ++-- pandas/_libs/tslibs/timezones.pyx | 20 +++---- 6 files changed, 75 insertions(+), 79 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index bf4d53683c9b71..884117799ec5be 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,7 +17,7 @@ cimport tslib from hashtable cimport HashTable -from tslibs.timezones cimport _is_utc +from tslibs.timezones cimport is_utc, get_utcoffset from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta @@ -551,8 +551,8 @@ cdef inline _to_i8(object val): tzinfo = getattr(val, 
'tzinfo', None) # Save the original date value so we can get the utcoffset from it. ival = _pydatetime_to_dts(val, &dts) - if tzinfo is not None and not _is_utc(tzinfo): - offset = tslib._get_utcoffset(tzinfo, val) + if tzinfo is not None and not is_utc(tzinfo): + offset = get_utcoffset(tzinfo, val) ival -= tslib._delta_to_nanoseconds(offset) return ival return val diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 2b0734f5cf2e7a..9e473a7f362b44 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -34,7 +34,7 @@ from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, NaT) -from tslibs.timezones cimport _is_utc, _is_tzlocal, _get_utcoffset +from tslibs.timezones cimport is_utc, is_tzlocal, get_utcoffset from tslib cimport ( maybe_get_tz, _get_dst_info, @@ -533,7 +533,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if _is_utc(tz): + if is_utc(tz): for i in range(n): if stamps[i] == NPY_NAT: continue @@ -541,7 +541,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): curr_reso = _reso_stamp(&dts) if curr_reso < reso: reso = curr_reso - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): if stamps[i] == NPY_NAT: continue @@ -549,7 +549,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) curr_reso = _reso_stamp(&dts) @@ -597,7 +597,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if _is_utc(tz): + if is_utc(tz): for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -607,7 +607,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, dts.hour, dts.min, dts.sec, dts.us, dts.ps, freq) - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -616,7 +616,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = get_period_ordinal(dts.year, dts.month, dts.day, diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 95145ff49b02fd..2bb362eab40975 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -3,7 +3,7 @@ from decimal import Decimal cimport util cimport cython from tslib import NaT -from tslibs.timezones cimport _get_zone +from tslibs.timezones cimport get_timezone from datetime import datetime, timedelta iNaT = util.get_nat() @@ -901,13 +901,13 @@ cpdef bint is_datetime_with_singletz_array(ndarray[object] values): for i in range(n): base_val = values[i] if base_val is not NaT: - base_tz = _get_zone(getattr(base_val, 'tzinfo', None)) + base_tz = get_timezone(getattr(base_val, 'tzinfo', None)) for j in range(i, n): val = values[j] if val is not NaT: tz = getattr(val, 'tzinfo', None) - if base_tz != tz and base_tz != 
_get_zone(tz): + if base_tz != tz and base_tz != get_timezone(tz): return False break diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a8ae0fcd733d6c..629325c28ea9c6 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -108,11 +108,11 @@ iNaT = NPY_NAT from tslibs.timezones cimport ( - _is_utc, _is_tzlocal, - _treat_tz_as_dateutil, _treat_tz_as_pytz, - _get_zone, - _get_utcoffset) -from tslibs.timezones import get_timezone, _get_utcoffset # noqa + is_utc, is_tzlocal, + treat_tz_as_dateutil, treat_tz_as_pytz, + get_timezone, + get_utcoffset) +from tslibs.timezones import get_timezone, get_utcoffset # noqa cdef inline object create_timestamp_from_ts( @@ -160,7 +160,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): func_create = create_datetime_from_ts if tz is not None: - if _is_utc(tz): + if is_utc(tz): for i in range(n): value = arr[i] if value == NPY_NAT: @@ -169,7 +169,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): pandas_datetime_to_datetimestruct( value, PANDAS_FR_ns, &dts) result[i] = func_create(value, dts, tz, freq) - elif _is_tzlocal(tz) or _is_fixed_offset(tz): + elif is_tzlocal(tz) or _is_fixed_offset(tz): for i in range(n): value = arr[i] if value == NPY_NAT: @@ -194,7 +194,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): # Adjust datetime64 timestamp, recompute datetimestruct pos = trans.searchsorted(value, side='right') - 1 - if _treat_tz_as_pytz(tz): + if treat_tz_as_pytz(tz): # find right representation of dst etc in pytz timezone new_tz = tz._tzinfos[tz._transition_info[pos]] else: @@ -242,12 +242,12 @@ def ints_to_pytimedelta(ndarray[int64_t] arr, box=False): cdef inline bint _is_fixed_offset(object tz): - if _treat_tz_as_dateutil(tz): + if treat_tz_as_dateutil(tz): if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: return 1 else: return 0 - elif _treat_tz_as_pytz(tz): + elif treat_tz_as_pytz(tz): if (len(tz._transition_info) == 0 and len(tz._utc_transition_times) == 0): return 1 @@ -1107,12 +1107,12 @@ cdef class _Timestamp(datetime): try: stamp += self.strftime('%z') if self.tzinfo: - zone = _get_zone(self.tzinfo) + zone = get_timezone(self.tzinfo) except ValueError: year2000 = self.replace(year=2000) stamp += year2000.strftime('%z') if self.tzinfo: - zone = _get_zone(self.tzinfo) + zone = get_timezone(self.tzinfo) try: stamp += zone.strftime(' %%Z') @@ -1272,7 +1272,7 @@ cdef class _Timestamp(datetime): cdef: int64_t val val = self.value - if self.tz is not None and not _is_utc(self.tz): + if self.tz is not None and not is_utc(self.tz): val = tz_convert_single(self.value, 'UTC', self.tz) return val @@ -1510,14 +1510,14 @@ cdef convert_to_tsobject(object ts, object tz, object unit, except: pass obj.value = _pydatetime_to_dts(ts, &obj.dts) - ts_offset = _get_utcoffset(ts.tzinfo, ts) + ts_offset = get_utcoffset(ts.tzinfo, ts) obj.value -= _delta_to_nanoseconds(ts_offset) - tz_offset = _get_utcoffset(tz, ts) + tz_offset = get_utcoffset(tz, ts) obj.value += _delta_to_nanoseconds(tz_offset) pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz - elif not _is_utc(tz): + elif not is_utc(tz): ts = _localize_pydatetime(ts, tz) obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo @@ -1529,8 +1529,8 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = _pydatetime_to_dts(ts, &obj.dts) obj.tzinfo = ts.tzinfo - if obj.tzinfo is not None and not _is_utc(obj.tzinfo): - offset = 
_get_utcoffset(obj.tzinfo, ts) + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) obj.value -= _delta_to_nanoseconds(offset) if is_timestamp(ts): @@ -1641,13 +1641,13 @@ cdef inline void _localize_tso(_TSObject obj, object tz): """ Take a TSObject in UTC and localizes to timezone tz. """ - if _is_utc(tz): + if is_utc(tz): obj.tzinfo = tz - elif _is_tzlocal(tz): + elif is_tzlocal(tz): pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) dt = datetime(obj.dts.year, obj.dts.month, obj.dts.day, obj.dts.hour, obj.dts.min, obj.dts.sec, obj.dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + delta, PANDAS_FR_ns, &obj.dts) @@ -1671,7 +1671,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): pandas_datetime_to_datetimestruct( obj.value, PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz - elif _treat_tz_as_pytz(tz): + elif treat_tz_as_pytz(tz): inf = tz._transition_info[pos] if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + deltas[pos], @@ -1680,7 +1680,7 @@ cdef inline void _localize_tso(_TSObject obj, object tz): pandas_datetime_to_datetimestruct(obj.value, PANDAS_FR_ns, &obj.dts) obj.tzinfo = tz._tzinfos[inf] - elif _treat_tz_as_dateutil(tz): + elif treat_tz_as_dateutil(tz): if obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + deltas[pos], PANDAS_FR_ns, &obj.dts) @@ -1770,10 +1770,10 @@ def datetime_to_datetime64(ndarray[object] values): elif PyDateTime_Check(val): if val.tzinfo is not None: if inferred_tz is not None: - if _get_zone(val.tzinfo) != inferred_tz: + if get_timezone(val.tzinfo) != inferred_tz: raise ValueError('Array must be all same time zone') else: - inferred_tz = _get_zone(val.tzinfo) + inferred_tz = get_timezone(val.tzinfo) _ts = convert_to_tsobject(val, None, None, 0, 0) iresult[i] = _ts.value @@ -4088,9 +4088,9 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return np.array([], dtype=np.int64) # Convert to UTC - if _get_zone(tz1) != 'UTC': + if get_timezone(tz1) != 'UTC': utc_dates = np.empty(n, dtype=np.int64) - if _is_tzlocal(tz1): + if is_tzlocal(tz1): for i in range(n): v = vals[i] if v == NPY_NAT: @@ -4099,7 +4099,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = (int(_get_utcoffset(tz1, dt).total_seconds()) + delta = (int(get_utcoffset(tz1, dt).total_seconds()) * 1000000000) utc_dates[i] = v - delta else: @@ -4126,11 +4126,11 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): else: utc_dates = vals - if _get_zone(tz2) == 'UTC': + if get_timezone(tz2) == 'UTC': return utc_dates result = np.zeros(n, dtype=np.int64) - if _is_tzlocal(tz2): + if is_tzlocal(tz2): for i in range(n): v = utc_dates[i] if v == NPY_NAT: @@ -4139,7 +4139,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = (int(_get_utcoffset(tz2, dt).total_seconds()) + delta = (int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000) result[i] = v + delta return result @@ -4202,13 +4202,13 @@ def tz_convert_single(int64_t val, object tz1, object tz2): return val # Convert to UTC - if 
_is_tzlocal(tz1): + if is_tzlocal(tz1): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz1) - delta = int(_get_utcoffset(tz1, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz1, dt).total_seconds()) * 1000000000 utc_date = val - delta - elif _get_zone(tz1) != 'UTC': + elif get_timezone(tz1) != 'UTC': trans, deltas, typ = _get_dst_info(tz1) pos = trans.searchsorted(val, side='right') - 1 if pos < 0: @@ -4218,13 +4218,13 @@ def tz_convert_single(int64_t val, object tz1, object tz2): else: utc_date = val - if _get_zone(tz2) == 'UTC': + if get_timezone(tz2) == 'UTC': return utc_date - if _is_tzlocal(tz2): + if is_tzlocal(tz2): pandas_datetime_to_datetimestruct(val, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz2) - delta = int(_get_utcoffset(tz2, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz2, dt).total_seconds()) * 1000000000 return utc_date + delta # Convert UTC to other timezone @@ -4289,13 +4289,13 @@ cdef object _get_dst_info(object tz): """ cache_key = _tz_cache_key(tz) if cache_key is None: - num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 return (np.array([NPY_NAT + 1], dtype=np.int64), np.array([num], dtype=np.int64), None) if cache_key not in dst_cache: - if _treat_tz_as_pytz(tz): + if treat_tz_as_pytz(tz): trans = np.array(tz._utc_transition_times, dtype='M8[ns]') trans = trans.view('i8') try: @@ -4306,7 +4306,7 @@ cdef object _get_dst_info(object tz): deltas = _unbox_utcoffsets(tz._transition_info) typ = 'pytz' - elif _treat_tz_as_dateutil(tz): + elif treat_tz_as_dateutil(tz): if len(tz._trans_list): # get utc trans times trans_list = _get_utc_trans_times_from_dateutil_tz(tz) @@ -4336,7 +4336,7 @@ cdef object _get_dst_info(object tz): else: # static tzinfo trans = np.array([NPY_NAT + 1], dtype=np.int64) - num = int(_get_utcoffset(tz, None).total_seconds()) * 1000000000 + num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 deltas = np.array([num], dtype=np.int64) typ = 'static' @@ -4405,13 +4405,13 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result = np.empty(n, dtype=np.int64) - if _is_tzlocal(tz): + if is_tzlocal(tz): for i in range(n): v = vals[i] pandas_datetime_to_datetimestruct(v, PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 result[i] = v - delta return result @@ -5116,7 +5116,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): ndarray[int64_t] trans, deltas, pos pandas_datetimestruct dts - if _is_utc(tz): + if is_utc(tz): with nogil: for i in range(n): if stamps[i] == NPY_NAT: @@ -5125,7 +5125,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): pandas_datetime_to_datetimestruct( stamps[i], PANDAS_FR_ns, &dts) result[i] = _normalized_stamp(&dts) - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT @@ -5133,7 +5133,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tz) - delta = int(_get_utcoffset(tz, dt).total_seconds()) * 1000000000 + delta = 
int(get_utcoffset(tz, dt).total_seconds()) * 1000000000 pandas_datetime_to_datetimestruct(stamps[i] + delta, PANDAS_FR_ns, &dts) result[i] = _normalized_stamp(&dts) @@ -5180,12 +5180,12 @@ def dates_normalized(ndarray[int64_t] stamps, tz=None): Py_ssize_t i, n = len(stamps) pandas_datetimestruct dts - if tz is None or _is_utc(tz): + if tz is None or is_utc(tz): for i in range(n): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) if (dts.hour + dts.min + dts.sec + dts.us) > 0: return False - elif _is_tzlocal(tz): + elif is_tzlocal(tz): for i in range(n): pandas_datetime_to_datetimestruct(stamps[i], PANDAS_FR_ns, &dts) dt = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index 897bd8af7e2deb..ead5566440ca08 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- # cython: profile=False -cdef bint _is_utc(object tz) -cdef bint _is_tzlocal(object tz) +cdef bint is_utc(object tz) +cdef bint is_tzlocal(object tz) -cdef bint _treat_tz_as_pytz(object tz) -cdef bint _treat_tz_as_dateutil(object tz) +cdef bint treat_tz_as_pytz(object tz) +cdef bint treat_tz_as_dateutil(object tz) -cdef object _get_zone(object tz) +cpdef object get_timezone(object tz) -cpdef _get_utcoffset(tzinfo, obj) +cpdef get_utcoffset(tzinfo, obj) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 249eedef4bb098..3db369a09ba2d0 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -10,24 +10,24 @@ import pytz UTC = pytz.utc -cdef inline bint _is_utc(object tz): +cdef inline bint is_utc(object tz): return tz is UTC or isinstance(tz, _dateutil_tzutc) -cdef inline bint _is_tzlocal(object tz): +cdef inline bint is_tzlocal(object tz): return isinstance(tz, _dateutil_tzlocal) -cdef inline bint _treat_tz_as_pytz(object tz): +cdef inline bint treat_tz_as_pytz(object tz): return hasattr(tz, '_utc_transition_times') and hasattr( tz, '_transition_info') -cdef inline bint _treat_tz_as_dateutil(object tz): +cdef inline bint treat_tz_as_dateutil(object tz): return hasattr(tz, '_trans_list') and hasattr(tz, '_trans_idx') -cdef inline object _get_zone(object tz): +cpdef inline object get_timezone(object tz): """ We need to do several things here: 1) Distinguish between pytz and dateutil timezones @@ -40,10 +40,10 @@ cdef inline object _get_zone(object tz): the tz name. It needs to be a string so that we can serialize it with UJSON/pytables. maybe_get_tz (below) is the inverse of this process. """ - if _is_utc(tz): + if is_utc(tz): return 'UTC' else: - if _treat_tz_as_dateutil(tz): + if treat_tz_as_dateutil(tz): if '.tar.gz' in tz._filename: raise ValueError( 'Bad tz filename. 
Dateutil on python 3 on windows has a ' @@ -64,14 +64,10 @@ cdef inline object _get_zone(object tz): except AttributeError: return tz - -def get_timezone(tz): - return _get_zone(tz) - #---------------------------------------------------------------------- # UTC Offsets -cpdef _get_utcoffset(tzinfo, obj): +cpdef get_utcoffset(tzinfo, obj): try: return tzinfo._utcoffset except AttributeError: From 2cf2566de98201454b10b749ac628d538f9695a9 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 14 Sep 2017 04:11:30 -0600 Subject: [PATCH 082/188] Make *_range functions consistent (#17482) --- doc/source/api.rst | 9 + doc/source/timeseries.rst | 9 + doc/source/whatsnew/v0.21.0.txt | 55 +++- pandas/core/indexes/datetimes.py | 58 ++-- pandas/core/indexes/interval.py | 170 ++++++++--- pandas/core/indexes/period.py | 62 +++- pandas/core/indexes/timedeltas.py | 54 +++- .../indexes/datetimes/test_construction.py | 5 +- .../indexes/datetimes/test_date_range.py | 51 +++- .../tests/indexes/period/test_construction.py | 5 +- .../tests/indexes/period/test_period_range.py | 94 ++++++ pandas/tests/indexes/test_interval.py | 279 ++++++++++++++++-- .../indexes/timedeltas/test_construction.py | 5 +- .../timedeltas/test_timedelta_range.py | 21 +- 14 files changed, 747 insertions(+), 130 deletions(-) create mode 100644 pandas/tests/indexes/period/test_period_range.py diff --git a/doc/source/api.rst b/doc/source/api.rst index 27a4ab9cc6cbc4..1541bbccefe214 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -218,10 +218,19 @@ Top-level dealing with datetimelike to_timedelta date_range bdate_range + cdate_range period_range timedelta_range infer_freq +Top-level dealing with intervals +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + interval_range + Top-level evaluation ~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index c86c58c3183f6f..5422d5c53043d3 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1705,6 +1705,15 @@ has multiplied span. pd.PeriodIndex(start='2014-01', freq='3M', periods=4) +If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor +endpoints for a ``PeriodIndex`` with frequency matching that of the +``PeriodIndex`` constructor. + +.. ipython:: python + + pd.PeriodIndex(start=pd.Period('2017Q1', freq='Q'), + end=pd.Period('2017Q2', freq='Q'), freq='M') + Just like ``DatetimeIndex``, a ``PeriodIndex`` can also be used to index pandas objects: diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 9da1f321ef5740..939199d3f6fa6d 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -218,7 +218,7 @@ Furthermore this will now correctly box the results of iteration for :func:`Data .. ipython:: ipython d = {'a':[1], 'b':['b']} - df = pd,DataFrame(d) + df = pd.DataFrame(d) Previously: @@ -358,6 +358,59 @@ Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ` Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns. +.. 
_whatsnew_0210.api.consistency_of_range_functions:
+
+Consistency of Range Functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In previous versions, there were some inconsistencies between the various range
+functions: :func:`date_range`, :func:`bdate_range`, :func:`cdate_range`,
+:func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`.
+(:issue:`17471`).
+
+One of the inconsistent behaviors occurred when the ``start``, ``end`` and
+``periods`` parameters were all specified, potentially leading to ambiguous
+ranges. When all three parameters were passed, ``interval_range`` ignored the
+``periods`` parameter, ``period_range`` ignored the ``end`` parameter, and the
+other range functions raised. To promote consistency among the range functions,
+and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range``
+will now raise when all three parameters are passed.
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [2]: pd.interval_range(start=0, end=4, periods=6)
+   Out[2]:
+   IntervalIndex([(0, 1], (1, 2], (2, 3]]
+                 closed='right',
+                 dtype='interval[int64]')
+
+   In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q')
+   Out[3]: PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2'], dtype='period[Q-DEC]', freq='Q-DEC')
+
+New Behavior:
+
+.. code-block:: ipython
+
+   In [2]: pd.interval_range(start=0, end=4, periods=6)
+   ---------------------------------------------------------------------------
+   ValueError: Of the three parameters: start, end, and periods, exactly two must be specified
+
+   In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q')
+   ---------------------------------------------------------------------------
+   ValueError: Of the three parameters: start, end, and periods, exactly two must be specified
+
+Additionally, the endpoint parameter ``end`` was not included in the intervals
+produced by ``interval_range``. However, all other range functions include
+``end`` in their output. To promote consistency among the range functions,
+``interval_range`` will now include ``end`` as the right endpoint of the final
+interval, except if ``freq`` is specified in a way which skips ``end``.
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [4]: pd.interval_range(start=0, end=4)
+   Out[4]:
+   IntervalIndex([(0, 1], (1, 2], (2, 3]]
+                 closed='right',
+                 dtype='interval[int64]')
+
+
+New Behavior:
+
+.. ipython:: python
+
+   pd.interval_range(start=0, end=4)
+
.. 
_whatsnew_0210.api: Other API Changes diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 4cfb7547e7d0ac..1c8d0b334b91c5 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -292,8 +292,8 @@ def __new__(cls, data=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -412,7 +412,8 @@ def __new__(cls, data=None, def _generate(cls, start, end, periods, name, offset, tz=None, normalize=False, ambiguous='raise', closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + raise ValueError('Of the three parameters: start, end, and ' + 'periods, exactly two must be specified') _normalized = True @@ -2004,7 +2005,7 @@ def _generate_regular_range(start, end, periods, offset): def date_range(start=None, end=None, periods=None, freq='D', tz=None, normalize=False, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with day (calendar) as the default + Return a fixed frequency DatetimeIndex, with day (calendar) as the default frequency Parameters @@ -2013,24 +2014,25 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Hong_Kong normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2047,7 +2049,7 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - Return a fixed frequency datetime index, with business day as the default + Return a fixed frequency DatetimeIndex, with business day as the default frequency Parameters @@ -2056,8 +2058,8 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'B' (business daily) Frequency strings can have multiples, e.g. 
'5H' tz : string or None @@ -2065,15 +2067,16 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting DatetimeIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. @@ -2091,7 +2094,7 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, normalize=True, name=None, closed=None, **kwargs): """ - **EXPERIMENTAL** Return a fixed frequency datetime index, with + **EXPERIMENTAL** Return a fixed frequency DatetimeIndex, with CustomBusinessDay as the default frequency .. warning:: EXPERIMENTAL @@ -2105,29 +2108,30 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Left bound for generating dates end : string or datetime-like, default None Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'C' (CustomBusinessDay) Frequency strings can have multiples, e.g. '5H' - tz : string or None + tz : string, default None Time zone name for returning localized DatetimeIndex, for example Asia/Beijing normalize : bool, default False Normalize start/end dates to midnight before generating date range - name : str, default None - Name for the resulting index - weekmask : str, Default 'Mon Tue Wed Thu Fri' + name : string, default None + Name of the resulting DatetimeIndex + weekmask : string, Default 'Mon Tue Wed Thu Fri' weekmask of valid business days, passed to ``numpy.busdaycalendar`` holidays : list list/array of dates to exclude from the set of valid business days, passed to ``numpy.busdaycalendar`` - closed : string or None, default None + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. 
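
The docstrings above for ``date_range``, ``bdate_range``, and ``cdate_range`` all state the same rule, enforced by the new shared error message. A minimal sketch of the resulting behavior (illustrative only, not part of the patch; assumes a pandas build that includes this change):

```python
import pandas as pd

# Exactly two of start/end/periods must be specified; freq has a default.
print(pd.date_range(start='2017-01-01', periods=3))   # start + periods: OK
print(pd.bdate_range(end='2017-01-06', periods=3))    # end + periods: OK

# Passing all three is ambiguous, so every range function now raises.
try:
    pd.date_range(start='2017-01-01', end='2017-01-10', periods=5)
except ValueError as err:
    # "Of the three parameters: start, end, and periods,
    #  exactly two must be specified"
    print(err)
```
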
diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e0ed6c7ea35c0c..6e80f6c900386d 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -15,6 +15,8 @@ is_float_dtype, is_interval_dtype, is_scalar, + is_float, + is_number, is_integer) from pandas.core.indexes.base import ( Index, _ensure_index, @@ -25,11 +27,15 @@ Interval, IntervalMixin, IntervalTree, intervals_to_interval_bounds) +from pandas.core.indexes.datetimes import date_range +from pandas.core.indexes.timedeltas import timedelta_range from pandas.core.indexes.multi import MultiIndex from pandas.compat.numpy import function as nv from pandas.core import common as com from pandas.util._decorators import cache_readonly, Appender from pandas.core.config import get_option +from pandas.tseries.frequencies import to_offset +from pandas.tseries.offsets import DateOffset import pandas.core.indexes.base as ibase _index_doc_kwargs = dict(ibase._index_doc_kwargs) @@ -1028,54 +1034,152 @@ def func(self, other): IntervalIndex._add_logical_methods_disabled() -def interval_range(start=None, end=None, freq=None, periods=None, - name=None, closed='right', **kwargs): +def _is_valid_endpoint(endpoint): + """helper for interval_range to check if start/end are valid types""" + return any([is_number(endpoint), + isinstance(endpoint, Timestamp), + isinstance(endpoint, Timedelta), + endpoint is None]) + + +def _is_type_compatible(a, b): + """helper for interval_range to check type compat of start/end/freq""" + is_ts_compat = lambda x: isinstance(x, (Timestamp, DateOffset)) + is_td_compat = lambda x: isinstance(x, (Timedelta, DateOffset)) + return ((is_number(a) and is_number(b)) or + (is_ts_compat(a) and is_ts_compat(b)) or + (is_td_compat(a) and is_td_compat(b)) or + com._any_none(a, b)) + + +def interval_range(start=None, end=None, periods=None, freq=None, + name=None, closed='right'): """ Return a fixed frequency IntervalIndex Parameters ---------- - start : string or datetime-like, default None - Left bound for generating data - end : string or datetime-like, default None - Right bound for generating data - freq : interger, string or DateOffset, default 1 - periods : interger, default None - name : str, default None - Name of the resulting index + start : numeric or datetime-like, default None + Left bound for generating intervals + end : numeric or datetime-like, default None + Right bound for generating intervals + periods : integer, default None + Number of periods to generate + freq : numeric, string, or DateOffset, default None + The length of each interval. Must be consistent with the type of start + and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 + for numeric and 'D' (calendar daily) for datetime-like. + name : string, default None + Name of the resulting IntervalIndex closed : string, default 'right' options are: 'left', 'right', 'both', 'neither' Notes ----- - 2 of start, end, or periods must be specified + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. Returns ------- rng : IntervalIndex + + Examples + -------- + + Numeric ``start`` and ``end`` is supported. + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] + closed='right', dtype='interval[int64]') + + Additionally, datetime-like input is also supported. 
+
+    >>> pd.interval_range(start='2017-01-01', end='2017-01-04')
+    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
+                   (2017-01-03, 2017-01-04]]
+                  closed='right', dtype='interval[datetime64[ns]]')
+
+    The ``freq`` parameter specifies the frequency between the left and right
+    endpoints of the individual intervals within the ``IntervalIndex``. For
+    numeric ``start`` and ``end``, the frequency must also be numeric.
+
+    >>> pd.interval_range(start=0, periods=4, freq=1.5)
+    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]]
+                  closed='right', dtype='interval[float64]')
+
+    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
+    convertible to a DateOffset.
+
+    >>> pd.interval_range(start='2017-01-01', periods=3, freq='MS')
+    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
+                   (2017-03-01, 2017-04-01]]
+                  closed='right', dtype='interval[datetime64[ns]]')
+
+    The ``closed`` parameter specifies which endpoints of the individual
+    intervals within the ``IntervalIndex`` are closed.
+
+    >>> pd.interval_range(end=5, periods=4, closed='both')
+    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]]
+                  closed='both', dtype='interval[int64]')
     """
+    if com._count_not_none(start, end, periods) != 2:
+        raise ValueError('Of the three parameters: start, end, and periods, '
+                         'exactly two must be specified')
+
+    start = com._maybe_box_datetimelike(start)
+    end = com._maybe_box_datetimelike(end)
+    endpoint = next(com._not_none(start, end))
+
+    if not _is_valid_endpoint(start):
+        msg = 'start must be numeric or datetime-like, got {start}'
+        raise ValueError(msg.format(start=start))
+
+    if not _is_valid_endpoint(end):
+        msg = 'end must be numeric or datetime-like, got {end}'
+        raise ValueError(msg.format(end=end))
+
+    if is_float(periods):
+        periods = int(periods)
+    elif not is_integer(periods) and periods is not None:
+        msg = 'periods must be a number, got {periods}'
+        raise TypeError(msg.format(periods=periods))
+
+    freq = freq or (1 if is_number(endpoint) else 'D')
+    if not is_number(freq):
+        try:
+            freq = to_offset(freq)
+        except ValueError:
+            raise ValueError('freq must be numeric or convertible to '
+                             'DateOffset, got {freq}'.format(freq=freq))

-    if freq is None:
-        freq = 1
+    # verify type compatibility
+    if not all([_is_type_compatible(start, end),
+                _is_type_compatible(start, freq),
+                _is_type_compatible(end, freq)]):
+        raise TypeError("start, end, freq need to be type compatible")

-    if start is None:
-        if periods is None or end is None:
-            raise ValueError("must specify 2 of start, end, periods")
-        start = end - periods * freq
-    if end is None:
-        if periods is None or start is None:
-            raise ValueError("must specify 2 of start, end, periods")
+    if is_number(endpoint):
+        if periods is None:
+            periods = int((end - start) // freq)
+
+        if start is None:
+            start = end - periods * freq
+
+        # force end to be consistent with freq (lower if freq skips over end)
         end = start + periods * freq

-    if periods is None:
-        if start is None or end is None:
-            raise ValueError("must specify 2 of start, end, periods")
-        pass
-
-    # must all be same units or None
-    arr = np.array([start, end, freq])
-    if is_object_dtype(arr):
-        raise ValueError("start, end, freq need to be the same type")
-
-    return IntervalIndex.from_breaks(np.arange(start, end, freq),
-                                     name=name,
-                                     closed=closed)
+
+        # end + freq for inclusive endpoint
+        breaks = np.arange(start, end + freq, freq)
+    elif isinstance(endpoint, Timestamp):
+        # add one to account for interval endpoints (n breaks = n-1 
intervals)
+        if periods is not None:
+            periods += 1
+        breaks = date_range(start=start, end=end, periods=periods, freq=freq)
+    else:
+        # add one to account for interval endpoints (n breaks = n-1 intervals)
+        if periods is not None:
+            periods += 1
+        breaks = timedelta_range(start=start, end=end, periods=periods,
+                                 freq=freq)
+
+    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py
index 0915462d4d4212..fb47d1db48610b 100644
--- a/pandas/core/indexes/period.py
+++ b/pandas/core/indexes/period.py
@@ -199,8 +199,8 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
             if is_float(periods):
                 periods = int(periods)
             elif not is_integer(periods):
-                raise ValueError('Periods must be a number, got %s' %
-                                 str(periods))
+                msg = 'periods must be a number, got {periods}'
+                raise TypeError(msg.format(periods=periods))

         if name is None and hasattr(data, 'name'):
             name = data.name
@@ -1051,8 +1051,9 @@ def tz_localize(self, tz, infer_dst=False):


 def _get_ordinal_range(start, end, periods, freq, mult=1):
-    if com._count_not_none(start, end, periods) < 2:
-        raise ValueError('Must specify 2 of start, end, periods')
+    if com._count_not_none(start, end, periods) != 2:
+        raise ValueError('Of the three parameters: start, end, and periods, '
+                         'exactly two must be specified')

     if freq is not None:
         _, mult = _gfc(freq)
@@ -1066,9 +1067,9 @@ def _get_ordinal_range(start, end, periods, freq, mult=1):
     is_end_per = isinstance(end, Period)

     if is_start_per and is_end_per and start.freq != end.freq:
-        raise ValueError('Start and end must have same freq')
+        raise ValueError('start and end must have same freq')
     if (start is tslib.NaT or end is tslib.NaT):
-        raise ValueError('Start and end must not be NaT')
+        raise ValueError('start and end must not be NaT')

     if freq is None:
         if is_start_per:
@@ -1157,24 +1158,55 @@ def pnow(freq=None):

 def period_range(start=None, end=None, periods=None, freq='D', name=None):
     """
-    Return a fixed frequency datetime index, with day (calendar) as the default
+    Return a fixed frequency PeriodIndex, with day (calendar) as the default
     frequency
-
     Parameters
     ----------
-    start : starting value, period-like, optional
-    end : ending value, period-like, optional
-    periods : int, default None
-        Number of periods in the index
-    freq : str/DateOffset, default 'D'
+    start : string or period-like, default None
+        Left bound for generating periods
+    end : string or period-like, default None
+        Right bound for generating periods
+    periods : integer, default None
+        Number of periods to generate
+    freq : string or DateOffset, default 'D' (calendar daily)
         Frequency alias
-    name : str, default None
-        Name for the resulting PeriodIndex
+    name : string, default None
+        Name of the resulting PeriodIndex
+
+    Notes
+    -----
+    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
+    must be specified.
+
+    To learn more about the frequency strings, please see `this link
+    `__.

     Returns
     -------
     prng : PeriodIndex
+
+    Examples
+    --------
+
+    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
+    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05',
+                 '2017-06', '2017-07', '2017-08', '2017-09', '2017-10',
+                 '2017-11', '2017-12', '2018-01'],
+                dtype='period[M]', freq='M')
+
+    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
+    endpoints for a ``PeriodIndex`` with frequency matching that of the
+    ``period_range`` constructor. 
+ + >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'), + ... end=pd.Period('2017Q2', freq='Q'), freq='M') + PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'], + dtype='period[M]', freq='M') """ + if com._count_not_none(start, end, periods) != 2: + raise ValueError('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + return PeriodIndex(start=start, end=end, periods=periods, freq=freq, name=name) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 2823951c0f3487..d7b7d56d74a3a9 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -180,8 +180,8 @@ def __new__(cls, data=None, unit=None, if is_float(periods): periods = int(periods) elif not is_integer(periods): - raise ValueError('Periods must be a number, got %s' % - str(periods)) + msg = 'periods must be a number, got {periods}' + raise TypeError(msg.format(periods=periods)) if data is None and freq is None: raise ValueError("Must provide freq argument if no data is " @@ -234,7 +234,8 @@ def __new__(cls, data=None, unit=None, @classmethod def _generate(cls, start, end, periods, name, offset, closed=None): if com._count_not_none(start, end, periods) != 2: - raise ValueError('Must specify two of start, end, or periods') + raise ValueError('Of the three parameters: start, end, and ' + 'periods, exactly two must be specified') if start is not None: start = Timedelta(start) @@ -960,22 +961,22 @@ def _generate_regular_range(start, end, periods, offset): def timedelta_range(start=None, end=None, periods=None, freq='D', name=None, closed=None): """ - Return a fixed frequency timedelta index, with day as the default + Return a fixed frequency TimedeltaIndex, with day as the default frequency Parameters ---------- start : string or timedelta-like, default None - Left bound for generating dates - end : string or datetime-like, default None - Right bound for generating dates - periods : integer or None, default None - If None, must specify start and end + Left bound for generating timedeltas + end : string or timedelta-like, default None + Right bound for generating timedeltas + periods : integer, default None + Number of periods to generate freq : string or DateOffset, default 'D' (calendar daily) Frequency strings can have multiples, e.g. '5H' - name : str, default None - Name of the resulting index - closed : string or None, default None + name : string, default None + Name of the resulting TimedeltaIndex + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) @@ -985,11 +986,34 @@ def timedelta_range(start=None, end=None, periods=None, freq='D', Notes ----- - 2 of start, end, or periods must be specified. + Of the three parameters: ``start``, ``end``, and ``periods``, exactly two + must be specified. To learn more about the frequency strings, please see `this link `__. + + Examples + -------- + + >>> pd.timedelta_range(start='1 day', periods=4) + TimedeltaIndex(['1 days', '2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``closed`` parameter specifies which endpoint is included. The default + behavior is to include both endpoints. + + >>> pd.timedelta_range(start='1 day', periods=4, closed='right') + TimedeltaIndex(['2 days', '3 days', '4 days'], + dtype='timedelta64[ns]', freq='D') + + The ``freq`` parameter specifies the frequency of the TimedeltaIndex. 
+ Only fixed frequencies can be passed, non-fixed frequencies such as + 'M' (month end) will raise. + + >>> pd.timedelta_range(start='1 day', end='2 days', freq='6H') + TimedeltaIndex(['1 days 00:00:00', '1 days 06:00:00', '1 days 12:00:00', + '1 days 18:00:00', '2 days 00:00:00'], + dtype='timedelta64[ns]', freq='6H') """ return TimedeltaIndex(start=start, end=end, periods=periods, - freq=freq, name=name, - closed=closed) + freq=freq, name=name, closed=closed) diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index cf896b06130a24..a4706dd8a3767b 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -307,8 +307,9 @@ def test_constructor_coverage(self): exp = date_range('1/1/2000', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + DatetimeIndex(start='1/1/2000', periods='foo', freq='D') pytest.raises(ValueError, DatetimeIndex, start='1/1/2000', end='1/10/2000') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index da4ca83c10dda2..8d86bebdd4d5e4 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -107,8 +107,10 @@ def test_date_range_ambiguous_arguments(self): start = datetime(2011, 1, 1, 5, 3, 40) end = datetime(2011, 1, 1, 8, 9, 40) - pytest.raises(ValueError, date_range, start, end, freq='s', - periods=10) + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + date_range(start, end, periods=10, freq='s') def test_date_range_businesshour(self): idx = DatetimeIndex(['2014-07-04 09:00', '2014-07-04 10:00', @@ -146,14 +148,29 @@ def test_date_range_businesshour(self): def test_range_misspecified(self): # GH #1095 + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000') + + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000') + + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10) + + with tm.assert_raises_regex(ValueError, msg): + date_range(start='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, '1/1/2000') - pytest.raises(ValueError, date_range, end='1/1/2000') - pytest.raises(ValueError, date_range, periods=10) + with tm.assert_raises_regex(ValueError, msg): + date_range(end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, '1/1/2000', freq='H') - pytest.raises(ValueError, date_range, end='1/1/2000', freq='H') - pytest.raises(ValueError, date_range, periods=10, freq='H') + with tm.assert_raises_regex(ValueError, msg): + date_range(periods=10, freq='H') + + with tm.assert_raises_regex(ValueError, msg): + date_range() def test_compat_replace(self): # https://github.com/statsmodels/statsmodels/issues/3349 @@ -231,8 +248,13 @@ def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) bdate_range(end=START, periods=20, freq=BDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'B') - pytest.raises(ValueError, bdate_range, '2011-1-1', '2012-1-1', 'B') + + msg = 'periods must be a number, got B' + with 
tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'B') + + with tm.assert_raises_regex(TypeError, msg): + bdate_range('2011-1-1', '2012-1-1', 'B') def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) @@ -510,8 +532,13 @@ def test_constructor(self): cdate_range(START, END, freq=CDay()) cdate_range(START, periods=20, freq=CDay()) cdate_range(end=START, periods=20, freq=CDay()) - pytest.raises(ValueError, date_range, '2011-1-1', '2012-1-1', 'C') - pytest.raises(ValueError, cdate_range, '2011-1-1', '2012-1-1', 'C') + + msg = 'periods must be a number, got C' + with tm.assert_raises_regex(TypeError, msg): + date_range('2011-1-1', '2012-1-1', 'C') + + with tm.assert_raises_regex(TypeError, msg): + cdate_range('2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index e5b889e1003070..639a9272c38082 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -436,11 +436,12 @@ def test_constructor_error(self): start = Period('02-Apr-2005', 'B') end_intv = Period('2006-12-31', ('w', 1)) - msg = 'Start and end must have same freq' + msg = 'start and end must have same freq' with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start, end=end_intv) - msg = 'Must specify 2 of start, end, periods' + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') with tm.assert_raises_regex(ValueError, msg): PeriodIndex(start=start) diff --git a/pandas/tests/indexes/period/test_period_range.py b/pandas/tests/indexes/period/test_period_range.py new file mode 100644 index 00000000000000..640f24f67f72f2 --- /dev/null +++ b/pandas/tests/indexes/period/test_period_range.py @@ -0,0 +1,94 @@ +import pytest +import pandas.util.testing as tm +from pandas import date_range, NaT, period_range, Period, PeriodIndex + + +class TestPeriodRange(object): + + @pytest.mark.parametrize('freq', ['D', 'W', 'M', 'Q', 'A']) + def test_construction_from_string(self, freq): + # non-empty + expected = date_range(start='2017-01-01', periods=5, + freq=freq, name='foo').to_period() + start, end = str(expected[0]), str(expected[-1]) + + result = period_range(start=start, end=end, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=start, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=5, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq=freq, name='foo') + + result = period_range(start=start, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq=freq, name='foo') + tm.assert_index_equal(result, expected) + + def test_construction_from_period(self): + # upsampling + start, end = Period('2017Q1', freq='Q'), Period('2018Q1', freq='Q') + expected = date_range(start='2017-03-31', end='2018-03-31', freq='M', + name='foo').to_period() + result = period_range(start=start, end=end, freq='M', name='foo') + tm.assert_index_equal(result, expected) + + # downsampling + start, end = Period('2017-1', freq='M'), Period('2019-12', freq='M') + expected = 
date_range(start='2017-01-31', end='2019-12-31', freq='Q', + name='foo').to_period() + result = period_range(start=start, end=end, freq='Q', name='foo') + tm.assert_index_equal(result, expected) + + # empty + expected = PeriodIndex([], freq='W', name='foo') + + result = period_range(start=start, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(end=end, periods=0, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + result = period_range(start=end, end=start, freq='W', name='foo') + tm.assert_index_equal(result, expected) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(end='2017Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(periods=5) + + with tm.assert_raises_regex(ValueError, msg): + period_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end='2018Q1', periods=8, freq='Q') + + # start/end NaT + msg = 'start and end must not be NaT' + with tm.assert_raises_regex(ValueError, msg): + period_range(start=NaT, end='2018Q1') + + with tm.assert_raises_regex(ValueError, msg): + period_range(start='2017Q1', end=NaT) + + # invalid periods param + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + period_range(start='2017Q1', periods='foo') diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index 18eefc3fbdca6e..13c3b35e4d85d9 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -2,10 +2,11 @@ import pytest import numpy as np - +from datetime import timedelta from pandas import (Interval, IntervalIndex, Index, isna, interval_range, Timestamp, Timedelta, - compat) + compat, date_range, timedelta_range, DateOffset) +from pandas.tseries.offsets import Day from pandas._libs.interval import IntervalTree from pandas.tests.indexes.common import Base import pandas.util.testing as tm @@ -721,40 +722,278 @@ def test_is_non_overlapping_monotonic(self): class TestIntervalRange(object): - def test_construction(self): - result = interval_range(0, 5, name='foo', closed='both') + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_numeric(self, closed): + # combinations of start/end/periods without freq expected = IntervalIndex.from_breaks( - np.arange(0, 5), name='foo', closed='both') + np.arange(0, 6), name='foo', closed=closed) + + result = interval_range(start=0, end=5, name='foo', closed=closed) tm.assert_index_equal(result, expected) - def test_errors(self): + result = interval_range(start=0, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=5, periods=5, name='foo', closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with freq + expected = IntervalIndex.from_tuples([(0, 2), (2, 4), (4, 6)], + name='foo', closed=closed) + + result = interval_range(start=0, end=6, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=0, periods=3, freq=2, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=6, periods=3, freq=2, name='foo', + closed=closed) + 
tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + expected = IntervalIndex.from_tuples([(0.0, 1.5), (1.5, 3.0)], + name='foo', closed=closed) + result = interval_range(start=0, end=4, freq=1.5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timestamp(self, closed): + # combinations of start/end/periods without freq + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-06') + breaks = date_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-07') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timestamp('2017-01-08') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with non-fixed freq + freq = 'M' + start, end = Timestamp('2017-01-01'), Timestamp('2017-12-31') + breaks = date_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=11, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. 
+ end = Timestamp('2018-01-15') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize('closed', ['left', 'right', 'neither', 'both']) + def test_construction_from_timedelta(self, closed): + # combinations of start/end/periods without freq + start, end = Timedelta('1 day'), Timedelta('6 days') + breaks = timedelta_range(start=start, end=end) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=5, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # combinations of start/end/periods with fixed freq + freq = '2D' + start, end = Timedelta('1 day'), Timedelta('7 days') + breaks = timedelta_range(start=start, end=end, freq=freq) + expected = IntervalIndex.from_breaks(breaks, name='foo', closed=closed) + + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(start=start, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + result = interval_range(end=end, periods=3, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + # output truncates early if freq causes end to be skipped. + end = Timedelta('7 days 1 hour') + result = interval_range(start=start, end=end, freq=freq, name='foo', + closed=closed) + tm.assert_index_equal(result, expected) + + def test_constructor_coverage(self): + # float value for periods + expected = pd.interval_range(start=0, periods=10) + result = pd.interval_range(start=0, periods=10.5) + tm.assert_index_equal(result, expected) + + # equivalent timestamp-like start/end + start, end = Timestamp('2017-01-01'), Timestamp('2017-01-15') + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pydatetime(), + end=end.to_pydatetime()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.tz_localize('UTC'), + end=end.tz_localize('UTC')) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timestamp + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1), + DateOffset(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + + # equivalent timedelta-like start/end + start, end = Timedelta(days=1), Timedelta(days=10) + expected = pd.interval_range(start=start, end=end) + + result = pd.interval_range(start=start.to_pytimedelta(), + end=end.to_pytimedelta()) + tm.assert_index_equal(result, expected) + + result = pd.interval_range(start=start.asm8, end=end.asm8) + tm.assert_index_equal(result, expected) + + # equivalent freq with timedelta + equiv_freq = ['D', Day(), Timedelta(days=1), timedelta(days=1)] + for freq in equiv_freq: + result = pd.interval_range(start=start, end=end, freq=freq) + tm.assert_index_equal(result, expected) + def test_errors(self): # not enough params - def f(): - interval_range(0) + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be 
specified') - pytest.raises(ValueError, f) + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0) - def f(): - interval_range(periods=2) + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=5) - pytest.raises(ValueError, f) + with tm.assert_raises_regex(ValueError, msg): + interval_range(periods=2) - def f(): + with tm.assert_raises_regex(ValueError, msg): interval_range() - pytest.raises(ValueError, f) + # too many params + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=5, periods=6) # mixed units - def f(): - interval_range(0, Timestamp('20130101'), freq=2) + msg = 'start, end, freq need to be type compatible' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timestamp('20130101'), freq=2) - pytest.raises(ValueError, f) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=Timedelta('1 day'), freq=2) - def f(): - interval_range(0, 10, freq=Timedelta('1day')) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, end=10, freq='D') - pytest.raises(ValueError, f) + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timedelta('1 day'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timestamp('20130101'), + end=Timestamp('20130110'), freq=2) + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), end=10, freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timestamp('20130110'), freq='D') + + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=Timedelta('1 day'), + end=Timedelta('10 days'), freq=2) + + # invalid periods + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + interval_range(start=0, periods='foo') + + # invalid start + msg = 'start must be numeric or datetime-like, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start='foo', periods=10) + + # invalid end + msg = 'end must be numeric or datetime-like, got \(0, 1\]' + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Interval(0, 1), periods=10) + + # invalid freq for datetime-like + msg = 'freq must be numeric or convertible to DateOffset, got foo' + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=0, end=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(start=Timestamp('20130101'), periods=10, freq='foo') + + with tm.assert_raises_regex(ValueError, msg): + interval_range(end=Timedelta('1 day'), periods=10, freq='foo') class TestIntervalTree(object): diff --git a/pandas/tests/indexes/timedeltas/test_construction.py b/pandas/tests/indexes/timedeltas/test_construction.py index dd25e2cca2e553..70aadd9f571740 100644 --- a/pandas/tests/indexes/timedeltas/test_construction.py +++ b/pandas/tests/indexes/timedeltas/test_construction.py @@ -50,8 +50,9 @@ def test_constructor_coverage(self): exp = timedelta_range('1 days', periods=10) tm.assert_index_equal(rng, exp) - pytest.raises(ValueError, TimedeltaIndex, start='1 days', - periods='foo', freq='D') + msg = 'periods must be a number, got foo' + with tm.assert_raises_regex(TypeError, msg): + TimedeltaIndex(start='1 days', periods='foo', freq='D') pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') diff --git 
a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 4732a0ce110dea..7624e1f79af152 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -1,5 +1,4 @@ import numpy as np - import pandas as pd import pandas.util.testing as tm from pandas.tseries.offsets import Day, Second @@ -49,3 +48,23 @@ def test_timedelta_range(self): expected = df.loc[pd.Timedelta('0s'):, :] result = df.loc['0s':, :] assert_frame_equal(expected, result) + + def test_errors(self): + # not enough params + msg = ('Of the three parameters: start, end, and periods, ' + 'exactly two must be specified') + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(end='5 days') + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(periods=2) + + with tm.assert_raises_regex(ValueError, msg): + timedelta_range() + + # too many params + with tm.assert_raises_regex(ValueError, msg): + timedelta_range(start='0 days', end='5 days', periods=10) From 97abd2c9c11aeee0e3d2c58a74d85fa75062ca1f Mon Sep 17 00:00:00 2001 From: Kirk Hansen Date: Thu, 14 Sep 2017 05:14:43 -0500 Subject: [PATCH 083/188] TST: Made s3 related tests mock boto (#17388) --- appveyor.yml | 6 ++ ci/install_circle.sh | 1 + ci/install_travis.sh | 2 +- ci/requirements-2.7_WIN.pip | 0 ci/requirements-3.6_NUMPY_DEV.pip | 0 ci/requirements-3.6_WIN.pip | 0 ci/requirements_dev.txt | 1 + pandas/tests/io/parser/data/tips.csv.bz2 | Bin 0 -> 1316 bytes pandas/tests/io/parser/data/tips.csv.gz | Bin 0 -> 1740 bytes pandas/tests/io/parser/test_network.py | 100 ++++++++++++++--------- pandas/tests/io/test_excel.py | 58 ++++++------- tox.ini | 1 + 12 files changed, 102 insertions(+), 67 deletions(-) create mode 100644 ci/requirements-2.7_WIN.pip create mode 100644 ci/requirements-3.6_NUMPY_DEV.pip create mode 100644 ci/requirements-3.6_WIN.pip create mode 100644 pandas/tests/io/parser/data/tips.csv.bz2 create mode 100644 pandas/tests/io/parser/data/tips.csv.gz diff --git a/appveyor.yml b/appveyor.yml index 65e62f887554e5..a1f8886f6d068f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -74,12 +74,18 @@ install: # create our env - cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest>=3.1.0 pytest-xdist - cmd: activate pandas + - cmd: pip install moto - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run - cmd: echo "installing requirements from %REQ%" - cmd: conda install -n pandas --file=%REQ% - cmd: conda list -n pandas - cmd: echo "installing requirements from %REQ% - done" + # add some pip only reqs to the env + - SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.pip + - cmd: echo "installing requirements from %REQ%" + - cmd: pip install -Ur %REQ% + # build em using the local source checkout in the correct windows env - cmd: '%CMD_IN_ENV% python setup.py build_ext --inplace' diff --git a/ci/install_circle.sh b/ci/install_circle.sh index 29ca69970104b0..fd79f907625e9d 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -67,6 +67,7 @@ time conda create -n pandas -q --file=${REQ_BUILD} || exit 1 time conda install -n pandas pytest>=3.1.0 || exit 1 source activate pandas +time pip install moto || exit 1 # build but don't install echo "[build em]" diff --git a/ci/install_travis.sh b/ci/install_travis.sh index d26689f2e6b4bd..b85263daa1eaca 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -104,7 +104,7 @@ if [ 
-e ${REQ} ]; then
 fi

 time conda install -n pandas pytest>=3.1.0
-time pip install pytest-xdist
+time pip install pytest-xdist moto

 if [ "$LINT" ]; then
     conda install flake8
diff --git a/ci/requirements-2.7_WIN.pip b/ci/requirements-2.7_WIN.pip
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/ci/requirements-3.6_NUMPY_DEV.pip b/ci/requirements-3.6_NUMPY_DEV.pip
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/ci/requirements-3.6_WIN.pip b/ci/requirements-3.6_WIN.pip
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/ci/requirements_dev.txt b/ci/requirements_dev.txt
index c7190c506ba18f..dbc4f6cbd65098 100644
--- a/ci/requirements_dev.txt
+++ b/ci/requirements_dev.txt
@@ -5,3 +5,4 @@ cython
 pytest>=3.1.0
 pytest-cov
 flake8
+moto
diff --git a/pandas/tests/io/parser/data/tips.csv.bz2 b/pandas/tests/io/parser/data/tips.csv.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..1452896b05e9d41f58ffd816a0459d86796718a6
GIT binary patch
literal 1316
[base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/parser/data/tips.csv.gz b/pandas/tests/io/parser/data/tips.csv.gz
new file mode 100644
GIT binary patch
literal 1740
[base85-encoded binary data omitted]

literal 0
HcmV?d00001

diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py
index 3344243f8137af..27cc708889fa23 100644
--- a/pandas/tests/io/parser/test_network.py
+++ b/pandas/tests/io/parser/test_network.py
@@ -4,13 +4,20 @@
 Tests parsers ability to read and parse non-local files
 and hence require a network connection to be read.
""" - import os + import pytest +import moto import pandas.util.testing as tm from pandas import DataFrame from pandas.io.parsers import read_csv, read_table +from pandas.compat import BytesIO + + +@pytest.fixture(scope='module') +def tips_file(): + return os.path.join(tm.get_data_path(), 'tips.csv') @pytest.fixture(scope='module') @@ -19,6 +26,40 @@ def salaries_table(): return read_table(path) +@pytest.fixture(scope='module') +def s3_resource(tips_file): + pytest.importorskip('s3fs') + moto.mock_s3().start() + + test_s3_files = [ + ('tips.csv', tips_file), + ('tips.csv.gz', tips_file + '.gz'), + ('tips.csv.bz2', tips_file + '.bz2'), + ] + + def add_tips_files(bucket_name): + for s3_key, file_name in test_s3_files: + with open(file_name, 'rb') as f: + conn.Bucket(bucket_name).put_object( + Key=s3_key, + Body=f) + + boto3 = pytest.importorskip('boto3') + # see gh-16135 + bucket = 'pandas-test' + + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket=bucket) + add_tips_files(bucket) + + conn.create_bucket(Bucket='cant_get_it', ACL='private') + add_tips_files('cant_get_it') + + yield conn + + moto.mock_s3().stop() + + @pytest.mark.network @pytest.mark.parametrize( "compression,extension", @@ -51,15 +92,11 @@ def check_compressed_urls(salaries_table, compression, extension, mode, class TestS3(object): - - def setup_method(self, method): - try: - import s3fs # noqa - except ImportError: - pytest.skip("s3fs not installed") - @tm.network def test_parse_public_s3_bucket(self): + pytest.importorskip('s3fs') + # more of an integration test due to the not-public contents portion + # can probably mock this though. for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, compression=comp) @@ -74,8 +111,8 @@ def test_parse_public_s3_bucket(self): assert not df.empty tm.assert_frame_equal(read_csv(tm.get_data_path('tips.csv')), df) - @tm.network - def test_parse_public_s3n_bucket(self): + def test_parse_public_s3n_bucket(self, s3_resource): + # Read from AWS s3 as "s3n" URL df = read_csv('s3n://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) @@ -83,8 +120,7 @@ def test_parse_public_s3n_bucket(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3a_bucket(self): + def test_parse_public_s3a_bucket(self, s3_resource): # Read from AWS s3 as "s3a" URL df = read_csv('s3a://pandas-test/tips.csv', nrows=10) assert isinstance(df, DataFrame) @@ -92,8 +128,7 @@ def test_parse_public_s3a_bucket(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3_bucket_nrows(self): + def test_parse_public_s3_bucket_nrows(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, nrows=10, compression=comp) @@ -102,8 +137,7 @@ def test_parse_public_s3_bucket_nrows(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_parse_public_s3_bucket_chunked(self): + def test_parse_public_s3_bucket_chunked(self, s3_resource): # Read with a chunksize chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -121,8 +155,7 @@ def test_parse_public_s3_bucket_chunked(self): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - @tm.network - def test_parse_public_s3_bucket_chunked_python(self): + def 
test_parse_public_s3_bucket_chunked_python(self, s3_resource): # Read with a chunksize using the Python parser chunksize = 5 local_tips = read_csv(tm.get_data_path('tips.csv')) @@ -140,8 +173,7 @@ def test_parse_public_s3_bucket_chunked_python(self): chunksize * i_chunk: chunksize * (i_chunk + 1)] tm.assert_frame_equal(true_df, df) - @tm.network - def test_parse_public_s3_bucket_python(self): + def test_parse_public_s3_bucket_python(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression=comp) @@ -150,8 +182,7 @@ def test_parse_public_s3_bucket_python(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - @tm.network - def test_infer_s3_compression(self): + def test_infer_s3_compression(self, s3_resource): for ext in ['', '.gz', '.bz2']: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', compression='infer') @@ -160,8 +191,7 @@ def test_infer_s3_compression(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')), df) - @tm.network - def test_parse_public_s3_bucket_nrows_python(self): + def test_parse_public_s3_bucket_nrows_python(self, s3_resource): for ext, comp in [('', None), ('.gz', 'gzip'), ('.bz2', 'bz2')]: df = read_csv('s3://pandas-test/tips.csv' + ext, engine='python', nrows=10, compression=comp) @@ -170,8 +200,7 @@ def test_parse_public_s3_bucket_nrows_python(self): tm.assert_frame_equal(read_csv( tm.get_data_path('tips.csv')).iloc[:10], df) - @tm.network - def test_s3_fails(self): + def test_s3_fails(self, s3_resource): with pytest.raises(IOError): read_csv('s3://nyqpug/asdf.csv') @@ -180,21 +209,18 @@ def test_s3_fails(self): with pytest.raises(IOError): read_csv('s3://cant_get_it/') - @tm.network - def boto3_client_s3(self): + def test_read_csv_handles_boto_s3_object(self, + s3_resource, + tips_file): # see gh-16135 - # boto3 is a dependency of s3fs - import boto3 - client = boto3.client("s3") - - key = "/tips.csv" - bucket = "pandas-test" - s3_object = client.get_object(Bucket=bucket, Key=key) + s3_object = s3_resource.meta.client.get_object( + Bucket='pandas-test', + Key='tips.csv') - result = read_csv(s3_object["Body"]) + result = read_csv(BytesIO(s3_object["Body"].read()), encoding='utf8') assert isinstance(result, DataFrame) assert not result.empty - expected = read_csv(tm.get_data_path('tips.csv')) + expected = read_csv(tips_file) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 92147b46097b80..6a399f41975e5b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -1,33 +1,32 @@ # pylint: disable=E1101 - -from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems -from datetime import datetime, date, time -import sys +import functools +import operator import os +import sys +import warnings +from datetime import datetime, date, time from distutils.version import LooseVersion from functools import partial - -import warnings from warnings import catch_warnings -import operator -import functools -import pytest -from numpy import nan import numpy as np +import pytest +from numpy import nan +import moto import pandas as pd +import pandas.util.testing as tm from pandas import DataFrame, Index, MultiIndex -from pandas.io.formats.excel import ExcelFormatter -from pandas.io.parsers import read_csv +from pandas.compat import u, range, map, openpyxl_compat, BytesIO, iteritems +from pandas.core.config import 
set_option, get_option +from pandas.io.common import URLError from pandas.io.excel import ( ExcelFile, ExcelWriter, read_excel, _XlwtWriter, _Openpyxl1Writer, _Openpyxl20Writer, _Openpyxl22Writer, register_writer, _XlsxWriter ) -from pandas.io.common import URLError +from pandas.io.formats.excel import ExcelFormatter +from pandas.io.parsers import read_csv from pandas.util.testing import ensure_clean, makeCustomDataframe as mkdf -from pandas.core.config import set_option, get_option -import pandas.util.testing as tm def _skip_if_no_xlrd(): @@ -67,13 +66,6 @@ def _skip_if_no_excelsuite(): _skip_if_no_openpyxl() -def _skip_if_no_s3fs(): - try: - import s3fs # noqa - except ImportError: - pytest.skip('s3fs not installed, skipping') - - _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() _frame = DataFrame(_seriesd)[:10] @@ -605,14 +597,22 @@ def test_read_from_http_url(self): local_table = self.get_exceldf('test1') tm.assert_frame_equal(url_table, local_table) - @tm.network(check_before_test=True) def test_read_from_s3_url(self): - _skip_if_no_s3fs() - - url = ('s3://pandas-test/test1' + self.ext) - url_table = read_excel(url) - local_table = self.get_exceldf('test1') - tm.assert_frame_equal(url_table, local_table) + boto3 = pytest.importorskip('boto3') + pytest.importorskip('s3fs') + + with moto.mock_s3(): + conn = boto3.resource("s3", region_name="us-east-1") + conn.create_bucket(Bucket="pandas-test") + file_name = os.path.join(self.dirpath, 'test1' + self.ext) + with open(file_name, 'rb') as f: + conn.Bucket("pandas-test").put_object(Key="test1" + self.ext, + Body=f) + + url = ('s3://pandas-test/test1' + self.ext) + url_table = read_excel(url) + local_table = self.get_exceldf('test1') + tm.assert_frame_equal(url_table, local_table) @pytest.mark.slow def test_read_from_file_url(self): diff --git a/tox.ini b/tox.ini index 45ad7fc451e764..f055251581a93c 100644 --- a/tox.ini +++ b/tox.ini @@ -19,6 +19,7 @@ deps = xlrd six sqlalchemy + moto # cd to anything but the default {toxinidir} which # contains the pandas subdirectory and confuses From 0097cb712a7361a69eb4f5ebb9bc13c2b8733f19 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 14 Sep 2017 11:09:30 -0500 Subject: [PATCH 084/188] PERF: Avoid values in Categorical.set_categories (#17515) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mater: ```python In [1]: import pandas as pd; import numpy as np In [2]: arr = ['s%04d' % i for i in np.random.randint(0, 500000 // 10, size=500000)]; s = pd.Series(arr).astype('category') In [3]: %timeit s.cat.set_categories(s.cat.categories) 68.5 ms ± 846 µs per loop (mean ± std. dev. of 7 runs, 10 loops each) ``` HEAD: ```python In [1]: import pandas as pd; import numpy as np In [2]: arr = ['s%04d' % i for i in np.random.randint(0, 500000 // 10, size=500000)] s = pd.Series(arr).astype('category') In [3]: %timeit s.cat.set_categories(s.cat.categories) 7.43 ms ± 110 µs per loop (mean ± std. dev. 
of 7 runs, 100 loops each) ``` Closes https://github.com/pandas-dev/pandas/issues/17508 --- asv_bench/benchmarks/categoricals.py | 3 ++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/categorical.py | 37 ++++++++++++++++- pandas/core/dtypes/concat.py | 11 ++--- pandas/tests/test_categorical.py | 62 ++++++++++++++++++++++++++++ 5 files changed, 104 insertions(+), 10 deletions(-) diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 6432ccfb19efec..d90c994b3d194a 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -67,6 +67,9 @@ def time_value_counts_dropna(self): def time_rendering(self): str(self.sel) + def time_set_categories(self): + self.ts.cat.set_categories(self.ts.cat.categories[::2]) + class Categoricals3(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 939199d3f6fa6d..6495ad3e7f6adb 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -467,6 +467,7 @@ Performance Improvements - Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) - :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) +- Improved performance of :meth:`Categorical.set_categories` by not materializing the values (:issue:`17508`) - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 97df72900428c4..e67ce2936819f5 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -777,8 +777,9 @@ def set_categories(self, new_categories, ordered=None, rename=False, # remove all _codes which are larger and set to -1/NaN self._codes[self._codes >= len(new_categories)] = -1 else: - values = cat.__array__() - cat._codes = _get_codes_for_values(values, new_categories) + codes = _recode_for_categories(self.codes, self.categories, + new_categories) + cat._codes = codes cat._categories = new_categories if ordered is None: @@ -2113,6 +2114,38 @@ def _get_codes_for_values(values, categories): return coerce_indexer_dtype(t.lookup(vals), cats) +def _recode_for_categories(codes, old_categories, new_categories): + """ + Convert a set of codes for to a new set of categories + + Parameters + ---------- + codes : array + old_categories, new_categories : Index + + Returns + ------- + new_codes : array + + Examples + -------- + >>> old_cat = pd.Index(['b', 'a', 'c']) + >>> new_cat = pd.Index(['a', 'b']) + >>> codes = np.array([0, 1, 1, 2]) + >>> _recode_for_categories(codes, old_cat, new_cat) + array([ 1, 0, 0, -1]) + """ + from pandas.core.algorithms import take_1d + + if len(old_categories) == 0: + # All null anyway, so just retain the nulls + return codes + indexer = coerce_indexer_dtype(new_categories.get_indexer(old_categories), + new_categories) + new_codes = take_1d(indexer, codes.copy(), fill_value=-1) + return new_codes + + def _convert_to_list_like(list_like): if hasattr(list_like, "dtype"): return list_like diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0ce45eea119ed2..f6f956832eebe8 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -314,6 +314,7 @@ def union_categoricals(to_union, sort_categories=False, ignore_order=False): Categories (3, object): [b, c, a] """ from pandas import Index, Categorical, CategoricalIndex, Series + from 
pandas.core.categorical import _recode_for_categories if len(to_union) == 0: raise ValueError('No Categoricals to union') @@ -359,14 +360,8 @@ def _maybe_unwrap(x): new_codes = [] for c in to_union: - if len(c.categories) > 0: - indexer = categories.get_indexer(c.categories) - - from pandas.core.algorithms import take_1d - new_codes.append(take_1d(indexer, c.codes, fill_value=-1)) - else: - # must be all NaN - new_codes.append(c.codes) + new_codes.append(_recode_for_categories(c.codes, c.categories, + categories)) new_codes = np.concatenate(new_codes) else: # ordered - to show a proper error message diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 7bbe220378993b..8a5f6bf110be32 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -26,6 +26,7 @@ Interval, IntervalIndex) from pandas.compat import range, lrange, u, PY3, PYPY from pandas.core.config import option_context +from pandas.core.categorical import _recode_for_categories class TestCategorical(object): @@ -963,6 +964,67 @@ def test_rename_categories(self): with pytest.raises(ValueError): cat.rename_categories([1, 2]) + @pytest.mark.parametrize('codes, old, new, expected', [ + ([0, 1], ['a', 'b'], ['a', 'b'], [0, 1]), + ([0, 1], ['b', 'a'], ['b', 'a'], [0, 1]), + ([0, 1], ['a', 'b'], ['b', 'a'], [1, 0]), + ([0, 1], ['b', 'a'], ['a', 'b'], [1, 0]), + ([0, 1, 0, 1], ['a', 'b'], ['a', 'b', 'c'], [0, 1, 0, 1]), + ([0, 1, 2, 2], ['a', 'b', 'c'], ['a', 'b'], [0, 1, -1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['a', 'b', 'c'], [0, 1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['b'], [-1, 0, -1]), + ([0, 1, -1], ['a', 'b', 'c'], ['d'], [-1, -1, -1]), + ([0, 1, -1], ['a', 'b', 'c'], [], [-1, -1, -1]), + ([-1, -1], [], ['a', 'b'], [-1, -1]), + ([1, 0], ['b', 'a'], ['a', 'b'], [0, 1]), + ]) + def test_recode_to_categories(self, codes, old, new, expected): + codes = np.asanyarray(codes, dtype=np.int8) + expected = np.asanyarray(expected, dtype=np.int8) + old = Index(old) + new = Index(new) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + def test_recode_to_categories_large(self): + N = 1000 + codes = np.arange(N) + old = Index(codes) + expected = np.arange(N - 1, -1, -1, dtype=np.int16) + new = Index(expected) + result = _recode_for_categories(codes, old, new) + tm.assert_numpy_array_equal(result, expected) + + @pytest.mark.parametrize('values, categories, new_categories', [ + # No NaNs, same cats, same order + (['a', 'b', 'a'], ['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['a', 'b', 'a'], ['a', 'b'], ['b', 'a'],), + # Same, unsorted + (['b', 'a', 'a'], ['a', 'b'], ['a', 'b'],), + # No NaNs, same cats, different order + (['b', 'a', 'a'], ['a', 'b'], ['b', 'a'],), + # NaNs + (['a', 'b', 'c'], ['a', 'b'], ['a', 'b']), + (['a', 'b', 'c'], ['a', 'b'], ['b', 'a']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + (['b', 'a', 'c'], ['a', 'b'], ['a', 'b']), + # Introduce NaNs + (['a', 'b', 'c'], ['a', 'b'], ['a']), + (['a', 'b', 'c'], ['a', 'b'], ['b']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + (['b', 'a', 'c'], ['a', 'b'], ['a']), + # No overlap + (['a', 'b', 'c'], ['a', 'b'], ['d', 'e']), + ]) + @pytest.mark.parametrize('ordered', [True, False]) + def test_set_categories_many(self, values, categories, new_categories, + ordered): + c = Categorical(values, categories) + expected = Categorical(values, new_categories, ordered) + result = c.set_categories(new_categories, ordered=ordered) + 
tm.assert_categorical_equal(result, expected) + def test_reorder_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) old = cat.copy() From 06a6e63c317e5291eb78081e2a21bc163ddaab6e Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Sep 2017 15:48:59 -0700 Subject: [PATCH 085/188] remove period_helper from non-period reqs (#17531) --- setup.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 434ca644739165..664478cc35845f 100755 --- a/setup.py +++ b/setup.py @@ -461,7 +461,6 @@ def pxd(name): tseries_depends = ['pandas/_libs/src/datetime/np_datetime.h', 'pandas/_libs/src/datetime/np_datetime_strings.h', - 'pandas/_libs/src/period_helper.h', 'pandas/_libs/src/datetime.pxd'] @@ -478,11 +477,11 @@ def pxd(name): 'pxdfiles': ['_libs/src/util'], 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', - 'pandas/_libs/src/datetime/np_datetime_strings.c', - 'pandas/_libs/src/period_helper.c']}, + 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.period': {'pyxfile': '_libs/period', - 'depends': tseries_depends, + 'depends': (tseries_depends + + ['pandas/_libs/src/period_helper.h']), 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, From ad70ed4ba921360169820dabd16e4475c527479f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Sep 2017 15:52:53 -0700 Subject: [PATCH 086/188] Fix bug where offset.copy() != offset (#17452) --- pandas/tests/tseries/test_offsets.py | 5 + pandas/tseries/offsets.py | 180 ++++++++++++++++----------- 2 files changed, 115 insertions(+), 70 deletions(-) diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index 7e6e85f322fe0f..cd2c29ffe3ac6b 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -1955,6 +1955,11 @@ def _check_roundtrip(obj): _check_roundtrip(self._object(2)) _check_roundtrip(self._object() * 2) + def test_copy(self): + # GH 17452 + off = self._object(weekmask='Mon Wed Fri') + assert off == off.copy() + class TestCustomBusinessMonthEnd(CustomBusinessMonthBase, Base): _object = CBMonthEnd diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 7ccecaa84e6d6d..d82a3a209af6bf 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -11,6 +11,7 @@ from dateutil.relativedelta import relativedelta, weekday from dateutil.easter import easter from pandas._libs import tslib, Timestamp, OutOfBoundsDatetime, Timedelta +from pandas.util._decorators import cache_readonly import functools import operator @@ -573,9 +574,9 @@ def __setstate__(self, state): """Reconstruct an instance from a pickled state""" self.__dict__ = state if 'weekmask' in state and 'holidays' in state: - calendar, holidays = self.get_calendar(weekmask=self.weekmask, - holidays=self.holidays, - calendar=None) + calendar, holidays = _get_calendar(weekmask=self.weekmask, + holidays=self.holidays, + calendar=None) self.kwds['calendar'] = self.calendar = calendar self.kwds['holidays'] = self.holidays = holidays self.kwds['weekmask'] = state['weekmask'] @@ -978,9 +979,9 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) - calendar, holidays = self.get_calendar(weekmask=weekmask, - holidays=holidays, - 
calendar=calendar) + calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) # CustomBusinessDay instances are identified by the # following two attributes. See DateOffset._params() # holidays, weekmask @@ -989,36 +990,6 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.kwds['holidays'] = self.holidays = holidays self.kwds['calendar'] = self.calendar = calendar - def get_calendar(self, weekmask, holidays, calendar): - """Generate busdaycalendar""" - if isinstance(calendar, np.busdaycalendar): - if not holidays: - holidays = tuple(calendar.holidays) - elif not isinstance(holidays, tuple): - holidays = tuple(holidays) - else: - # trust that calendar.holidays and holidays are - # consistent - pass - return calendar, holidays - - if holidays is None: - holidays = [] - try: - holidays = holidays + calendar.holidays().tolist() - except AttributeError: - pass - holidays = [self._to_dt64(dt, dtype='datetime64[D]') for dt in - holidays] - holidays = tuple(sorted(holidays)) - - kwargs = {'weekmask': weekmask} - if holidays: - kwargs['holidays'] = holidays - - busdaycalendar = np.busdaycalendar(**kwargs) - return busdaycalendar, holidays - @apply_wraps def apply(self, other): if self.n <= 0: @@ -1050,25 +1021,10 @@ def apply(self, other): def apply_index(self, i): raise NotImplementedError - @staticmethod - def _to_dt64(dt, dtype='datetime64'): - # Currently - # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') - # numpy.datetime64('2013-05-01T02:00:00.000000+0200') - # Thus astype is needed to cast datetime to datetime64[D] - if getattr(dt, 'tzinfo', None) is not None: - i8 = tslib.pydt_to_i8(dt) - dt = tslib.tz_convert_single(i8, 'UTC', dt.tzinfo) - dt = Timestamp(dt) - dt = np.datetime64(dt) - if dt.dtype.name != dtype: - dt = dt.astype(dtype) - return dt - def onOffset(self, dt): if self.normalize and not _is_normalized(dt): return False - day64 = self._to_dt64(dt, 'datetime64[D]') + day64 = _to_dt64(dt, 'datetime64[D]') return np.is_busday(day64, busdaycal=self.calendar) @@ -1087,19 +1043,25 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.n = int(n) self.normalize = normalize super(CustomBusinessHour, self).__init__(**kwds) + + calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) + self.kwds['weekmask'] = self.weekmask = weekmask + self.kwds['holidays'] = self.holidays = holidays + self.kwds['calendar'] = self.calendar = calendar + + @cache_readonly + def next_bday(self): # used for moving to next businessday if self.n >= 0: nb_offset = 1 else: nb_offset = -1 - self.next_bday = CustomBusinessDay(n=nb_offset, - weekmask=weekmask, - holidays=holidays, - calendar=calendar) - - self.kwds['weekmask'] = self.next_bday.weekmask - self.kwds['holidays'] = self.next_bday.holidays - self.kwds['calendar'] = self.next_bday.calendar + return CustomBusinessDay(n=nb_offset, + weekmask=self.weekmask, + holidays=self.holidays, + calendar=self.calendar) class MonthOffset(SingleConstructorOffset): @@ -1471,11 +1433,25 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) - self.cbday = CustomBusinessDay(n=self.n, normalize=normalize, - weekmask=weekmask, holidays=holidays, - calendar=calendar, **kwds) - self.m_offset = MonthEnd(n=1, normalize=normalize, **kwds) - self.kwds['calendar'] = self.cbday.calendar # cache numpy calendar + + calendar, 
holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) + self.kwds['weekmask'] = self.weekmask = weekmask + self.kwds['holidays'] = self.holidays = holidays + self.kwds['calendar'] = self.calendar = calendar + + @cache_readonly + def cbday(self): + kwds = self.kwds + return CustomBusinessDay(n=self.n, normalize=self.normalize, **kwds) + + @cache_readonly + def m_offset(self): + kwds = self.kwds + kwds = {key: kwds[key] for key in kwds + if key not in ['calendar', 'weekmask', 'holidays']} + return MonthEnd(n=1, normalize=self.normalize, **kwds) @apply_wraps def apply(self, other): @@ -1531,11 +1507,27 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.normalize = normalize self.kwds = kwds self.offset = kwds.get('offset', timedelta(0)) - self.cbday = CustomBusinessDay(n=self.n, normalize=normalize, - weekmask=weekmask, holidays=holidays, - calendar=calendar, **kwds) - self.m_offset = MonthBegin(n=1, normalize=normalize, **kwds) - self.kwds['calendar'] = self.cbday.calendar # cache numpy calendar + + # _get_calendar does validation and possible transformation + # of calendar and holidays. + calendar, holidays = _get_calendar(weekmask=weekmask, + holidays=holidays, + calendar=calendar) + kwds['calendar'] = self.calendar = calendar + kwds['weekmask'] = self.weekmask = weekmask + kwds['holidays'] = self.holidays = holidays + + @cache_readonly + def cbday(self): + kwds = self.kwds + return CustomBusinessDay(n=self.n, normalize=self.normalize, **kwds) + + @cache_readonly + def m_offset(self): + kwds = self.kwds + kwds = {key: kwds[key] for key in kwds + if key not in ['calendar', 'weekmask', 'holidays']} + return MonthBegin(n=1, normalize=self.normalize, **kwds) @apply_wraps def apply(self, other): @@ -2861,6 +2853,54 @@ class Nano(Tick): CBMonthBegin = CustomBusinessMonthBegin CDay = CustomBusinessDay +# --------------------------------------------------------------------- +# Business Calendar helpers + + +def _get_calendar(weekmask, holidays, calendar): + """Generate busdaycalendar""" + if isinstance(calendar, np.busdaycalendar): + if not holidays: + holidays = tuple(calendar.holidays) + elif not isinstance(holidays, tuple): + holidays = tuple(holidays) + else: + # trust that calendar.holidays and holidays are + # consistent + pass + return calendar, holidays + + if holidays is None: + holidays = [] + try: + holidays = holidays + calendar.holidays().tolist() + except AttributeError: + pass + holidays = [_to_dt64(dt, dtype='datetime64[D]') for dt in holidays] + holidays = tuple(sorted(holidays)) + + kwargs = {'weekmask': weekmask} + if holidays: + kwargs['holidays'] = holidays + + busdaycalendar = np.busdaycalendar(**kwargs) + return busdaycalendar, holidays + + +def _to_dt64(dt, dtype='datetime64'): + # Currently + # > np.datetime64(dt.datetime(2013,5,1),dtype='datetime64[D]') + # numpy.datetime64('2013-05-01T02:00:00.000000+0200') + # Thus astype is needed to cast datetime to datetime64[D] + if getattr(dt, 'tzinfo', None) is not None: + i8 = tslib.pydt_to_i8(dt) + dt = tslib.tz_convert_single(i8, 'UTC', dt.tzinfo) + dt = Timestamp(dt) + dt = np.datetime64(dt) + if dt.dtype.name != dtype: + dt = dt.astype(dtype) + return dt + def _get_firstbday(wkday): """ From 94266d48e5f54287a877cf7a0e94ef740e3eda22 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 14 Sep 2017 18:29:39 -0500 Subject: [PATCH 087/188] PERF: Faster CategoricalIndex from categorical (#17513) --- doc/source/whatsnew/v0.21.0.txt | 1 + 
pandas/core/indexes/category.py | 4 ++++ pandas/tests/indexes/test_category.py | 10 ++++++++++ 3 files changed, 15 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 6495ad3e7f6adb..52e056103cbdc3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -469,6 +469,7 @@ Performance Improvements - :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) - Improved performance of :meth:`Categorical.set_categories` by not materializing the values (:issue:`17508`) - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) +- Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 71cd4790ac3648..ef1dc4d971f37f 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -130,6 +130,10 @@ def _create_categorical(self, data, categories=None, ordered=None): ------- Categorical """ + if (isinstance(data, (ABCSeries, type(self))) and + is_categorical_dtype(data)): + data = data.values + if not isinstance(data, ABCCategorical): ordered = False if ordered is None else ordered from pandas.core.categorical import Categorical diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index aac68ebd6abede..cf365465763fab 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -125,6 +125,16 @@ def test_construction_with_dtype(self): result = CategoricalIndex(idx, categories=idx, ordered=True) tm.assert_index_equal(result, expected, exact=True) + def test_create_categorical(self): + # https://github.com/pandas-dev/pandas/pull/17513 + # The public CI constructor doesn't hit this code path with + # instances of CategoricalIndex, but we still want to test the code + ci = CategoricalIndex(['a', 'b', 'c']) + # First ci is self, second ci is data. 
+ result = CategoricalIndex._create_categorical(ci, ci) + expected = Categorical(['a', 'b', 'c']) + tm.assert_categorical_equal(result, expected) + def test_disallow_set_ops(self): # GH 10039 From 9b21c5456eb4b2cdbc7f74569c4b8660ada951fe Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 14 Sep 2017 18:33:03 -0700 Subject: [PATCH 088/188] Remove unnecessary iNaT checks from _Period properties (#17421) --- asv_bench/benchmarks/period.py | 59 +++++++++++++++ pandas/_libs/period.pyx | 127 ++++++++++++++++++++------------- 2 files changed, 135 insertions(+), 51 deletions(-) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 78d66295f28cc6..df3c2bf3e4b464 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -78,6 +78,65 @@ def time_value_counts_pindex(self): self.i.value_counts() +class Properties(object): + def setup(self): + self.per = Period('2017-09-06 08:28', freq='min') + + def time_year(self): + self.per.year + + def time_month(self): + self.per.month + + def time_day(self): + self.per.day + + def time_hour(self): + self.per.hour + + def time_minute(self): + self.per.minute + + def time_second(self): + self.per.second + + def time_is_leap_year(self): + self.per.is_leap_year + + def time_quarter(self): + self.per.quarter + + def time_qyear(self): + self.per.qyear + + def time_week(self): + self.per.week + + def time_daysinmonth(self): + self.per.daysinmonth + + def time_dayofweek(self): + self.per.dayofweek + + def time_dayofyear(self): + self.per.dayofyear + + def time_start_time(self): + self.per.start_time + + def time_end_time(self): + self.per.end_time + + def time_to_timestamp(): + self.per.to_timestamp() + + def time_now(): + self.per.now() + + def time_asfreq(): + self.per.asfreq('A') + + class period_standard_indexing(object): goal_time = 0.2 diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 9e473a7f362b44..babe0f7c6834d9 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -107,6 +107,8 @@ cdef extern from "period_helper.h": int pday(int64_t ordinal, int freq) except INT32_MIN int pweekday(int64_t ordinal, int freq) except INT32_MIN int pday_of_week(int64_t ordinal, int freq) except INT32_MIN + # TODO: pday_of_week and pweekday are identical. Make one an alias instead + # of importing them separately. 
int pday_of_year(int64_t ordinal, int freq) except INT32_MIN int pweek(int64_t ordinal, int freq) except INT32_MIN int phour(int64_t ordinal, int freq) except INT32_MIN @@ -868,58 +870,81 @@ cdef class _Period(object): dt64 = period_ordinal_to_dt64(val.ordinal, base) return Timestamp(dt64, tz=tz) - cdef _field(self, alias): + @property + def year(self): + base, mult = get_freq_code(self.freq) + return pyear(self.ordinal, base) + + @property + def month(self): + base, mult = get_freq_code(self.freq) + return pmonth(self.ordinal, base) + + @property + def day(self): + base, mult = get_freq_code(self.freq) + return pday(self.ordinal, base) + + @property + def hour(self): + base, mult = get_freq_code(self.freq) + return phour(self.ordinal, base) + + @property + def minute(self): + base, mult = get_freq_code(self.freq) + return pminute(self.ordinal, base) + + @property + def second(self): + base, mult = get_freq_code(self.freq) + return psecond(self.ordinal, base) + + @property + def weekofyear(self): + base, mult = get_freq_code(self.freq) + return pweek(self.ordinal, base) + + @property + def week(self): + return self.weekofyear + + @property + def dayofweek(self): + base, mult = get_freq_code(self.freq) + return pweekday(self.ordinal, base) + + @property + def weekday(self): + return self.dayofweek + + @property + def dayofyear(self): + base, mult = get_freq_code(self.freq) + return pday_of_year(self.ordinal, base) + + @property + def quarter(self): base, mult = get_freq_code(self.freq) - return get_period_field(alias, self.ordinal, base) - - property year: - def __get__(self): - return self._field(0) - property month: - def __get__(self): - return self._field(3) - property day: - def __get__(self): - return self._field(4) - property hour: - def __get__(self): - return self._field(5) - property minute: - def __get__(self): - return self._field(6) - property second: - def __get__(self): - return self._field(7) - property weekofyear: - def __get__(self): - return self._field(8) - property week: - def __get__(self): - return self.weekofyear - property dayofweek: - def __get__(self): - return self._field(10) - property weekday: - def __get__(self): - return self.dayofweek - property dayofyear: - def __get__(self): - return self._field(9) - property quarter: - def __get__(self): - return self._field(2) - property qyear: - def __get__(self): - return self._field(1) - property days_in_month: - def __get__(self): - return self._field(11) - property daysinmonth: - def __get__(self): - return self.days_in_month - property is_leap_year: - def __get__(self): - return bool(is_leapyear(self._field(0))) + return pquarter(self.ordinal, base) + + @property + def qyear(self): + base, mult = get_freq_code(self.freq) + return pqyear(self.ordinal, base) + + @property + def days_in_month(self): + base, mult = get_freq_code(self.freq) + return pdays_in_month(self.ordinal, base) + + @property + def daysinmonth(self): + return self.days_in_month + + @property + def is_leap_year(self): + return bool(is_leapyear(self.year)) @classmethod def now(cls, freq=None): From 72c38883f09c6902863345de432d3c90a29140b3 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 15 Sep 2017 02:18:24 -0600 Subject: [PATCH 089/188] CLN: Fix Spelling Errors (#17535) --- doc/source/advanced.rst | 10 +++++----- doc/source/api.rst | 2 +- doc/source/basics.rst | 2 +- doc/source/computation.rst | 2 +- doc/source/groupby.rst | 4 ++-- doc/source/indexing.rst | 2 +- doc/source/io.rst | 2 +- doc/source/merging.rst | 6 +++--- 
doc/source/missing_data.rst | 2 +- doc/source/options.rst | 4 ++-- doc/source/reshaping.rst | 2 +- doc/source/sparse.rst | 2 +- doc/source/style.ipynb | 2 +- doc/source/timeseries.rst | 18 +++++++++--------- doc/source/visualization.rst | 2 +- pandas/core/algorithms.py | 2 +- pandas/core/indexes/interval.py | 2 +- pandas/core/reshape/concat.py | 2 +- pandas/core/reshape/merge.py | 6 +++--- pandas/core/reshape/tile.py | 2 +- pandas/io/formats/excel.py | 4 ++-- pandas/io/pytables.py | 12 ++++++------ pandas/io/stata.py | 4 ++-- pandas/plotting/_misc.py | 2 +- pandas/plotting/_tools.py | 2 +- pandas/tests/frame/test_convert_to.py | 4 ++-- pandas/tests/groupby/test_transform.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/io/json/test_json_table_schema.py | 2 +- pandas/tests/io/parser/test_read_fwf.py | 2 +- pandas/tests/io/test_pytables.py | 8 ++++---- pandas/tests/plotting/test_datetimelike.py | 2 +- pandas/tests/series/test_dtypes.py | 2 +- pandas/tests/test_categorical.py | 2 +- pandas/tests/test_sorting.py | 2 +- pandas/tseries/util.py | 2 +- 36 files changed, 65 insertions(+), 65 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 3f145cf9556645..3bda8c7eacb61b 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -625,7 +625,7 @@ Index Types We have discussed ``MultiIndex`` in the previous sections pretty extensively. ``DatetimeIndex`` and ``PeriodIndex`` are shown :ref:`here `. ``TimedeltaIndex`` are :ref:`here `. -In the following sub-sections we will highlite some other index types. +In the following sub-sections we will highlight some other index types. .. _indexing.categoricalindex: @@ -645,7 +645,7 @@ and allows efficient indexing and storage of an index with a large number of dup df.dtypes df.B.cat.categories -Setting the index, will create create a ``CategoricalIndex`` +Setting the index, will create a ``CategoricalIndex`` .. ipython:: python @@ -681,7 +681,7 @@ Groupby operations on the index will preserve the index nature as well Reindexing operations, will return a resulting index based on the type of the passed indexer, meaning that passing a list will return a plain-old-``Index``; indexing with a ``Categorical`` will return a ``CategoricalIndex``, indexed according to the categories -of the PASSED ``Categorical`` dtype. This allows one to arbitrarly index these even with +of the PASSED ``Categorical`` dtype. This allows one to arbitrarily index these even with values NOT in the categories, similarly to how you can reindex ANY pandas index. .. ipython :: python @@ -722,7 +722,7 @@ Int64Index and RangeIndex Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects. ``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects. -``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analagous to python `range types `__. +``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to python `range types `__. .. _indexing.float64index: @@ -963,7 +963,7 @@ index can be somewhat complicated. For example, the following does not work: s.loc['c':'e'+1] A very common use case is to limit a time series to start and end at two -specific dates. To enable this, we made the design design to make label-based +specific dates. 
To enable this, we made the design to make label-based slicing include both endpoints: .. ipython:: python diff --git a/doc/source/api.rst b/doc/source/api.rst index 1541bbccefe214..4e02f7b11f466c 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1291,7 +1291,7 @@ Index ----- **Many of these methods or variants thereof are available on the objects -that contain an index (Series/Dataframe) and those should most likely be +that contain an index (Series/DataFrame) and those should most likely be used before calling these methods directly.** .. autosummary:: diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 42c28df3a6030f..0990d2bd15ee6f 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -923,7 +923,7 @@ Passing a named function will yield that name for the row: Aggregating with a dict +++++++++++++++++++++++ -Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFame.agg`` +Passing a dictionary of column names to a scalar or a list of scalars, to ``DataFrame.agg`` allows you to customize which functions are applied to which columns. Note that the results are not in any particular order, you can use an ``OrderedDict`` instead to guarantee ordering. diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 23699393958cfe..14cfdbc3648375 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -654,7 +654,7 @@ aggregation with, outputting a DataFrame: r['A'].agg([np.sum, np.mean, np.std]) -On a widowed DataFrame, you can pass a list of functions to apply to each +On a windowed DataFrame, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: .. ipython:: python diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index e1231b9a4a2007..e9a7d8dd0a46ea 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -561,7 +561,7 @@ must be either implemented on GroupBy or available via :ref:`dispatching .. note:: - If you pass a dict to ``aggregate``, the ordering of the output colums is + If you pass a dict to ``aggregate``, the ordering of the output columns is non-deterministic. If you want to be sure the output columns will be in a specific order, you can use an ``OrderedDict``. Compare the output of the following two commands: @@ -1211,7 +1211,7 @@ Groupby by Indexer to 'resample' data Resampling produces new hypothetical samples (resamples) from already existing observed data or from a model that generates data. These new samples are similar to the pre-existing samples. -In order to resample to work on indices that are non-datetimelike , the following procedure can be utilized. +In order to resample to work on indices that are non-datetimelike, the following procedure can be utilized. In the following examples, **df.index // 5** returns a binary array which is used to determine what gets selected for the groupby operation. diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 8474116c380825..edbc4e6d7fd225 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -714,7 +714,7 @@ Finally, one can also set a seed for ``sample``'s random number generator using Setting With Enlargement ------------------------ -The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis. +The ``.loc/[]`` operations can perform enlargement when setting a non-existent key for that axis. 
In the ``Series`` case this is effectively an appending operation diff --git a/doc/source/io.rst b/doc/source/io.rst index 8fbb23769492e4..fcf7f6029197bd 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -3077,7 +3077,7 @@ Compressed pickle files .. versionadded:: 0.20.0 -:func:`read_pickle`, :meth:`DataFame.to_pickle` and :meth:`Series.to_pickle` can read +:func:`read_pickle`, :meth:`DataFrame.to_pickle` and :meth:`Series.to_pickle` can read and write compressed pickle files. The compression types of ``gzip``, ``bz2``, ``xz`` are supported for reading and writing. `zip`` file supports read only and must contain only one data file to be read in. diff --git a/doc/source/merging.rst b/doc/source/merging.rst index a5ee1b1a9384cc..72787ea97a7824 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1329,7 +1329,7 @@ By default we are taking the asof of the quotes. on='time', by='ticker') -We only asof within ``2ms`` betwen the quote time and the trade time. +We only asof within ``2ms`` between the quote time and the trade time. .. ipython:: python @@ -1338,8 +1338,8 @@ We only asof within ``2ms`` betwen the quote time and the trade time. by='ticker', tolerance=pd.Timedelta('2ms')) -We only asof within ``10ms`` betwen the quote time and the trade time and we exclude exact matches on time. -Note that though we exclude the exact matches (of the quotes), prior quotes DO propogate to that point +We only asof within ``10ms`` between the quote time and the trade time and we exclude exact matches on time. +Note that though we exclude the exact matches (of the quotes), prior quotes DO propagate to that point in time. .. ipython:: python diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 65b411ccd4af26..b33b5c304853ae 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -320,7 +320,7 @@ Interpolation The ``limit_direction`` keyword argument was added. -Both Series and Dataframe objects have an ``interpolate`` method that, by default, +Both Series and DataFrame objects have an ``interpolate`` method that, by default, performs linear interpolation at missing datapoints. .. ipython:: python diff --git a/doc/source/options.rst b/doc/source/options.rst index 1592caf90546c7..f042e4d3f51204 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -313,9 +313,9 @@ display.large_repr truncate For DataFrames exceeding max_ro display.latex.repr False Whether to produce a latex DataFrame representation for jupyter frontends that support it. -display.latex.escape True Escapes special caracters in Dataframes, when +display.latex.escape True Escapes special characters in DataFrames, when using the to_latex method. -display.latex.longtable False Specifies if the to_latex method of a Dataframe +display.latex.longtable False Specifies if the to_latex method of a DataFrame uses the longtable format. display.latex.multicolumn True Combines columns when using a MultiIndex display.latex.multicolumn_format 'l' Alignment of multicolumn labels diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index fab83222b313f1..1209c4a8d6be80 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -156,7 +156,7 @@ the level numbers: stacked.unstack('second') Notice that the ``stack`` and ``unstack`` methods implicitly sort the index -levels involved. Hence a call to ``stack`` and then ``unstack``, or viceversa, +levels involved. 
Hence a call to ``stack`` and then ``unstack``, or vice versa, will result in a **sorted** copy of the original DataFrame or Series: .. ipython:: python diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index cf16cee501a3e5..89efa7b4be3eee 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -132,7 +132,7 @@ dtype, ``fill_value`` default changes: s.to_sparse() You can change the dtype using ``.astype()``, the result is also sparse. Note that -``.astype()`` also affects to the ``fill_value`` to keep its dense represantation. +``.astype()`` also affects to the ``fill_value`` to keep its dense representation. .. ipython:: python diff --git a/doc/source/style.ipynb b/doc/source/style.ipynb index c250787785e14e..1d6ce163cf977b 100644 --- a/doc/source/style.ipynb +++ b/doc/source/style.ipynb @@ -169,7 +169,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to resuse your existing knowledge of how to interact with DataFrames.\n", + "Notice the similarity with the standard `df.applymap`, which operates on DataFrames elementwise. We want you to be able to reuse your existing knowledge of how to interact with DataFrames.\n", "\n", "Notice also that our function returned a string containing the CSS attribute and value, separated by a colon just like in a `""") + # We use the "scoped" attribute here so that the desired + # style properties for the data frame are not then applied + # throughout the entire notebook. + template_first = """\ + """ + template_select = """\ + .dataframe %s { + %s: %s; + }""" + element_props = [('tbody tr th:only-of-type', + 'vertical-align', + 'middle'), + ('tbody tr th', + 'vertical-align', + 'top')] + if isinstance(self.columns, MultiIndex): + element_props.append(('thead tr th', + 'text-align', + 'left')) + if all((self.fmt.has_index_names, + self.fmt.index, + self.fmt.show_index_names)): + element_props.append(('thead tr:last-of-type th', + 'text-align', + 'right')) + else: + element_props.append(('thead th', + 'text-align', + 'right')) + template_mid = '\n\n'.join(map(lambda t: template_select % t, + element_props)) + template = dedent('\n'.join((template_first, + template_mid, + template_last))) if self.notebook: self.write(template) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 1e174c34221d55..194b5ba3e02765 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -1868,12 +1868,16 @@ def test_to_html_no_index_max_rows(self): def test_to_html_notebook_has_style(self): df = pd.DataFrame({"A": [1, 2, 3]}) result = df.to_html(notebook=True) - assert "thead tr:only-child" in result + assert "tbody tr th:only-of-type" in result + assert "vertical-align: middle;" in result + assert "thead th" in result def test_to_html_notebook_has_no_style(self): df = pd.DataFrame({"A": [1, 2, 3]}) result = df.to_html() - assert "thead tr:only-child" not in result + assert "tbody tr th:only-of-type" not in result + assert "vertical-align: middle;" not in result + assert "thead th" not in result def test_to_html_with_index_names_false(self): # gh-16493 From 8276a420a36c26eaab38856177023cb064963f19 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 22 Sep 2017 01:22:19 -0600 Subject: [PATCH 115/188] DOC: Remove experimental warning from custom offsets (#17584) --- pandas/tseries/offsets.py | 23 +++++------------------ 1 file changed, 5 
insertions(+), 18 deletions(-) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 6a518937b11957..452d30322b4cfa 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -951,14 +951,9 @@ def next_bday(self): class CustomBusinessDay(BusinessDay): """ - **EXPERIMENTAL** DateOffset subclass representing possibly n business days + DateOffset subclass representing possibly n custom business days, excluding holidays - .. warning:: EXPERIMENTAL - - This class is not officially supported and the API is likely to change - in future versions. Use this at your own risk. - Parameters ---------- n : int, default 1 @@ -1405,12 +1400,8 @@ def onOffset(self, dt): class CustomBusinessMonthEnd(BusinessMixin, MonthOffset): """ - **EXPERIMENTAL** DateOffset of one custom business month - - .. warning:: EXPERIMENTAL - - This class is not officially supported and the API is likely to change - in future versions. Use this at your own risk. + DateOffset subclass representing one custom business month, incrementing + between end of month dates Parameters ---------- @@ -1479,12 +1470,8 @@ def apply(self, other): class CustomBusinessMonthBegin(BusinessMixin, MonthOffset): """ - **EXPERIMENTAL** DateOffset of one custom business month - - .. warning:: EXPERIMENTAL - - This class is not officially supported and the API is likely to change - in future versions. Use this at your own risk. + DateOffset subclass representing one custom business month, incrementing + between beginning of month dates Parameters ---------- From 9732af248a6bcc6db05413fd671c08a23724dba4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 22 Sep 2017 06:10:31 -0700 Subject: [PATCH 116/188] Separate properties module (#17590) --- pandas/_libs/lib.pyx | 1 - pandas/_libs/{src => }/properties.pyx | 5 ++++- pandas/core/generic.py | 4 ++-- pandas/util/_decorators.py | 2 +- setup.py | 3 ++- 5 files changed, 9 insertions(+), 6 deletions(-) rename pandas/_libs/{src => }/properties.pyx (95%) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 53ca41e4b24893..01548e17d39abf 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1907,5 +1907,4 @@ cdef class BlockPlacement: include "reduce.pyx" -include "properties.pyx" include "inference.pyx" diff --git a/pandas/_libs/src/properties.pyx b/pandas/_libs/properties.pyx similarity index 95% rename from pandas/_libs/src/properties.pyx rename to pandas/_libs/properties.pyx index 4a3fd4b771a171..22d66356ebdc34 100644 --- a/pandas/_libs/src/properties.pyx +++ b/pandas/_libs/properties.pyx @@ -1,5 +1,8 @@ + +from cython cimport Py_ssize_t + from cpython cimport ( - PyDict_Contains, PyDict_GetItem, PyDict_GetItem, PyDict_SetItem) + PyDict_Contains, PyDict_GetItem, PyDict_SetItem) cdef class cache_readonly(object): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a71bf7be1bc753..e0a9fdb08dcb2c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -from pandas._libs import tslib, lib +from pandas._libs import tslib, lib, properties from pandas.core.dtypes.common import ( _ensure_int64, _ensure_object, @@ -258,7 +258,7 @@ def _setup_axes(cls, axes, info_axis=None, stat_axis=None, aliases=None, if build_axes: def set_axis(a, i): - setattr(cls, a, lib.AxisProperty(i)) + setattr(cls, a, properties.AxisProperty(i)) cls._internal_names_set.add(a) if axes_are_reversed: diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index bb7ffe45c689b0..31e27817913c5a 100644 
--- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -1,5 +1,5 @@ from pandas.compat import callable, signature -from pandas._libs.lib import cache_readonly # noqa +from pandas._libs.properties import cache_readonly # noqa import types import warnings from textwrap import dedent diff --git a/setup.py b/setup.py index 0e4e22b875e1db..d28c4ba8be5b00 100755 --- a/setup.py +++ b/setup.py @@ -437,7 +437,7 @@ def get_tag(self): cmdclass['build_src'] = DummyBuildSrc cmdclass['build_ext'] = CheckingBuildExt -lib_depends = ['reduce', 'inference', 'properties'] +lib_depends = ['reduce', 'inference'] def srcpath(name=None, suffix='.pyx', subdir='src'): @@ -478,6 +478,7 @@ def pxd(name): ext_data = { '_libs.lib': {'pyxfile': '_libs/lib', 'depends': lib_depends + tseries_depends}, + '_libs.properties': {'pyxfile': '_libs/properties', 'include': []}, '_libs.hashtable': {'pyxfile': '_libs/hashtable', 'pxdfiles': ['_libs/hashtable'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] From 26681db1ce339af641d276bc45fbb48dc329b044 Mon Sep 17 00:00:00 2001 From: jschendel Date: Fri, 22 Sep 2017 07:15:12 -0600 Subject: [PATCH 117/188] PERF: Implement RangeIndex min/max using RangeIndex properties (#17611) --- asv_bench/benchmarks/index_object.py | 20 ++++++++++++++++++++ doc/source/api.rst | 14 ++++++++++++++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/range.py | 18 ++++++++++++++++++ pandas/tests/indexes/test_range.py | 21 ++++++++++++++++++++- 5 files changed, 73 insertions(+), 1 deletion(-) diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 3fb53ce9b3c98e..454d9ccdda102f 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -199,3 +199,23 @@ def time_datetime_level_values_full(self): def time_datetime_level_values_sliced(self): self.mi[:10].values + + +class Range(object): + goal_time = 0.2 + + def setup(self): + self.idx_inc = RangeIndex(start=0, stop=10**7, step=3) + self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3) + + def time_max(self): + self.idx_inc.max() + + def time_max_trivial(self): + self.idx_dec.max() + + def time_min(self): + self.idx_dec.min() + + def time_min_trivial(self): + self.idx_inc.min() diff --git a/doc/source/api.rst b/doc/source/api.rst index 6b3e6bedcb24b3..96c7f68f57aaaa 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1416,6 +1416,20 @@ Selecting Index.slice_indexer Index.slice_locs +.. _api.numericindex: + +Numeric Index +------------- + +.. autosummary:: + :toctree: generated/ + :template: autosummary/class_without_autosummary.rst + + RangeIndex + Int64Index + UInt64Index + Float64Index + .. _api.categoricalindex: CategoricalIndex diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1cd65bb530f731..bf3a4f28b0a4c2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -473,6 +473,7 @@ Performance Improvements - Improved performance of :meth:`Categorical.set_categories` by not materializing the values (:issue:`17508`) - :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) - Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`) +- Improved performance of :meth:`RangeIndex.min` and :meth:`RangeIndex.max` by using ``RangeIndex`` properties to perform the computations (:issue:`17607`) .. 
_whatsnew_0210.bug_fixes: diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index b759abaed4e564..16523257c2f77c 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -269,6 +269,24 @@ def copy(self, name=None, deep=False, dtype=None, **kwargs): return RangeIndex(name=name, fastpath=True, **dict(self._get_data_as_items())) + def _minmax(self, meth): + no_steps = len(self) - 1 + if no_steps == -1: + return np.nan + elif ((meth == 'min' and self._step > 0) or + (meth == 'max' and self._step < 0)): + return self._start + + return self._start + self._step * no_steps + + def min(self): + """The minimum value of the RangeIndex""" + return self._minmax('min') + + def max(self): + """The maximum value of the RangeIndex""" + return self._minmax('max') + def argsort(self, *args, **kwargs): """ Returns the indices that would sort the index and its diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index d206c36ee51c95..8dc5a40ced4bfd 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -10,7 +10,7 @@ import numpy as np -from pandas import (notna, Series, Index, Float64Index, +from pandas import (isna, notna, Series, Index, Float64Index, Int64Index, RangeIndex) import pandas.util.testing as tm @@ -994,3 +994,22 @@ def test_append(self): # Append single item rather than list result2 = indices[0].append(indices[1]) tm.assert_index_equal(result2, expected, exact=True) + + @pytest.mark.parametrize('start,stop,step', + [(0, 400, 3), (500, 0, -6), (-10**6, 10**6, 4), + (10**6, -10**6, -4), (0, 10, 20)]) + def test_max_min(self, start, stop, step): + # GH17607 + idx = RangeIndex(start, stop, step) + expected = idx._int64index.max() + result = idx.max() + assert result == expected + + expected = idx._int64index.min() + result = idx.min() + assert result == expected + + # empty + idx = RangeIndex(start, stop, -step) + assert isna(idx.max()) + assert isna(idx.min()) From 49cfdd7a0d72e732d07dbf4d4b96c6801cdb6719 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 22 Sep 2017 06:22:54 -0700 Subject: [PATCH 118/188] Simplify to_pydatetime() (#17592) --- asv_bench/benchmarks/timestamp.py | 6 ++++++ pandas/_libs/tslib.pyx | 13 ++++--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index e4f3023037580d..e8cb4c9d1c75bd 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -81,3 +81,9 @@ def time_replace_across_dst(self): def time_replace_None(self): self.ts_tz.replace(tzinfo=None) + + def time_to_pydatetime(self): + self.ts.to_pydatetime() + + def time_to_pydatetime_tz(self): + self.ts_tz.to_pydatetime() diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8238552b44e031..6ba37062ac8691 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1158,18 +1158,13 @@ cdef class _Timestamp(datetime): If warn=True, issue a warning if nanoseconds is nonzero. 
""" - cdef: - pandas_datetimestruct dts - _TSObject ts - if self.nanosecond != 0 and warn: warnings.warn("Discarding nonzero nanoseconds in conversion", UserWarning, stacklevel=2) - ts = convert_to_tsobject(self, self.tzinfo, None, 0, 0) - dts = ts.dts - return datetime(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, ts.tzinfo) + + return datetime(self.year, self.month, self.day, + self.hour, self.minute, self.second, + self.microsecond, self.tzinfo) cpdef to_datetime64(self): """ Returns a numpy.datetime64 object with 'ns' precision """ From 2352fd6f88a0cc96488849d288b93ea8f46d1f7b Mon Sep 17 00:00:00 2001 From: Guilherme Beltramini Date: Fri, 22 Sep 2017 09:30:26 -0400 Subject: [PATCH 119/188] ERR: Raise ImportError when xlrd is not present Related issues: #8515, #14673 Author: Guilherme Beltramini Closes #17613 from gcbeltramini/xlrd-import and squashes the following commits: dee1998 [Guilherme Beltramini] Add PR number and blank line c2759cb [Guilherme Beltramini] Throw ImportError --- doc/source/whatsnew/v0.21.0.txt | 3 ++- pandas/io/excel.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index bf3a4f28b0a4c2..885babfdd1d19c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -116,6 +116,7 @@ Other Enhancements - :func:`Styler.where` has been implemented. It is as a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`). - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) +- :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) .. 
_whatsnew_0210.api_breaking: @@ -523,7 +524,7 @@ I/O - Bug in :func:`read_stata` where the index was not set (:issue:`16342`) - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) - Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) -- Bug in ``DataFrame.to_html()`` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) +- Bug in ``DataFrame.to_html()`` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) Plotting ^^^^^^^^ diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 5db4603c37be0a..faafdba435ff21 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -239,12 +239,17 @@ class ExcelFile(object): def __init__(self, io, **kwds): - import xlrd # throw an ImportError if we need to + err_msg = "Install xlrd >= 0.9.0 for Excel support" - ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) - if ver < (0, 9): # pragma: no cover - raise ImportError("pandas requires xlrd >= 0.9.0 for excel " - "support, current version " + xlrd.__VERSION__) + try: + import xlrd + except ImportError: + raise ImportError(err_msg) + else: + ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) + if ver < (0, 9): # pragma: no cover + raise ImportError(err_msg + + ". Current version " + xlrd.__VERSION__) # could be a str, ExcelFile, Book, etc. self.io = io From a5c9abf9cc88f2245637156a1b8fcc238d5f2100 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 22 Sep 2017 09:39:40 -0400 Subject: [PATCH 120/188] DOC: whatsnew fixes (#17626) closes #17601 --- doc/source/whatsnew/v0.21.0.txt | 111 ++++++++++++++++---------------- 1 file changed, 57 insertions(+), 54 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 885babfdd1d19c..a80fa744780a2a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -135,7 +135,7 @@ We have updated our minimum supported versions of dependencies (:issue:`15206`, +--------------+-----------------+----------+ | Package | Minimum Version | Required | - +======================+=========+==========+ + +==============+=================+==========+ | Numpy | 1.9.0 | X | +--------------+-----------------+----------+ | Matplotlib | 1.4.3 | | @@ -241,54 +241,53 @@ New Behaviour: Dtype Conversions ^^^^^^^^^^^^^^^^^ -- Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to - same the type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). +Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignment, would coerce to same the type (e.g. int / float), or raise for datetimelikes. These will now preseve the bools with ``object`` dtypes. (:issue:`16821`). - .. ipython:: python +.. ipython:: python - s = Series([1, 2, 3]) + s = Series([1, 2, 3]) - .. code-block:: python +.. code-block:: python - In [5]: s[1] = True + In [5]: s[1] = True - In [6]: s - Out[6]: - 0 1 - 1 1 - 2 3 - dtype: int64 + In [6]: s + Out[6]: + 0 1 + 1 1 + 2 3 + dtype: int64 - New Behavior +New Behavior - .. ipython:: python +.. 

-     s[1] = True
-     s
+   s[1] = True
+   s

-- Previously, as assignment to a datetimelike with a non-datetimelike would coerce the
-  non-datetime-like item being assigned (:issue:`14145`).
+Previously, an assignment to a datetimelike with a non-datetimelike would coerce the
+non-datetime-like item being assigned (:issue:`14145`).

-  .. ipython:: python
+.. ipython:: python

-     s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')])
+   s = pd.Series([pd.Timestamp('2011-01-01'), pd.Timestamp('2012-01-01')])

-  .. code-block:: python
+.. code-block:: python

-     In [1]: s[1] = 1
+   In [1]: s[1] = 1

-     In [2]: s
-     Out[2]:
-     0   2011-01-01 00:00:00.000000000
-     1   1970-01-01 00:00:00.000000001
-     dtype: datetime64[ns]
+   In [2]: s
+   Out[2]:
+   0   2011-01-01 00:00:00.000000000
+   1   1970-01-01 00:00:00.000000001
+   dtype: datetime64[ns]

-  These now coerce to ``object`` dtype.
+These now coerce to ``object`` dtype.

-  .. ipython:: python
+.. ipython:: python

-     s[1] = 1
-     s
+   s[1] = 1
+   s

 - Inconsistent behavior in ``.where()`` with datetimelikes which would raise rather than coerce to ``object`` (:issue:`16402`)
 - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
@@ -338,26 +337,26 @@ UTC Localization with Series

 Previously, :func:`to_datetime` did not localize datetime ``Series`` data when ``utc=True`` was passed. Now, :func:`to_datetime` will correctly localize ``Series`` with a ``datetime64[ns, UTC]`` dtype to be consistent with how list-like and ``Index`` data are handled. (:issue:`6415`).

-  Previous Behavior
+Previous Behavior

-  .. ipython:: python
+.. ipython:: python

-     s = Series(['20130101 00:00:00'] * 3)
+   s = Series(['20130101 00:00:00'] * 3)

-  .. code-block:: ipython
+.. code-block:: ipython

-     In [12]: pd.to_datetime(s, utc=True)
-     Out[12]:
-     0   2013-01-01
-     1   2013-01-01
-     2   2013-01-01
-     dtype: datetime64[ns]
+   In [12]: pd.to_datetime(s, utc=True)
+   Out[12]:
+   0   2013-01-01
+   1   2013-01-01
+   2   2013-01-01
+   dtype: datetime64[ns]

-  New Behavior
+New Behavior

-  .. ipython:: python
+.. ipython:: python

-     pd.to_datetime(s, utc=True)
+   pd.to_datetime(s, utc=True)

 Additionally, DataFrames with datetime columns that were parsed by :func:`read_sql_table` and :func:`read_sql_query` will also be localized to UTC only if the original SQL columns were timezone aware datetime columns.
@@ -410,9 +409,9 @@ Previous Behavior:

 New Behavior:

-  .. ipython:: python
+.. ipython:: python

-     pd.interval_range(start=0, end=4)
+   pd.interval_range(start=0, end=4)

 .. _whatsnew_0210.api:

@@ -476,6 +475,14 @@ Performance Improvements
 - Improved performance of the :class:`CategoricalIndex` for data that is already categorical dtype (:issue:`17513`)
 - Improved performance of :meth:`RangeIndex.min` and :meth:`RangeIndex.max` by using ``RangeIndex`` properties to perform the computations (:issue:`17607`)

+.. _whatsnew_0210.docs:
+
+Documentation Changes
+~~~~~~~~~~~~~~~~~~~~~
+
+- Several ``NaT`` method docstrings (e.g. :func:`NaT.ctime`) were incorrect (:issue:`17327`)
+- The documentation has had references to versions < v0.17 removed and cleaned up (:issue:`17442`, :issue:`17404` & :issue:`17504`)
+
 .. _whatsnew_0210.bug_fixes:

 Bug Fixes
@@ -530,7 +537,7 @@ Plotting
 ^^^^^^^^
 - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`)
 - Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`)
-- Line plots no longer assume monotonic x data when calculating xlims, they show the entire lines now even for unsorted x data. (:issue:`11310`)(:issue:`11471`)
+- Line plots no longer assume monotonic x data when calculating xlims, they show the entire lines now even for unsorted x data. (:issue:`11310`, :issue:`11471`)
 - With matplotlib 2.0.0 and above, calculation of x limits for line plots is left to matplotlib, so that its new default settings are applied. (:issue:`15495`)
 - Bug in ``Series.plot.bar`` or ``DataFramee.plot.bar`` with ``y`` not respecting user-passed ``color`` (:issue:`16822`)

@@ -575,10 +582,8 @@ Numeric

 Categorical
 ^^^^^^^^^^^
 - Bug in :func:`Series.isin` when called with a categorical (:issue`16639`)
-- Bug in the categorical constructor with empty values and categories causing
-  the ``.categories`` to be an empty ``Float64Index`` rather than an empty
-  ``Index`` with object dtype (:issue:`17248`)
-- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)
+- Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`)
+- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`)

 PyPy
 ^^^^
@@ -593,5 +598,3 @@ Other
 ^^^^^
 - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`)
-- Several ``NaT`` method docstrings (e.g. :func:`NaT.ctime`) were incorrect (:issue:`17327`)
-- The documentation has had references to versions < v0.17 removed and cleaned up (:issue:`17442`, :issue:`17442`, :issue:`17404` & :issue:`17504`)

From d1fe892a754bf48839d9ac4029e258883ee64a2e Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Fri, 22 Sep 2017 09:41:31 -0400
Subject: [PATCH 121/188] Revert "BLD: pin numpy to particular variant that is built for all our deps (#17619)" (#17625)

This reverts commit 6930f27e78b2b61a4df31b667a816fa53e49ffed.
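
(For reference, conda requirement pins take the form ``name=version=build``.
A minimal illustration, using the package from this revert:

    numpy                                  # any version, any build
    numpy=1.13.1                           # version 1.13.1, any build
    numpy=1.13.1=py36_blas_openblas_201    # one specific build variant

Dropping the build string, as the diff below does, leaves the solver free to
pick whichever numpy build is compatible with the rest of the environment.)
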
closes #17620
---
 ci/requirements-3.6.build | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ci/requirements-3.6.build b/ci/requirements-3.6.build
index 31ffd5acc7fcc7..1c4b46aea3865d 100644
--- a/ci/requirements-3.6.build
+++ b/ci/requirements-3.6.build
@@ -2,7 +2,5 @@ python=3.6*
 python-dateutil
 pytz
 nomkl
+numpy
 cython
-
-# pin numpy that is built for all our deps
-numpy=1.13.1=py36_blas_openblas_201

From e6d8953f8cd5ad9f22894a8948e9b6340ad819f4 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Fri, 22 Sep 2017 12:50:53 -0700
Subject: [PATCH 122/188] Fix make_signature TypeError in py3 (#17609)

---
 pandas/tests/util/test_util.py | 16 +++++++++++++++-
 pandas/util/_decorators.py     |  2 +-
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/util/test_util.py b/pandas/tests/util/test_util.py
index abd82cfa89f942..ffc9703abff41d 100644
--- a/pandas/tests/util/test_util.py
+++ b/pandas/tests/util/test_util.py
@@ -9,7 +9,7 @@ import pytest

 from pandas.compat import intern
 from pandas.util._move import move_into_mutable_buffer, BadMove, stolenbuf
-from pandas.util._decorators import deprecate_kwarg
+from pandas.util._decorators import deprecate_kwarg, make_signature
 from pandas.util._validators import (validate_args, validate_kwargs,
                                      validate_args_and_kwargs,
                                      validate_bool_kwarg)
@@ -467,3 +467,17 @@ def test_set_locale(self):

         current_locale = locale.getlocale()
         assert current_locale == self.current_locale
+
+
+def test_make_signature():
+    # See GH 17608
+    # Case where the func does not have default kwargs
+    sig = make_signature(validate_kwargs)
+    assert sig == (['fname', 'kwargs', 'compat_args'],
+                   ['fname', 'kwargs', 'compat_args'])
+
+    # Case where the func does have default kwargs
+    sig = make_signature(deprecate_kwarg)
+    assert sig == (['old_arg_name', 'new_arg_name',
+                    'mapping=None', 'stacklevel=2'],
+                   ['old_arg_name', 'new_arg_name', 'mapping', 'stacklevel'])
diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py
index 31e27817913c5a..3733e4311aa732 100644
--- a/pandas/util/_decorators.py
+++ b/pandas/util/_decorators.py
@@ -242,7 +242,7 @@ def make_signature(func):
         defaults = ('',) * n_wo_defaults
     else:
         n_wo_defaults = len(spec.args) - len(spec.defaults)
-        defaults = ('',) * n_wo_defaults + spec.defaults
+        defaults = ('',) * n_wo_defaults + tuple(spec.defaults)
     args = []
     for i, (var, default) in enumerate(zip(spec.args, defaults)):
         args.append(var if default == '' else var + '=' + repr(default))

From f797c1dc8d838eb9df5ede3be681949dab852148 Mon Sep 17 00:00:00 2001
From: Licht Takeuchi
Date: Sat, 23 Sep 2017 06:39:12 +0900
Subject: [PATCH 123/188] BUG: Fix groupby nunique with NaT (#17624)

---
 doc/source/whatsnew/v0.21.0.txt          |  1 +
 pandas/core/groupby.py                   |  8 +++++++-
 pandas/tests/groupby/test_timegrouper.py | 13 +++++++++++++
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index a80fa744780a2a..5003aa0d97c1c6 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -552,6 +552,7 @@ Groupby/Resample/Rolling
 - Bug in ``Series.resample(...).apply()`` where an empty ``Series`` modified the source index and did not return the name of a ``Series`` (:issue:`14313`)
 - Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`)
 - Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number
of keys equaled the number of elements on the groupby axis (:issue:`16859`) +- Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`) Sparse ^^^^^^ diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f14ed08a27fae8..a62ae40a85941f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -3177,7 +3177,13 @@ def nunique(self, dropna=True): out = np.add.reduceat(inc, idx).astype('int64', copy=False) if len(ids): - res = out if ids[0] != -1 else out[1:] + # NaN/NaT group exists if the head of ids is -1, + # so remove it from res and exclude its index from idx + if ids[0] == -1: + res = out[1:] + idx = idx[np.flatnonzero(idx)] + else: + res = out else: res = out[1:] ri = self.grouper.result_index diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index df0a93d7833759..f83a3fcd0668d9 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -608,3 +608,16 @@ def test_first_last_max_min_on_time_data(self): assert_frame_equal(grouped_ref.min(), grouped_test.min()) assert_frame_equal(grouped_ref.first(), grouped_test.first()) assert_frame_equal(grouped_ref.last(), grouped_test.last()) + + def test_nunique_with_timegrouper_and_nat(self): + # GH 17575 + test = pd.DataFrame({ + 'time': [Timestamp('2016-06-28 09:35:35'), + pd.NaT, + Timestamp('2016-06-28 16:46:28')], + 'data': ['1', '2', '3']}) + + grouper = pd.TimeGrouper(key='time', freq='h') + result = test.groupby(grouper)['data'].nunique() + expected = test[test.time.notnull()].groupby(grouper)['data'].nunique() + tm.assert_series_equal(result, expected) From da93f51c0802db6f32e63218e96e3aa7206db6c6 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Fri, 22 Sep 2017 22:22:35 -0400 Subject: [PATCH 124/188] TST: remove some warnings (#17638) --- pandas/core/reshape/reshape.py | 2 +- pandas/plotting/_core.py | 2 +- pandas/tests/frame/test_operators.py | 6 ++++++ pandas/tests/indexes/test_interval.py | 2 +- pandas/tests/io/test_stata.py | 8 +++++--- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 7260bc9a8b7a14..bff09be6149f32 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -851,7 +851,7 @@ def lreshape(data, groups, dropna=True, label=None): return DataFrame(mdata, columns=id_cols + pivot_cols) -def wide_to_long(df, stubnames, i, j, sep="", suffix='\d+'): +def wide_to_long(df, stubnames, i, j, sep="", suffix=r'\d+'): r""" Wide panel to long format. Less flexible but more user-friendly than melt. diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 7a40018494fc4f..aa919d600ec526 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2726,7 +2726,7 @@ def barh(self, x=None, y=None, **kwds): return self(kind='barh', x=x, y=y, **kwds) def box(self, by=None, **kwds): - """ + r""" Boxplot .. 
versionadded:: 0.17.0 diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 5052bef24e95a6..309c0f0244d7c8 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1035,6 +1035,12 @@ def test_boolean_comparison(self): result = df == tup assert_frame_equal(result, expected) + def test_boolean_comparison_error(self): + + # GH 4576 + # boolean comparisons with a tuple/list give unexpected results + df = DataFrame(np.arange(6).reshape((3, 2))) + # not shape compatible pytest.raises(ValueError, lambda: df == (2, 2)) pytest.raises(ValueError, lambda: df == [2, 2]) diff --git a/pandas/tests/indexes/test_interval.py b/pandas/tests/indexes/test_interval.py index dc59495f619b03..b55bab3a210cc4 100644 --- a/pandas/tests/indexes/test_interval.py +++ b/pandas/tests/indexes/test_interval.py @@ -1068,7 +1068,7 @@ def test_errors(self): interval_range(start='foo', periods=10) # invalid end - msg = 'end must be numeric or datetime-like, got \(0, 1\]' + msg = r'end must be numeric or datetime-like, got \(0, 1\]' with tm.assert_raises_regex(ValueError, msg): interval_range(end=Interval(0, 1), periods=10) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 94a0ac31e093e4..d6bdb764f1c8e9 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -1053,7 +1053,8 @@ def test_iterator(self): tm.assert_frame_equal(parsed.iloc[0:5, :], chunk) # GH12153 - from_chunks = pd.concat(read_stata(fname, chunksize=4)) + with read_stata(fname, chunksize=4) as itr: + from_chunks = pd.concat(itr) tm.assert_frame_equal(parsed, from_chunks) def test_read_chunks_115(self): @@ -1306,8 +1307,9 @@ def test_value_labels_iterator(self, write_index): df['A'] = df['A'].astype('category') with tm.ensure_clean() as path: df.to_stata(path, write_index=write_index) - dta_iter = pd.read_stata(path, iterator=True) - value_labels = dta_iter.value_labels() + + with pd.read_stata(path, iterator=True) as dta_iter: + value_labels = dta_iter.value_labels() assert value_labels == {'A': {0: 'A', 1: 'B', 2: 'C', 3: 'E'}} def test_set_index(self): From 4004367fb815645bb7f5bbb518eee62cbd476e3a Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 22 Sep 2017 19:52:56 -0700 Subject: [PATCH 125/188] BLD: fix inline warnings (#17528) --- pandas/_libs/parsers.pyx | 2 +- pandas/_libs/src/inference.pyx | 2 +- pandas/_libs/src/khash.pxd | 112 ++++++++++++++++----------------- pandas/_libs/src/skiplist.pxd | 10 +-- pandas/_libs/src/util.pxd | 36 +++++------ 5 files changed, 81 insertions(+), 81 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 3e8b5c4bd3febd..5bf9f4ce83cbfa 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -255,7 +255,7 @@ cdef extern from "parser/tokenizer.h": # inline int to_complex(char *item, double *p_real, # double *p_imag, char sci, char decimal) - inline int to_longlong(char *item, long long *p_value) nogil + int to_longlong(char *item, long long *p_value) nogil # inline int to_longlong_thousands(char *item, long long *p_value, # char tsep) int to_boolean(const char *item, uint8_t *val) nogil diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index 2bb362eab40975..a2764e87eec556 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -1015,7 +1015,7 @@ cpdef bint is_interval_array(ndarray[object] values): cdef extern from "parse_helper.h": - inline int floatify(object, double *result, int 
*maybe_int) except -1 + int floatify(object, double *result, int *maybe_int) except -1 # constants that will be compared to potentially arbitrarily large # python int diff --git a/pandas/_libs/src/khash.pxd b/pandas/_libs/src/khash.pxd index adb0fe285dbb8d..ba9a3c70097b23 100644 --- a/pandas/_libs/src/khash.pxd +++ b/pandas/_libs/src/khash.pxd @@ -11,13 +11,13 @@ cdef extern from "khash_python.h": PyObject **keys size_t *vals - inline kh_pymap_t* kh_init_pymap() - inline void kh_destroy_pymap(kh_pymap_t*) - inline void kh_clear_pymap(kh_pymap_t*) - inline khint_t kh_get_pymap(kh_pymap_t*, PyObject*) - inline void kh_resize_pymap(kh_pymap_t*, khint_t) - inline khint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) - inline void kh_del_pymap(kh_pymap_t*, khint_t) + kh_pymap_t* kh_init_pymap() + void kh_destroy_pymap(kh_pymap_t*) + void kh_clear_pymap(kh_pymap_t*) + khint_t kh_get_pymap(kh_pymap_t*, PyObject*) + void kh_resize_pymap(kh_pymap_t*, khint_t) + khint_t kh_put_pymap(kh_pymap_t*, PyObject*, int*) + void kh_del_pymap(kh_pymap_t*, khint_t) bint kh_exist_pymap(kh_pymap_t*, khiter_t) @@ -27,13 +27,13 @@ cdef extern from "khash_python.h": PyObject **keys size_t *vals - inline kh_pyset_t* kh_init_pyset() - inline void kh_destroy_pyset(kh_pyset_t*) - inline void kh_clear_pyset(kh_pyset_t*) - inline khint_t kh_get_pyset(kh_pyset_t*, PyObject*) - inline void kh_resize_pyset(kh_pyset_t*, khint_t) - inline khint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) - inline void kh_del_pyset(kh_pyset_t*, khint_t) + kh_pyset_t* kh_init_pyset() + void kh_destroy_pyset(kh_pyset_t*) + void kh_clear_pyset(kh_pyset_t*) + khint_t kh_get_pyset(kh_pyset_t*, PyObject*) + void kh_resize_pyset(kh_pyset_t*, khint_t) + khint_t kh_put_pyset(kh_pyset_t*, PyObject*, int*) + void kh_del_pyset(kh_pyset_t*, khint_t) bint kh_exist_pyset(kh_pyset_t*, khiter_t) @@ -45,13 +45,13 @@ cdef extern from "khash_python.h": kh_cstr_t *keys size_t *vals - inline kh_str_t* kh_init_str() nogil - inline void kh_destroy_str(kh_str_t*) nogil - inline void kh_clear_str(kh_str_t*) nogil - inline khint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil - inline void kh_resize_str(kh_str_t*, khint_t) nogil - inline khint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil - inline void kh_del_str(kh_str_t*, khint_t) nogil + kh_str_t* kh_init_str() nogil + void kh_destroy_str(kh_str_t*) nogil + void kh_clear_str(kh_str_t*) nogil + khint_t kh_get_str(kh_str_t*, kh_cstr_t) nogil + void kh_resize_str(kh_str_t*, khint_t) nogil + khint_t kh_put_str(kh_str_t*, kh_cstr_t, int*) nogil + void kh_del_str(kh_str_t*, khint_t) nogil bint kh_exist_str(kh_str_t*, khiter_t) nogil @@ -61,13 +61,13 @@ cdef extern from "khash_python.h": int64_t *keys size_t *vals - inline kh_int64_t* kh_init_int64() nogil - inline void kh_destroy_int64(kh_int64_t*) nogil - inline void kh_clear_int64(kh_int64_t*) nogil - inline khint_t kh_get_int64(kh_int64_t*, int64_t) nogil - inline void kh_resize_int64(kh_int64_t*, khint_t) nogil - inline khint_t kh_put_int64(kh_int64_t*, int64_t, int*) nogil - inline void kh_del_int64(kh_int64_t*, khint_t) nogil + kh_int64_t* kh_init_int64() nogil + void kh_destroy_int64(kh_int64_t*) nogil + void kh_clear_int64(kh_int64_t*) nogil + khint_t kh_get_int64(kh_int64_t*, int64_t) nogil + void kh_resize_int64(kh_int64_t*, khint_t) nogil + khint_t kh_put_int64(kh_int64_t*, int64_t, int*) nogil + void kh_del_int64(kh_int64_t*, khint_t) nogil bint kh_exist_int64(kh_int64_t*, khiter_t) nogil @@ -79,13 +79,13 @@ cdef extern from "khash_python.h": khuint64_t *keys size_t 
*vals - inline kh_uint64_t* kh_init_uint64() nogil - inline void kh_destroy_uint64(kh_uint64_t*) nogil - inline void kh_clear_uint64(kh_uint64_t*) nogil - inline khint_t kh_get_uint64(kh_uint64_t*, int64_t) nogil - inline void kh_resize_uint64(kh_uint64_t*, khint_t) nogil - inline khint_t kh_put_uint64(kh_uint64_t*, int64_t, int*) nogil - inline void kh_del_uint64(kh_uint64_t*, khint_t) nogil + kh_uint64_t* kh_init_uint64() nogil + void kh_destroy_uint64(kh_uint64_t*) nogil + void kh_clear_uint64(kh_uint64_t*) nogil + khint_t kh_get_uint64(kh_uint64_t*, int64_t) nogil + void kh_resize_uint64(kh_uint64_t*, khint_t) nogil + khint_t kh_put_uint64(kh_uint64_t*, int64_t, int*) nogil + void kh_del_uint64(kh_uint64_t*, khint_t) nogil bint kh_exist_uint64(kh_uint64_t*, khiter_t) nogil @@ -95,13 +95,13 @@ cdef extern from "khash_python.h": float64_t *keys size_t *vals - inline kh_float64_t* kh_init_float64() nogil - inline void kh_destroy_float64(kh_float64_t*) nogil - inline void kh_clear_float64(kh_float64_t*) nogil - inline khint_t kh_get_float64(kh_float64_t*, float64_t) nogil - inline void kh_resize_float64(kh_float64_t*, khint_t) nogil - inline khint_t kh_put_float64(kh_float64_t*, float64_t, int*) nogil - inline void kh_del_float64(kh_float64_t*, khint_t) nogil + kh_float64_t* kh_init_float64() nogil + void kh_destroy_float64(kh_float64_t*) nogil + void kh_clear_float64(kh_float64_t*) nogil + khint_t kh_get_float64(kh_float64_t*, float64_t) nogil + void kh_resize_float64(kh_float64_t*, khint_t) nogil + khint_t kh_put_float64(kh_float64_t*, float64_t, int*) nogil + void kh_del_float64(kh_float64_t*, khint_t) nogil bint kh_exist_float64(kh_float64_t*, khiter_t) nogil @@ -111,13 +111,13 @@ cdef extern from "khash_python.h": int32_t *keys size_t *vals - inline kh_int32_t* kh_init_int32() nogil - inline void kh_destroy_int32(kh_int32_t*) nogil - inline void kh_clear_int32(kh_int32_t*) nogil - inline khint_t kh_get_int32(kh_int32_t*, int32_t) nogil - inline void kh_resize_int32(kh_int32_t*, khint_t) nogil - inline khint_t kh_put_int32(kh_int32_t*, int32_t, int*) nogil - inline void kh_del_int32(kh_int32_t*, khint_t) nogil + kh_int32_t* kh_init_int32() nogil + void kh_destroy_int32(kh_int32_t*) nogil + void kh_clear_int32(kh_int32_t*) nogil + khint_t kh_get_int32(kh_int32_t*, int32_t) nogil + void kh_resize_int32(kh_int32_t*, khint_t) nogil + khint_t kh_put_int32(kh_int32_t*, int32_t, int*) nogil + void kh_del_int32(kh_int32_t*, khint_t) nogil bint kh_exist_int32(kh_int32_t*, khiter_t) nogil @@ -129,12 +129,12 @@ cdef extern from "khash_python.h": kh_cstr_t *keys PyObject **vals - inline kh_strbox_t* kh_init_strbox() nogil - inline void kh_destroy_strbox(kh_strbox_t*) nogil - inline void kh_clear_strbox(kh_strbox_t*) nogil - inline khint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil - inline void kh_resize_strbox(kh_strbox_t*, khint_t) nogil - inline khint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil - inline void kh_del_strbox(kh_strbox_t*, khint_t) nogil + kh_strbox_t* kh_init_strbox() nogil + void kh_destroy_strbox(kh_strbox_t*) nogil + void kh_clear_strbox(kh_strbox_t*) nogil + khint_t kh_get_strbox(kh_strbox_t*, kh_cstr_t) nogil + void kh_resize_strbox(kh_strbox_t*, khint_t) nogil + khint_t kh_put_strbox(kh_strbox_t*, kh_cstr_t, int*) nogil + void kh_del_strbox(kh_strbox_t*, khint_t) nogil bint kh_exist_strbox(kh_strbox_t*, khiter_t) nogil diff --git a/pandas/_libs/src/skiplist.pxd b/pandas/_libs/src/skiplist.pxd index 69e9df5b542aa6..214aa1c7aeaf00 100644 --- 
a/pandas/_libs/src/skiplist.pxd +++ b/pandas/_libs/src/skiplist.pxd @@ -14,9 +14,9 @@ cdef extern from "skiplist.h": int size int maxlevels - inline skiplist_t* skiplist_init(int) nogil - inline void skiplist_destroy(skiplist_t*) nogil - inline double skiplist_get(skiplist_t*, int, int*) nogil - inline int skiplist_insert(skiplist_t*, double) nogil - inline int skiplist_remove(skiplist_t*, double) nogil + skiplist_t* skiplist_init(int) nogil + void skiplist_destroy(skiplist_t*) nogil + double skiplist_get(skiplist_t*, int, int*) nogil + int skiplist_insert(skiplist_t*, double) nogil + int skiplist_remove(skiplist_t*, double) nogil diff --git a/pandas/_libs/src/util.pxd b/pandas/_libs/src/util.pxd index 076bc1cd56003a..f7a68c4ade71b5 100644 --- a/pandas/_libs/src/util.pxd +++ b/pandas/_libs/src/util.pxd @@ -3,26 +3,26 @@ cimport numpy as cnp cimport cpython cdef extern from "numpy_helper.h": - inline void set_array_owndata(ndarray ao) - inline void set_array_not_contiguous(ndarray ao) - - inline int is_integer_object(object) - inline int is_float_object(object) - inline int is_complex_object(object) - inline int is_bool_object(object) - inline int is_string_object(object) - inline int is_datetime64_object(object) - inline int is_timedelta64_object(object) - inline int assign_value_1d(ndarray, Py_ssize_t, object) except -1 - inline cnp.int64_t get_nat() - inline object get_value_1d(ndarray, Py_ssize_t) - inline int floatify(object, double*) except -1 - inline char *get_c_string(object) except NULL - inline object char_to_string(char*) - inline void transfer_object_column(char *dst, char *src, size_t stride, + void set_array_owndata(ndarray ao) + void set_array_not_contiguous(ndarray ao) + + int is_integer_object(object) + int is_float_object(object) + int is_complex_object(object) + int is_bool_object(object) + int is_string_object(object) + int is_datetime64_object(object) + int is_timedelta64_object(object) + int assign_value_1d(ndarray, Py_ssize_t, object) except -1 + cnp.int64_t get_nat() + object get_value_1d(ndarray, Py_ssize_t) + int floatify(object, double*) except -1 + char *get_c_string(object) except NULL + object char_to_string(char*) + void transfer_object_column(char *dst, char *src, size_t stride, size_t length) object sarr_from_data(cnp.dtype, int length, void* data) - inline object unbox_if_zerodim(object arr) + object unbox_if_zerodim(object arr) ctypedef fused numeric: cnp.int8_t From 76d17449f868e25b68bd636906b8f70c683761af Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 23 Sep 2017 10:11:01 -0400 Subject: [PATCH 126/188] BUG: overflow on Timedelta construction & arithmetic now raises (#17640) closes #17637 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 6 +++--- pandas/tests/indexes/datetimes/test_tools.py | 7 +++++++ pandas/tests/scalar/test_timedelta.py | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 5003aa0d97c1c6..43e90f06ed5045 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -498,6 +498,7 @@ Conversion - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) - Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) - Bug in ``Timestamp.replace`` when replacing ``tzinfo`` around DST changes (:issue:`15683`) +- Bug in ``Timedelta`` construction and arithmetic that would not propagate the 
``Overflow`` exception (:issue:`17367`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6ba37062ac8691..077603af96947c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3514,7 +3514,7 @@ cpdef convert_to_timedelta64(object ts, object unit): ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') if isinstance(ts, timedelta): - ts = np.timedelta64(ts) + ts = np.timedelta64(_delta_to_nanoseconds(ts), 'ns') elif not isinstance(ts, np.timedelta64): raise ValueError("Invalid type for timedelta " "scalar: %s" % type(ts)) @@ -3891,8 +3891,7 @@ for _maybe_method_name in dir(NaTType): #---------------------------------------------------------------------- # Conversion routines - -cpdef int64_t _delta_to_nanoseconds(delta): +cpdef int64_t _delta_to_nanoseconds(delta) except? -1: if isinstance(delta, np.ndarray): return delta.astype('m8[ns]').astype('int64') if hasattr(delta, 'nanos'): @@ -3903,6 +3902,7 @@ cpdef int64_t _delta_to_nanoseconds(delta): return delta.astype("timedelta64[ns]").item() if is_integer_object(delta): return delta + return (delta.days * 24 * 60 * 60 * 1000000 + delta.seconds * 1000000 + delta.microseconds) * 1000 diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index be27334384f6b7..e0ccedb834adf9 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -787,6 +787,13 @@ def test_to_datetime_freq(self): assert xp.freq == rs.freq assert xp.tzinfo == rs.tzinfo + def test_to_datetime_overflow(self): + # gh-17637 + # we are overflowing Timedelta range here + + with pytest.raises(OverflowError): + date_range(start='1/1/1700', freq='B', periods=100000) + def test_string_na_nat_conversion(self): # GH #999, #858 diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index bc9a0388df9d91..2cabbfacf64161 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -166,6 +166,13 @@ def test_overflow_on_construction(self): value = pd.Timedelta('1day').value * 20169940 pytest.raises(OverflowError, pd.Timedelta, value) + # xref gh-17637 + with pytest.raises(OverflowError): + pd.Timedelta(7 * 19999, unit='D') + + with pytest.raises(OverflowError): + pd.Timedelta(timedelta(days=13 * 19999)) + def test_total_seconds_scalar(self): # see gh-10939 rng = Timedelta('1 days, 10:11:12.100123456') @@ -612,6 +619,14 @@ def test_timedelta_arithmetic(self): tm.assert_series_equal(result_operator, expected) tm.assert_series_equal(result_method, expected) + def test_arithmetic_overflow(self): + + with pytest.raises(OverflowError): + pd.Timestamp('1700-01-01') + pd.Timedelta(13 * 19999, unit='D') + + with pytest.raises(OverflowError): + pd.Timestamp('1700-01-01') + timedelta(days=13 * 19999) + def test_apply_to_timedelta(self): timedelta_NaT = pd.to_timedelta('NaT') From e2757a2db0faa7878858b36f602235daa936a674 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 23 Sep 2017 16:13:01 +0200 Subject: [PATCH 127/188] DOC: correct example use of nth dropna keyword (#17641) dropna=True is deprecated, see https://github.com/pandas-dev/pandas/pull/17493 --- doc/source/groupby.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index e9a7d8dd0a46ea..91d806ca5dd4f8 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -1060,7 +1060,7 @@ To select from a DataFrame or Series the nth 
item, use the nth method. This is a

     g.nth(-1)
     g.nth(1)

-If you want to select the nth not-null item, use the ``dropna`` kwarg. For a DataFrame this should be either ``'any'`` or ``'all'`` just like you would pass to dropna, for a Series this just needs to be truthy.
+If you want to select the nth not-null item, use the ``dropna`` kwarg. For a DataFrame this should be either ``'any'`` or ``'all'`` just like you would pass to dropna:

 .. ipython:: python

@@ -1072,7 +1072,7 @@ If you want to select the nth not-null item, use the ``dropna`` kwarg. For a Dat

     g.nth(-1, dropna='any')  # NaNs denote group exhausted when using dropna

     g.last()
-    g.B.nth(0, dropna=True)
+    g.B.nth(0, dropna='all')

 As with other methods, passing ``as_index=False``, will achieve a filtration, which returns the grouped row.

From 85a10671f814301be3e0f3c24c2863488ec27ddd Mon Sep 17 00:00:00 2001
From: skwbc
Date: Sun, 24 Sep 2017 01:14:09 +0900
Subject: [PATCH 128/188] BUG: DataFrame.first_valid_index() fails if there is no valid entry. (#17488)

Closes #17400
---
 doc/source/whatsnew/v0.21.0.txt       |  1 +
 pandas/core/frame.py                  | 20 ++++++++++++--------
 pandas/core/generic.py                | 16 ++++++++++++++++
 pandas/core/series.py                 | 10 ++++------
 pandas/tests/frame/test_timeseries.py |  5 +++++
 5 files changed, 38 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 43e90f06ed5045..32e4294f06d6bb 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -518,6 +518,7 @@ Indexing
 - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`)
 - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`)
 - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`)
+- Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when there is no valid entry (:issue:`17400`)

 I/O
 ^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index dd5d490ea66a8f..346eeb8d2642cd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -4063,23 +4063,27 @@ def update(self, other, join='left', overwrite=True, filter_func=None,
     # ----------------------------------------------------------------------
     # Misc methods

+    def _get_valid_indices(self):
+        is_valid = self.count(1) > 0
+        return self.index[is_valid]
+
+    @Appender(_shared_docs['valid_index'] % {
+        'position': 'first', 'klass': 'DataFrame'})
     def first_valid_index(self):
-        """
-        Return label for first non-NA/null value
-        """
         if len(self) == 0:
             return None

-        return self.index[self.count(1) > 0][0]
+        valid_indices = self._get_valid_indices()
+        return valid_indices[0] if len(valid_indices) else None

+    @Appender(_shared_docs['valid_index'] % {
+        'position': 'last', 'klass': 'DataFrame'})
     def last_valid_index(self):
-        """
-        Return label for last non-NA/null value
-        """
         if len(self) == 0:
             return None

-        return self.index[self.count(1) > 0][-1]
+        valid_indices = self._get_valid_indices()
+        return valid_indices[-1] if len(valid_indices) else None

     # ----------------------------------------------------------------------
     # Data reshaping
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index e0a9fdb08dcb2c..241204ef555f6e 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -6757,6 +6757,22 @@ def transform(self, func, *args, **kwargs):

     cls.transform = transform

+    # ----------------------------------------------------------------------
+    # Misc methods
+
+    _shared_docs['valid_index'] = """
+        Return index for %(position)s non-NA/null value.
+
+        Notes
+        -----
+        If all elements are non-NA/null, returns None.
+        Also returns None for empty %(klass)s.
+
+        Returns
+        -------
+        scalar : type of index
+        """
+

 def _doc_parms(cls):
     """Return a tuple of the doc parms."""
diff --git a/pandas/core/series.py b/pandas/core/series.py
index ac11c5f908fdcf..02690dec3e1c4c 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2825,10 +2825,9 @@ def dropna(self, axis=0, inplace=False, **kwargs):
     valid = lambda self, inplace=False, **kwargs: self.dropna(inplace=inplace,
                                                               **kwargs)

+    @Appender(generic._shared_docs['valid_index'] % {
+        'position': 'first', 'klass': 'Series'})
     def first_valid_index(self):
-        """
-        Return label for first non-NA/null value
-        """
         if len(self) == 0:
             return None

@@ -2839,10 +2838,9 @@ def first_valid_index(self):
         else:
             return self.index[i]

+    @Appender(generic._shared_docs['valid_index'] % {
+        'position': 'last', 'klass': 'Series'})
     def last_valid_index(self):
-        """
-        Return label for last non-NA/null value
-        """
         if len(self) == 0:
             return None
diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py
index 19fbf854256c6e..26a2c6f9a50450 100644
--- a/pandas/tests/frame/test_timeseries.py
+++ b/pandas/tests/frame/test_timeseries.py
@@ -440,6 +440,11 @@ def test_first_last_valid(self):
         assert empty.last_valid_index() is None
         assert empty.first_valid_index() is None

+        # GH17400: no valid entries
+        frame[:] = nan
+        assert frame.last_valid_index() is None
+        assert frame.first_valid_index() is None
+
     def test_at_time_frame(self):
         rng = date_range('1/1/2000', '1/5/2000', freq='5min')
         ts = DataFrame(np.random.randn(len(rng), 2), index=rng)

From b555613259572640e173f45b170c41265a6a7d79 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Sat, 23 Sep 2017 10:00:50 -0700
Subject: [PATCH 129/188] Fix apparent copy/paste error skewness--> excess kurtosis (#17647)

---
 pandas/core/nanops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 858aed7fd3e237..388b2ecdff445d 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -565,7 +565,7 @@ def nanskew(values, axis=None, skipna=True):

 @disallow('M8', 'm8')
 def nankurt(values, axis=None, skipna=True):
-    """ Compute the sample skewness.
+    """ Compute the sample excess kurtosis.
The statistic computed here is the adjusted Fisher-Pearson standardized moment coefficient G2, computed directly from the second and fourth From 2eb568a9c968a3cffd4e585f644bd53e6e8a600b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 23 Sep 2017 10:36:28 -0700 Subject: [PATCH 130/188] Bitesize offsets (#17318) --- asv_bench/benchmarks/timeseries.py | 2 +- pandas/tseries/frequencies.py | 1 + pandas/tseries/offsets.py | 131 ++++++++++++----------------- 3 files changed, 58 insertions(+), 76 deletions(-) diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index b7151ad2eaa999..779fc0bd20964a 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -56,7 +56,7 @@ def setup(self): self.no_freq = self.rng7[:50000].append(self.rng7[50002:]) self.d_freq = self.rng7[:50000].append(self.rng7[50000:]) - self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000) + self.rng8 = date_range(start='1/1/1700', freq='B', periods=75000) self.b_freq = self.rng8[:50000].append(self.rng8[50000:]) def time_add_timedelta(self): diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 085a3a784557ba..b055c4b4cb27f0 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from datetime import timedelta from pandas.compat import long, zip from pandas import compat diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 452d30322b4cfa..ea37434e3a8d98 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from datetime import date, datetime, timedelta from pandas.compat import range from pandas import compat @@ -323,37 +324,42 @@ def _params(self): def __repr__(self): className = getattr(self, '_outputName', type(self).__name__) + + if abs(self.n) != 1: + plural = 's' + else: + plural = '' + + n_str = "" + if self.n != 1: + n_str = "%s * " % self.n + + out = '<%s' % n_str + className + plural + self._repr_attrs() + '>' + return out + + # TODO: Combine this with BusinessMixin version by defining a whitelisted + # set of attributes on each object rather than the existing behavior of + # iterating over internal ``__dict__`` + def _repr_attrs(self): exclude = set(['n', 'inc', 'normalize']) attrs = [] for attr in sorted(self.__dict__): - if ((attr == 'kwds' and len(self.kwds) == 0) or - attr.startswith('_')): + if attr.startswith('_'): continue - elif attr == 'kwds': + elif attr == 'kwds': # TODO: get rid of this kwds_new = {} for key in self.kwds: if not hasattr(self, key): kwds_new[key] = self.kwds[key] if len(kwds_new) > 0: - attrs.append('='.join((attr, repr(kwds_new)))) - else: - if attr not in exclude: - attrs.append('='.join((attr, repr(getattr(self, attr))))) - - plural = '' - if abs(self.n) != 1: - plural = 's' - - n_str = '' - if self.n != 1: - n_str = '{n} * '.format(n=self.n) + attrs.append('kwds=%s' % (kwds_new)) + elif attr not in exclude: + value = getattr(self, attr) + attrs.append('%s=%s' % (attr, value)) - attrs_str = '' + out = '' if attrs: - attrs_str = ': ' + ', '.join(attrs) - - repr_content = ''.join([n_str, className, plural, attrs_str]) - out = '<{content}>'.format(content=repr_content) + out += ': ' + ', '.join(attrs) return out @property @@ -507,8 +513,18 @@ def freqstr(self): else: fstr = code + try: + if self._offset: + fstr += self._offset_str() + except AttributeError: + # TODO: standardize `_offset` vs `offset` naming convention + pass + return fstr + 
def _offset_str(self): + return '' + @property def nanos(self): raise ValueError("{name} is a non-fixed frequency".format(name=self)) @@ -527,23 +543,11 @@ def _from_name(cls, suffix=None): class BusinessMixin(object): """ mixin to business types to provide related functions """ - # TODO: Combine this with DateOffset by defining a whitelisted set of - # attributes on each object rather than the existing behavior of iterating - # over internal ``__dict__`` - def __repr__(self): - className = getattr(self, '_outputName', self.__class__.__name__) - - plural = '' - if abs(self.n) != 1: - plural = 's' - - n_str = '' - if self.n != 1: - n_str = '{n} * '.format(n=self.n) - - repr_content = ''.join([n_str, className, plural, self._repr_attrs()]) - out = '<{content}>'.format(content=repr_content) - return out + @property + def offset(self): + """Alias for self._offset""" + # Alias for backward compat + return self._offset def _repr_attrs(self): if self.offset: @@ -572,6 +576,11 @@ def __getstate__(self): def __setstate__(self, state): """Reconstruct an instance from a pickled state""" + if 'offset' in state: + # Older versions have offset attribute instead of _offset + if '_offset' in state: # pragma: no cover + raise ValueError('Unexpected key `_offset`') + state['_offset'] = state.pop('offset') self.__dict__ = state if 'weekmask' in state and 'holidays' in state: calendar, holidays = _get_calendar(weekmask=self.weekmask, @@ -593,24 +602,7 @@ def __init__(self, n=1, normalize=False, **kwds): self.n = int(n) self.normalize = normalize self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) - - @property - def freqstr(self): - try: - code = self.rule_code - except NotImplementedError: - return repr(self) - - if self.n != 1: - fstr = '{n}{code}'.format(n=self.n, code=code) - else: - fstr = code - - if self.offset: - fstr += self._offset_str() - - return fstr + self._offset = kwds.get('offset', timedelta(0)) def _offset_str(self): def get_str(td): @@ -643,9 +635,6 @@ def get_str(td): else: return '+' + repr(self.offset) - def isAnchored(self): - return (self.n == 1) - @apply_wraps def apply(self, other): if isinstance(other, datetime): @@ -709,7 +698,7 @@ def __init__(self, **kwds): kwds['start'] = self._validate_time(kwds.get('start', '09:00')) kwds['end'] = self._validate_time(kwds.get('end', '17:00')) self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) + self._offset = kwds.get('offset', timedelta(0)) self.start = kwds.get('start', '09:00') self.end = kwds.get('end', '17:00') @@ -776,7 +765,7 @@ def _get_business_hours_by_sec(self): Return business hours in a day by seconds. 
""" if self._get_daytime_flag(): - # create dummy datetime to calcurate businesshours in a day + # create dummy datetime to calculate businesshours in a day dtstart = datetime(2014, 4, 1, self.start.hour, self.start.minute) until = datetime(2014, 4, 1, self.end.hour, self.end.minute) return (until - dtstart).total_seconds() @@ -811,7 +800,7 @@ def rollforward(self, dt): @apply_wraps def apply(self, other): - # calcurate here because offset is not immutable + # calculate here because offset is not immutable daytime = self._get_daytime_flag() businesshours = self._get_business_hours_by_sec() bhdelta = timedelta(seconds=businesshours) @@ -860,7 +849,7 @@ def apply(self, other): if n >= 0: bday_edge = self._prev_opening_time(other) bday_edge = bday_edge + bhdelta - # calcurate remainder + # calculate remainder bday_remain = result - bday_edge result = self._next_opening_time(other) result += bday_remain @@ -898,7 +887,7 @@ def onOffset(self, dt): def _onOffset(self, dt, businesshours): """ - Slight speedups using calcurated values + Slight speedups using calculated values """ # if self.normalize and not _is_normalized(dt): # return False @@ -975,7 +964,8 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.n = int(n) self.normalize = normalize self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) + self._offset = kwds.get('offset', timedelta(0)) + calendar, holidays = _get_calendar(weekmask=weekmask, holidays=holidays, calendar=calendar) @@ -1337,9 +1327,6 @@ def _apply_index_days(self, i, roll): class BusinessMonthEnd(MonthOffset): """DateOffset increments between business EOM dates""" - def isAnchored(self): - return (self.n == 1) - @apply_wraps def apply(self, other): n = self.n @@ -1425,7 +1412,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.n = int(n) self.normalize = normalize self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) + self._offset = kwds.get('offset', timedelta(0)) calendar, holidays = _get_calendar(weekmask=weekmask, holidays=holidays, @@ -1495,7 +1482,7 @@ def __init__(self, n=1, normalize=False, weekmask='Mon Tue Wed Thu Fri', self.n = int(n) self.normalize = normalize self.kwds = kwds - self.offset = kwds.get('offset', timedelta(0)) + self._offset = kwds.get('offset', timedelta(0)) # _get_calendar does validation and possible transformation # of calendar and holidays. 
@@ -1966,9 +1953,6 @@ class QuarterEnd(QuarterOffset): _default_startingMonth = 3 _prefix = 'Q' - def isAnchored(self): - return (self.n == 1 and self.startingMonth is not None) - @apply_wraps def apply(self, other): n = self.n @@ -2004,9 +1988,6 @@ class QuarterBegin(QuarterOffset): _from_name_startingMonth = 1 _prefix = 'QS' - def isAnchored(self): - return (self.n == 1 and self.startingMonth is not None) - @apply_wraps def apply(self, other): n = self.n From ecd2ad9ff58fa37bbdb66a09736dfb14db5caa6b Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 23 Sep 2017 14:52:11 -0400 Subject: [PATCH 131/188] TST: remove some more warnings (#17645) TST: parametrize stata tests --- pandas/core/dtypes/missing.py | 20 +- pandas/core/internals.py | 9 + pandas/tests/frame/test_analytics.py | 8 +- pandas/tests/io/test_stata.py | 333 ++++++++++++++------------- pandas/tests/test_window.py | 2 +- pandas/util/testing.py | 4 +- 6 files changed, 189 insertions(+), 187 deletions(-) diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 101612893cb025..49b7b1d1d3a9b4 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -327,25 +327,7 @@ def array_equivalent(left, right, strict_nan=False): left = left.view('i8') right = right.view('i8') - # NaNs cannot occur otherwise. - try: - return np.array_equal(left, right) - except AttributeError: - # see gh-13388 - # - # NumPy v1.7.1 has a bug in its array_equal - # function that prevents it from correctly - # comparing two arrays with complex dtypes. - # This bug is corrected in v1.8.0, so remove - # this try-except block as soon as we stop - # supporting NumPy versions < 1.8.0 - if not is_dtype_equal(left.dtype, right.dtype): - return False - - left = left.tolist() - right = right.tolist() - - return left == right + return np.array_equal(left, right) def _infer_fill_value(val): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 83b382ec0ed723..6799d3b5746d0d 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1289,6 +1289,15 @@ def get_result(other): elif is_numeric_v_string_like(values, other): result = False + # avoid numpy warning of elementwise comparisons + elif func.__name__ == 'eq': + if is_list_like(other) and not isinstance(other, np.ndarray): + other = np.asarray(other) + + # if we can broadcast, then ok + if values.shape[-1] != other.shape[-1]: + return False + result = func(values, other) else: result = func(values, other) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 93514a8a422151..aac8f785f3d992 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -2,6 +2,7 @@ from __future__ import print_function +import warnings from datetime import timedelta from distutils.version import LooseVersion import sys @@ -102,7 +103,6 @@ def test_corr_int(self): # dtypes other than float64 #1761 df3 = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]}) - # it works! 
df3.cov() df3.corr() @@ -117,7 +117,11 @@ def test_corr_int_and_boolean(self): expected = DataFrame(np.ones((2, 2)), index=[ 'a', 'b'], columns=['a', 'b']) for meth in ['pearson', 'kendall', 'spearman']: - tm.assert_frame_equal(df.corr(meth), expected) + + # RuntimeWarning + with warnings.catch_warnings(record=True): + result = df.corr(meth) + tm.assert_frame_equal(result, expected) def test_corr_cov_independent_index_column(self): # GH 14617 diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index d6bdb764f1c8e9..055a490bc6b5d8 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -23,6 +23,19 @@ PossiblePrecisionLoss, StataMissingValue) +@pytest.fixture +def dirpath(): + return tm.get_data_path() + + +@pytest.fixture +def parsed_114(dirpath): + dta14_114 = os.path.join(dirpath, 'stata5_114.dta') + parsed_114 = read_stata(dta14_114, convert_dates=True) + parsed_114.index.name = 'index' + return parsed_114 + + class TestStata(object): def setup_method(self, method): @@ -108,10 +121,12 @@ def test_data_method(self): parsed_114_read = rdr.read() tm.assert_frame_equal(parsed_114_data, parsed_114_read) - def test_read_dta1(self): + @pytest.mark.parametrize( + 'file', ['dta1_114', 'dta1_117']) + def test_read_dta1(self, file): - parsed_114 = self.read_dta(self.dta1_114) - parsed_117 = self.read_dta(self.dta1_117) + file = getattr(self, file) + parsed = self.read_dta(file) # Pandas uses np.nan as missing value. # Thus, all columns will be of type float, regardless of their name. @@ -123,8 +138,7 @@ def test_read_dta1(self): # the casting doesn't fail so need to match stata here expected['float_miss'] = expected['float_miss'].astype(np.float32) - tm.assert_frame_equal(parsed_114, expected) - tm.assert_frame_equal(parsed_117, expected) + tm.assert_frame_equal(parsed, expected) def test_read_dta2(self): if LooseVersion(sys.version) < '2.7': @@ -193,11 +207,12 @@ def test_read_dta2(self): tm.assert_frame_equal(parsed_117, expected, check_datetimelike_compat=True) - def test_read_dta3(self): - parsed_113 = self.read_dta(self.dta3_113) - parsed_114 = self.read_dta(self.dta3_114) - parsed_115 = self.read_dta(self.dta3_115) - parsed_117 = self.read_dta(self.dta3_117) + @pytest.mark.parametrize( + 'file', ['dta3_113', 'dta3_114', 'dta3_115', 'dta3_117']) + def test_read_dta3(self, file): + + file = getattr(self, file) + parsed = self.read_dta(file) # match stata here expected = self.read_csv(self.csv3) @@ -205,16 +220,14 @@ def test_read_dta3(self): expected['year'] = expected['year'].astype(np.int16) expected['quarter'] = expected['quarter'].astype(np.int8) - tm.assert_frame_equal(parsed_113, expected) - tm.assert_frame_equal(parsed_114, expected) - tm.assert_frame_equal(parsed_115, expected) - tm.assert_frame_equal(parsed_117, expected) + tm.assert_frame_equal(parsed, expected) + + @pytest.mark.parametrize( + 'file', ['dta4_113', 'dta4_114', 'dta4_115', 'dta4_117']) + def test_read_dta4(self, file): - def test_read_dta4(self): - parsed_113 = self.read_dta(self.dta4_113) - parsed_114 = self.read_dta(self.dta4_114) - parsed_115 = self.read_dta(self.dta4_115) - parsed_117 = self.read_dta(self.dta4_117) + file = getattr(self, file) + parsed = self.read_dta(file) expected = DataFrame.from_records( [ @@ -237,10 +250,7 @@ def test_read_dta4(self): for col in expected], axis=1) # stata doesn't save .category metadata - tm.assert_frame_equal(parsed_113, expected, check_categorical=False) - tm.assert_frame_equal(parsed_114, expected, check_categorical=False) - 
tm.assert_frame_equal(parsed_115, expected, check_categorical=False) - tm.assert_frame_equal(parsed_117, expected, check_categorical=False) + tm.assert_frame_equal(parsed, expected, check_categorical=False) # File containing strls def test_read_dta12(self): @@ -427,7 +437,13 @@ def test_read_write_dta13(self): tm.assert_frame_equal(written_and_read_again.set_index('index'), formatted) - def test_read_write_reread_dta14(self): + @pytest.mark.parametrize( + 'file', ['dta14_113', 'dta14_114', 'dta14_115', 'dta14_117']) + def test_read_write_reread_dta14(self, file, parsed_114): + file = getattr(self, file) + parsed = self.read_dta(file) + parsed.index.name = 'index' + expected = self.read_csv(self.csv14) cols = ['byte_', 'int_', 'long_', 'float_', 'double_'] for col in cols: @@ -436,18 +452,7 @@ def test_read_write_reread_dta14(self): expected['date_td'] = pd.to_datetime( expected['date_td'], errors='coerce') - parsed_113 = self.read_dta(self.dta14_113) - parsed_113.index.name = 'index' - parsed_114 = self.read_dta(self.dta14_114) - parsed_114.index.name = 'index' - parsed_115 = self.read_dta(self.dta14_115) - parsed_115.index.name = 'index' - parsed_117 = self.read_dta(self.dta14_117) - parsed_117.index.name = 'index' - - tm.assert_frame_equal(parsed_114, parsed_113) - tm.assert_frame_equal(parsed_114, parsed_115) - tm.assert_frame_equal(parsed_114, parsed_117) + tm.assert_frame_equal(parsed_114, parsed) with tm.ensure_clean() as path: parsed_114.to_stata(path, {'date_td': 'td'}) @@ -455,7 +460,10 @@ def test_read_write_reread_dta14(self): tm.assert_frame_equal( written_and_read_again.set_index('index'), parsed_114) - def test_read_write_reread_dta15(self): + @pytest.mark.parametrize( + 'file', ['dta15_113', 'dta15_114', 'dta15_115', 'dta15_117']) + def test_read_write_reread_dta15(self, file): + expected = self.read_csv(self.csv15) expected['byte_'] = expected['byte_'].astype(np.int8) expected['int_'] = expected['int_'].astype(np.int16) @@ -465,15 +473,10 @@ def test_read_write_reread_dta15(self): expected['date_td'] = expected['date_td'].apply( datetime.strptime, args=('%Y-%m-%d',)) - parsed_113 = self.read_dta(self.dta15_113) - parsed_114 = self.read_dta(self.dta15_114) - parsed_115 = self.read_dta(self.dta15_115) - parsed_117 = self.read_dta(self.dta15_117) + file = getattr(self, file) + parsed = self.read_dta(file) - tm.assert_frame_equal(expected, parsed_114) - tm.assert_frame_equal(parsed_113, parsed_114) - tm.assert_frame_equal(parsed_114, parsed_115) - tm.assert_frame_equal(parsed_114, parsed_117) + tm.assert_frame_equal(expected, parsed) def test_timestamp_and_label(self): original = DataFrame([(1,)], columns=['variable']) @@ -710,7 +713,9 @@ def test_missing_value_generator(self): ' Date: Sat, 23 Sep 2017 13:56:11 -0500 Subject: [PATCH 132/188] Categorical type (#16015) Closes #14711 Closes #15078 Closes #14676 --- doc/source/advanced.rst | 4 +- doc/source/api.rst | 5 +- doc/source/categorical.rst | 103 ++++- doc/source/merging.rst | 11 +- doc/source/whatsnew/v0.21.0.txt | 27 ++ pandas/core/categorical.py | 357 ++++++++++-------- pandas/core/dtypes/common.py | 38 +- pandas/core/dtypes/dtypes.py | 217 ++++++++++- pandas/core/indexes/base.py | 15 +- pandas/core/indexes/category.py | 54 ++- pandas/core/indexes/interval.py | 3 +- pandas/core/indexes/multi.py | 2 +- pandas/core/indexes/range.py | 2 +- pandas/core/internals.py | 20 +- pandas/core/series.py | 3 +- pandas/core/sorting.py | 3 +- pandas/core/util/hashing.py | 2 +- pandas/tests/dtypes/test_common.py | 10 +- 
pandas/tests/dtypes/test_dtypes.py | 141 ++++++- pandas/tests/frame/test_analytics.py | 3 + pandas/tests/indexes/test_category.py | 10 +- .../tests/io/json/test_json_table_schema.py | 5 +- pandas/tests/io/test_parquet.py | 2 + pandas/tests/io/test_pytables.py | 10 +- pandas/tests/reshape/test_merge.py | 4 +- pandas/tests/series/test_analytics.py | 11 +- pandas/tests/series/test_constructors.py | 21 ++ pandas/tests/series/test_dtypes.py | 34 +- pandas/tests/test_algos.py | 72 ++-- pandas/tests/test_categorical.py | 182 ++++++++- pandas/util/testing.py | 9 +- 31 files changed, 1092 insertions(+), 288 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 3bda8c7eacb61b..799d04859cc2ac 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -638,9 +638,11 @@ and allows efficient indexing and storage of an index with a large number of dup .. ipython:: python + from pandas.api.types import CategoricalDtype + df = pd.DataFrame({'A': np.arange(6), 'B': list('aabbca')}) - df['B'] = df['B'].astype('category', categories=list('cab')) + df['B'] = df['B'].astype(CategoricalDtype(list('cab'))) df df.dtypes df.B.cat.categories diff --git a/doc/source/api.rst b/doc/source/api.rst index 96c7f68f57aaaa..4ffeb5035912f5 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -646,7 +646,10 @@ strings and apply several methods to it. These can be accessed like Categorical ~~~~~~~~~~~ -If the Series is of dtype ``category``, ``Series.cat`` can be used to change the the categorical +.. autoclass:: api.types.CategoricalDtype + :members: categories, ordered + +If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the following usable methods and properties: diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index ff5e550ebd97f4..cadbc895354b71 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -89,12 +89,22 @@ By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to df["B"] = raw_cat df -You can also specify differently ordered categories or make the resulting data ordered, by passing these arguments to ``astype()``: +Anywhere above we passed a keyword ``dtype='category'``, we used the default behavior of + +1. categories are inferred from the data +2. categories are unordered. + +To control those behaviors, instead of passing ``'category'``, use an instance +of :class:`~pandas.api.types.CategoricalDtype`. .. ipython:: python - s = pd.Series(["a","b","c","a"]) - s_cat = s.astype("category", categories=["b","c","d"], ordered=False) + from pandas.api.types import CategoricalDtype + + s = pd.Series(["a", "b", "c", "a"]) + cat_type = CategoricalDtype(categories=["b", "c", "d"], + ordered=True) + s_cat = s.astype(cat_type) s_cat Categorical data has a specific ``category`` :ref:`dtype `: @@ -133,6 +143,75 @@ constructor to save the factorize step during normal constructor mode: splitter = np.random.choice([0,1], 5, p=[0.5,0.5]) s = pd.Series(pd.Categorical.from_codes(splitter, categories=["train", "test"])) +.. _categorical.categoricaldtype: + +CategoricalDtype +---------------- + +.. versionchanged:: 0.21.0 + +A categorical's type is fully described by + +1. ``categories``: a sequence of unique values and no missing values +2. ``ordered``: a boolean + +This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`. 
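+
+For example (a brief sketch; any values not listed in ``categories`` become
+``NaN`` once the dtype is applied):
+
+.. ipython:: python
+
+   cat_type = CategoricalDtype(categories=["a", "b"], ordered=True)
+   pd.Series(["a", "b", "a", "c"], dtype=cat_type)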
+The ``categories`` argument is optional, which implies that the actual categories +should be inferred from whatever is present in the data when the +:class:`pandas.Categorical` is created. The categories are assumed to be unordered +by default. + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + CategoricalDtype(['a', 'b', 'c']) + CategoricalDtype(['a', 'b', 'c'], ordered=True) + CategoricalDtype() + +A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas +expects a `dtype`. For example :func:`pandas.read_csv`, +:func:`pandas.DataFrame.astype`, or in the Series constructor. + +.. note:: + + As a convenience, you can use the string ``'category'`` in place of a + :class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of + the categories being unordered, and equal to the set values present in the + array. In other words, ``dtype='category'`` is equivalent to + ``dtype=CategoricalDtype()``. + +Equality Semantics +~~~~~~~~~~~~~~~~~~ + +Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal +whenever they have the same categories and orderedness. When comparing two +unordered categoricals, the order of the ``categories`` is not considered + +.. ipython:: python + + c1 = CategoricalDtype(['a', 'b', 'c'], ordered=False) + + # Equal, since order is not considered when ordered=False + c1 == CategoricalDtype(['b', 'c', 'a'], ordered=False) + + # Unequal, since the second CategoricalDtype is ordered + c1 == CategoricalDtype(['a', 'b', 'c'], ordered=True) + +All instances of ``CategoricalDtype`` compare equal to the string ``'category'`` + +.. ipython:: python + + c1 == 'category' + +.. warning:: + + Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``, + and since all instances ``CategoricalDtype`` compare equal to ``'category'``, + all instances of ``CategoricalDtype`` compare equal to a + ``CategoricalDtype(None, False)``, regardless of ``categories`` or + ``ordered``. + Description ----------- @@ -184,7 +263,7 @@ It's also possible to pass in the categories in a specific order: .. ipython:: python - s = pd.Series(list('babc')).astype('category', categories=list('abcd')) + s = pd.Series(list('babc')).astype(CategoricalDtype(list('abcd'))) s # categories @@ -301,7 +380,9 @@ meaning and certain operations are possible. If the categorical is unordered, `` s = pd.Series(pd.Categorical(["a","b","c","a"], ordered=False)) s.sort_values(inplace=True) - s = pd.Series(["a","b","c","a"]).astype('category', ordered=True) + s = pd.Series(["a","b","c","a"]).astype( + CategoricalDtype(ordered=True) + ) s.sort_values(inplace=True) s s.min(), s.max() @@ -401,9 +482,15 @@ categories or a categorical with any list-like object, will raise a TypeError. .. ipython:: python - cat = pd.Series([1,2,3]).astype("category", categories=[3,2,1], ordered=True) - cat_base = pd.Series([2,2,2]).astype("category", categories=[3,2,1], ordered=True) - cat_base2 = pd.Series([2,2,2]).astype("category", ordered=True) + cat = pd.Series([1,2,3]).astype( + CategoricalDtype([3, 2, 1], ordered=True) + ) + cat_base = pd.Series([2,2,2]).astype( + CategoricalDtype([3, 2, 1], ordered=True) + ) + cat_base2 = pd.Series([2,2,2]).astype( + CategoricalDtype(ordered=True) + ) cat cat_base diff --git a/doc/source/merging.rst b/doc/source/merging.rst index 72787ea97a7824..ad40c75a62722c 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -830,8 +830,10 @@ The left frame. .. 
ipython:: python + from pandas.api.types import CategoricalDtype + X = pd.Series(np.random.choice(['foo', 'bar'], size=(10,))) - X = X.astype('category', categories=['foo', 'bar']) + X = X.astype(CategoricalDtype(categories=['foo', 'bar'])) left = pd.DataFrame({'X': X, 'Y': np.random.choice(['one', 'two', 'three'], size=(10,))}) @@ -842,8 +844,11 @@ The right frame. .. ipython:: python - right = pd.DataFrame({'X': pd.Series(['foo', 'bar']).astype('category', categories=['foo', 'bar']), - 'Z': [1, 2]}) + right = pd.DataFrame({ + 'X': pd.Series(['foo', 'bar'], + dtype=CategoricalDtype(['foo', 'bar'])), + 'Z': [1, 2] + }) right right.dtypes diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 32e4294f06d6bb..261e12b8245094 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -10,6 +10,8 @@ users upgrade to this version. Highlights include: - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- New user-facing :class:`pandas.api.types.CategoricalDtype` for specifying + categoricals independent of the data, see :ref:`here `. Check the :ref:`API Changes ` and :ref:`deprecations ` before updating. @@ -89,6 +91,31 @@ This does not raise any obvious exceptions, but also does not create a new colum Setting a list-like data structure into a new attribute now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. +.. _whatsnew_0210.enhancements.categorical_dtype: + +``CategoricalDtype`` for specifying categoricals +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:class:`pandas.api.types.CategoricalDtype` has been added to the public API and +expanded to include the ``categories`` and ``ordered`` attributes. A +``CategoricalDtype`` can be used to specify the set of categories and +orderedness of an array, independent of the data themselves. This can be useful, +e.g., when converting string data to a ``Categorical`` (:issue:`14711`, +:issue:`15078`, :issue:`16015`): + +.. ipython:: python + + from pandas.api.types import CategoricalDtype + + s = pd.Series(['a', 'b', 'c', 'a']) # strings + dtype = CategoricalDtype(categories=['a', 'b', 'c', 'd'], ordered=True) + s.astype(dtype) + +The ``.dtype`` property of a ``Categorical``, ``CategoricalIndex`` or a +``Series`` with categorical type will now return an instance of ``CategoricalDtype``. + +See the :ref:`CategoricalDtype docs ` for more. + .. _whatsnew_0210.enhancements.other: Other Enhancements diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 6f7eafe43dbbb2..98d6d7a68017ad 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -23,7 +23,7 @@ is_datetimelike, is_categorical, is_categorical_dtype, - is_integer_dtype, is_bool, + is_integer_dtype, is_list_like, is_sequence, is_scalar, is_dict_like) @@ -140,33 +140,6 @@ def maybe_to_categorical(array): setter to change values in the categorical. """ -_categories_doc = """The categories of this categorical. - -Setting assigns new values to each category (effectively a rename of -each individual category). - -The assigned value has to be a list-like object. All items must be unique and -the number of items in the new categories must be the same as the number of -items in the old categories. - -Assigning to `categories` is a inplace operation! 
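A hedged sketch of the rename-by-assignment behavior described above (made-up values; assuming the 0.21.0-era API):

    import pandas as pd

    c = pd.Categorical(['a', 'b', 'a'])
    c.categories = ['x', 'y']   # in-place rename: 'a' -> 'x', 'b' -> 'y'
    # assigning a list of a different length raises ValueError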
- -Raises ------- -ValueError - If the new categories do not validate as categories or if the number of new - categories is unequal the number of old categories - -See also --------- -rename_categories -reorder_categories -add_categories -remove_categories -remove_unused_categories -set_categories -""" - class Categorical(PandasObject): """ @@ -193,6 +166,10 @@ class Categorical(PandasObject): ordered : boolean, (default False) Whether or not this categorical is treated as a ordered categorical. If not given, the resulting categorical will not be ordered. + dtype : CategoricalDtype + An instance of ``CategoricalDtype`` to use for this categorical + + .. versionadded:: 0.21.0 Attributes ---------- @@ -203,6 +180,11 @@ class Categorical(PandasObject): categorical, read only. ordered : boolean Whether or not this Categorical is ordered. + dtype : CategoricalDtype + The instance of ``CategoricalDtype`` storing the ``categories`` + and ``ordered``. + + .. versionadded:: 0.21.0 Raises ------ @@ -212,7 +194,6 @@ class Categorical(PandasObject): If an explicit ``ordered=True`` is given but no `categories` and the `values` are not sortable. - Examples -------- >>> from pandas import Categorical @@ -224,17 +205,17 @@ class Categorical(PandasObject): [a, b, c, a, b, c] Categories (3, object): [a < b < c] + Only ordered `Categoricals` can be sorted (according to the order + of the categories) and have a min and max value. + >>> a = Categorical(['a','b','c','a','b','c'], ['c', 'b', 'a'], ordered=True) >>> a.min() 'c' - """ - dtype = CategoricalDtype() - """The dtype (always "category")""" - """Whether or not this Categorical is ordered. - Only ordered `Categoricals` can be sorted (according to the order - of the categories) and have a min and max value. + Notes + ----- + See the :ref:`user guide ` for more. See also -------- @@ -242,23 +223,58 @@ class Categorical(PandasObject): Categorical.order Categorical.min Categorical.max + pandas.api.types.CategoricalDtype """ # For comparisons, so that numpy uses our implementation if the compare # ops, which raise __array_priority__ = 1000 + _dtype = CategoricalDtype() _typ = 'categorical' - def __init__(self, values, categories=None, ordered=False, fastpath=False): + def __init__(self, values, categories=None, ordered=None, dtype=None, + fastpath=False): + + # Ways of specifying the dtype (prioritized ordered) + # 1. dtype is a CategoricalDtype + # a.) with known categories, use dtype.categories + # b.) else with Categorical values, use values.dtype + # c.) else, infer from values + # d.) specifying dtype=CategoricalDtype and categories is an error + # 2. dtype is a string 'category' + # a.) use categories, ordered + # b.) use values.dtype + # c.) infer from values + # 3. dtype is None + # a.) use categories, ordered + # b.) use values.dtype + # c.) 
infer from values + + if dtype is not None: + if isinstance(dtype, compat.string_types): + if dtype == 'category': + dtype = CategoricalDtype(categories, ordered) + else: + raise ValueError("Unknown `dtype` {}".format(dtype)) + elif categories is not None or ordered is not None: + raise ValueError("Cannot specify both `dtype` and `categories`" + " or `ordered`.") + + categories = dtype.categories + ordered = dtype.ordered + + elif is_categorical(values): + dtype = values.dtype._from_categorical_dtype(values.dtype, + categories, ordered) + else: + dtype = CategoricalDtype(categories, ordered) - self._validate_ordered(ordered) + # At this point, dtype is always a CategoricalDtype + # if dtype.categories is None, we are inferring if fastpath: - # fast path self._codes = coerce_indexer_dtype(values, categories) - self._categories = self._validate_categories( - categories, fastpath=isinstance(categories, ABCIndexClass)) - self._ordered = ordered + self._dtype = dtype return # sanitize input @@ -275,6 +291,7 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): values = values.get_values() elif isinstance(values, (ABCIndexClass, ABCSeries)): + # we'll do inference later pass else: @@ -292,12 +309,12 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): # "object" dtype to prevent this. In the end objects will be # casted to int/... in the category assignment step. if len(values) == 0 or isna(values).any(): - dtype = 'object' + sanitize_dtype = 'object' else: - dtype = None - values = _sanitize_array(values, None, dtype=dtype) + sanitize_dtype = None + values = _sanitize_array(values, None, dtype=sanitize_dtype) - if categories is None: + if dtype.categories is None: try: codes, categories = factorize(values, sort=True) except TypeError: @@ -314,7 +331,9 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): raise NotImplementedError("> 1 ndim Categorical are not " "supported at this time") - categories = self._validate_categories(categories) + if dtype.categories is None: + # we're inferring from values + dtype = CategoricalDtype(categories, ordered) else: # there were two ways if categories are present @@ -324,14 +343,12 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): # - the new one, where each value is also in the categories array # (or np.nan) - # make sure that we always have the same type here, no matter what - # we get passed in - categories = self._validate_categories(categories) - codes = _get_codes_for_values(values, categories) + codes = _get_codes_for_values(values, dtype.categories) # TODO: check for old style usage. These warnings should be removes # after 0.18/ in 2016 - if is_integer_dtype(values) and not is_integer_dtype(categories): + if (is_integer_dtype(values) and + not is_integer_dtype(dtype.categories)): warn("Values and categories have different dtypes. Did you " "mean to use\n'Categorical.from_codes(codes, " "categories)'?", RuntimeWarning, stacklevel=2) @@ -342,9 +359,57 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False): "mean to use\n'Categorical.from_codes(codes, " "categories)'?", RuntimeWarning, stacklevel=2) - self.set_ordered(ordered or False, inplace=True) - self._categories = categories - self._codes = coerce_indexer_dtype(codes, categories) + self._dtype = dtype + self._codes = coerce_indexer_dtype(codes, dtype.categories) + + @property + def categories(self): + """The categories of this categorical. 
+ + Setting assigns new values to each category (effectively a rename of + each individual category). + + The assigned value has to be a list-like object. All items must be + unique and the number of items in the new categories must be the same + as the number of items in the old categories. + + Assigning to `categories` is a inplace operation! + + Raises + ------ + ValueError + If the new categories do not validate as categories or if the + number of new categories is unequal the number of old categories + + See also + -------- + rename_categories + reorder_categories + add_categories + remove_categories + remove_unused_categories + set_categories + """ + return self.dtype.categories + + @categories.setter + def categories(self, categories): + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if (self.dtype.categories is not None and + len(self.dtype.categories) != len(new_dtype.categories)): + raise ValueError("new categories need to have the same number of " + "items as the old categories!") + self._dtype = new_dtype + + @property + def ordered(self): + """Whether the categories have an ordered relationship""" + return self.dtype.ordered + + @property + def dtype(self): + """The :ref:`~pandas.api.types.CategoricalDtype` for this instance""" + return self._dtype def __dir__(self): # Avoid IPython warnings for deprecated properties @@ -493,7 +558,7 @@ def from_codes(cls, codes, categories, ordered=False): raise ValueError( "codes need to be convertible to an arrays of integers") - categories = cls._validate_categories(categories) + categories = CategoricalDtype._validate_categories(categories) if len(codes) and (codes.max() >= len(categories) or codes.min() < -1): raise ValueError("codes need to be between -1 and " @@ -536,94 +601,38 @@ def _get_labels(self): labels = property(fget=_get_labels, fset=_set_codes) - _categories = None - - @classmethod - def _validate_ordered(cls, ordered): - """ - Validates that we have a valid ordered parameter. If - it is not a boolean, a TypeError will be raised. - - Parameters - ---------- - ordered : object - The parameter to be verified. - - Raises - ------ - TypeError - If 'ordered' is not a boolean. - """ - if not is_bool(ordered): - raise TypeError("'ordered' must either be 'True' or 'False'") - - @classmethod - def _validate_categories(cls, categories, fastpath=False): - """ - Validates that we have good categories - - Parameters - ---------- - fastpath : boolean (default: False) - Don't perform validation of the categories for uniqueness or nulls - - """ - if not isinstance(categories, ABCIndexClass): - dtype = None - if not hasattr(categories, "dtype"): - if not is_list_like(categories): - raise TypeError("`categories` must be list-like. " - "Got {} instead".format(repr(categories))) - categories = _convert_to_list_like(categories) - # On categories with NaNs, int values would be converted to - # float. Use "object" dtype to prevent this. - if isna(categories).any(): - without_na = np.array([x for x in categories - if notna(x)]) - with_na = np.array(categories) - if with_na.dtype != without_na.dtype: - dtype = "object" - - from pandas import Index - categories = Index(categories, dtype=dtype) - - if not fastpath: - - # Categories cannot contain NaN. - if categories.hasnans: - raise ValueError('Categorial categories cannot be null') - - # Categories must be unique. 
- if not categories.is_unique: - raise ValueError('Categorical categories must be unique') - - return categories - def _set_categories(self, categories, fastpath=False): - """ Sets new categories + """ Sets new categories inplace Parameters ---------- fastpath : boolean (default: False) Don't perform validation of the categories for uniqueness or nulls + Examples + -------- + >>> c = Categorical(['a', 'b']) + >>> c + [a, b] + Categories (2, object): [a, b] + + >>> c._set_categories(pd.Index(['a', 'c'])) + >>> c + [a, c] + Categories (2, object): [a, c] """ - categories = self._validate_categories(categories, fastpath=fastpath) - if (not fastpath and self._categories is not None and - len(categories) != len(self._categories)): + if fastpath: + new_dtype = CategoricalDtype._from_fastpath(categories, + self.ordered) + else: + new_dtype = CategoricalDtype(categories, ordered=self.ordered) + if (not fastpath and self.dtype.categories is not None and + len(new_dtype.categories) != len(self.dtype.categories)): raise ValueError("new categories need to have the same number of " "items than the old categories!") - self._categories = categories - - def _get_categories(self): - """ Gets the categories """ - # categories is an Index, which is immutable -> no need to copy - return self._categories - - categories = property(fget=_get_categories, fset=_set_categories, - doc=_categories_doc) + self._dtype = new_dtype def _codes_for_groupby(self, sort): """ @@ -665,7 +674,21 @@ def _codes_for_groupby(self, sort): return self.reorder_categories(cat.categories) - _ordered = None + def _set_dtype(self, dtype): + """Internal method for directly updating the CategoricalDtype + + Parameters + ---------- + dtype : CategoricalDtype + + Notes + ----- + We don't do any validation here. It's assumed that the dtype is + a (valid) instance of `CategoricalDtype`. + """ + codes = _recode_for_categories(self.codes, self.categories, + dtype.categories) + return type(self)(codes, dtype=dtype, fastpath=True) def set_ordered(self, value, inplace=False): """ @@ -680,9 +703,9 @@ def set_ordered(self, value, inplace=False): of this categorical with ordered set to the value """ inplace = validate_bool_kwarg(inplace, 'inplace') - self._validate_ordered(value) + new_dtype = CategoricalDtype(self.categories, ordered=value) cat = self if inplace else self.copy() - cat._ordered = value + cat._dtype = new_dtype if not inplace: return cat @@ -712,12 +735,6 @@ def as_unordered(self, inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') return self.set_ordered(False, inplace=inplace) - def _get_ordered(self): - """ Gets the ordered attribute """ - return self._ordered - - ordered = property(fget=_get_ordered) - def set_categories(self, new_categories, ordered=None, rename=False, inplace=False): """ Sets the categories to the specified new_categories. 
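For orientation, a small usage sketch of set_categories (hypothetical data, not part of the patch):

    import pandas as pd

    c = pd.Categorical(['a', 'b', 'a'], categories=['a', 'b'])
    c2 = c.set_categories(['b', 'a', 'c'], ordered=True)
    # the values are re-coded against the new categories, and the
    # result becomes ordered with b < a < c
    c2.max()  # 'a'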
@@ -770,22 +787,21 @@ def set_categories(self, new_categories, ordered=None, rename=False, remove_unused_categories """ inplace = validate_bool_kwarg(inplace, 'inplace') - new_categories = self._validate_categories(new_categories) + if ordered is None: + ordered = self.dtype.ordered + new_dtype = CategoricalDtype(new_categories, ordered=ordered) + cat = self if inplace else self.copy() if rename: - if (cat._categories is not None and - len(new_categories) < len(cat._categories)): + if (cat.dtype.categories is not None and + len(new_dtype.categories) < len(cat.dtype.categories)): # remove all _codes which are larger and set to -1/NaN - self._codes[self._codes >= len(new_categories)] = -1 + self._codes[self._codes >= len(new_dtype.categories)] = -1 else: codes = _recode_for_categories(self.codes, self.categories, - new_categories) + new_dtype.categories) cat._codes = codes - cat._categories = new_categories - - if ordered is None: - ordered = self.ordered - cat.set_ordered(ordered, inplace=True) + cat._dtype = new_dtype if not inplace: return cat @@ -871,7 +887,7 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False): set_categories """ inplace = validate_bool_kwarg(inplace, 'inplace') - if set(self._categories) != set(new_categories): + if set(self.dtype.categories) != set(new_categories): raise ValueError("items in new_categories are not the same as in " "old categories") return self.set_categories(new_categories, ordered=ordered, @@ -912,15 +928,17 @@ def add_categories(self, new_categories, inplace=False): inplace = validate_bool_kwarg(inplace, 'inplace') if not is_list_like(new_categories): new_categories = [new_categories] - already_included = set(new_categories) & set(self._categories) + already_included = set(new_categories) & set(self.dtype.categories) if len(already_included) != 0: msg = ("new categories must not include old categories: %s" % str(already_included)) raise ValueError(msg) - new_categories = list(self._categories) + list(new_categories) + new_categories = list(self.dtype.categories) + list(new_categories) + new_dtype = CategoricalDtype(new_categories, self.ordered) + cat = self if inplace else self.copy() - cat._categories = self._validate_categories(new_categories) - cat._codes = coerce_indexer_dtype(cat._codes, new_categories) + cat._dtype = new_dtype + cat._codes = coerce_indexer_dtype(cat._codes, new_dtype.categories) if not inplace: return cat @@ -960,8 +978,9 @@ def remove_categories(self, removals, inplace=False): removals = [removals] removal_set = set(list(removals)) - not_included = removal_set - set(self._categories) - new_categories = [c for c in self._categories if c not in removal_set] + not_included = removal_set - set(self.dtype.categories) + new_categories = [c for c in self.dtype.categories + if c not in removal_set] # GH 10156 if any(isna(removals)): @@ -1003,8 +1022,11 @@ def remove_unused_categories(self, inplace=False): if idx.size != 0 and idx[0] == -1: # na sentinel idx, inv = idx[1:], inv - 1 - cat._categories = cat.categories.take(idx) - cat._codes = coerce_indexer_dtype(inv, self._categories) + new_categories = cat.dtype.categories.take(idx) + new_dtype = CategoricalDtype._from_fastpath(new_categories, + ordered=self.ordered) + cat._dtype = new_dtype + cat._codes = coerce_indexer_dtype(inv, new_dtype.categories) if not inplace: return cat @@ -1105,7 +1127,7 @@ def __setstate__(self, state): # Provide compatibility with pre-0.15.0 Categoricals. 
         if '_categories' not in state and '_levels' in state:
-            state['_categories'] = self._validate_categories(state.pop(
+            state['_categories'] = self.dtype._validate_categories(state.pop(
                 '_levels'))
         if '_codes' not in state and 'labels' in state:
             state['_codes'] = coerce_indexer_dtype(
@@ -1120,6 +1142,11 @@ def __setstate__(self, state):
         else:
             state['_ordered'] = False

+        # 0.21.0 CategoricalDtype change
+        if '_dtype' not in state:
+            state['_dtype'] = CategoricalDtype(state['_categories'],
+                                               state['_ordered'])
+
         for k, v in compat.iteritems(state):
             setattr(self, k, v)

@@ -1129,7 +1156,7 @@ def T(self):

     @property
     def nbytes(self):
-        return self._codes.nbytes + self._categories.values.nbytes
+        return self._codes.nbytes + self.dtype.categories.values.nbytes

     def memory_usage(self, deep=False):
         """
@@ -1154,7 +1181,8 @@ def memory_usage(self, deep=False):
         --------
         numpy.ndarray.nbytes
         """
-        return self._codes.nbytes + self._categories.memory_usage(deep=deep)
+        return self._codes.nbytes + self.dtype.categories.memory_usage(
+            deep=deep)

     @Substitution(klass='Categorical')
     @Appender(_shared_docs['searchsorted'])
@@ -1285,7 +1313,7 @@ def value_counts(self, dropna=True):
             count = bincount(np.where(mask, code, ncat))
             ix = np.append(ix, -1)

-        ix = self._constructor(ix, categories=cat, ordered=obj.ordered,
+        ix = self._constructor(ix, dtype=self.dtype,
                                fastpath=True)

         return Series(count, index=CategoricalIndex(ix), dtype='int64')
@@ -1998,8 +2026,7 @@ def is_dtype_equal(self, other):
         """
         try:
-            return (self.categories.equals(other.categories) and
-                    self.ordered == other.ordered)
+            return hash(self.dtype) == hash(other.dtype)
         except (AttributeError, TypeError):
             return False

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
index c47e61dc446be2..f60c0d5ffdca0b 100644
--- a/pandas/core/dtypes/common.py
+++ b/pandas/core/dtypes/common.py
@@ -692,6 +692,40 @@ def is_dtype_equal(source, target):
         return False


+def is_dtype_union_equal(source, target):
+    """
+    Check whether two arrays have compatible dtypes to do a union.
+    numpy types are checked with ``is_dtype_equal``. Extension types are
+    checked separately.
+
+    Parameters
+    ----------
+    source : The first dtype to compare
+    target : The second dtype to compare
+
+    Returns
+    -------
+    boolean : Whether or not the two dtypes are union-compatible.
+
+    >>> is_dtype_union_equal("int", int)
+    True
+
+    >>> is_dtype_union_equal(CategoricalDtype(['a', 'b']),
+    ...                      CategoricalDtype(['b', 'c']))
+    True
+
+    >>> is_dtype_union_equal(CategoricalDtype(['a', 'b']),
+    ...                      CategoricalDtype(['b', 'c'], ordered=True))
+    False
+    """
+    source = _get_dtype(source)
+    target = _get_dtype(target)
+    if is_categorical_dtype(source) and is_categorical_dtype(target):
+        # union is only defined when orderedness matches
+        return source.ordered is target.ordered
+    return is_dtype_equal(source, target)
+
+
 def is_any_int_dtype(arr_or_dtype):
     """
     DEPRECATED: This function will be removed in a future version.
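A short sketch of the intended union semantics (assuming is_dtype_union_equal is importable from pandas.core.dtypes.common at this development stage):

    from pandas.core.dtypes.common import is_dtype_union_equal
    from pandas.api.types import CategoricalDtype

    a = CategoricalDtype(['a', 'b'])
    b = CategoricalDtype(['b', 'c'])
    # unordered categoricals can union even with different categories
    is_dtype_union_equal(a, b)   # True
    # mixing unordered and ordered is not union-compatible
    is_dtype_union_equal(a, CategoricalDtype(['b', 'c'], ordered=True))  # False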
@@ -1671,7 +1705,9 @@ def _coerce_to_dtype(dtype): """ if is_categorical_dtype(dtype): - dtype = CategoricalDtype() + categories = getattr(dtype, 'categories', None) + ordered = getattr(dtype, 'ordered', False) + dtype = CategoricalDtype(categories=categories, ordered=ordered) elif is_datetime64tz_dtype(dtype): dtype = DatetimeTZDtype(dtype) elif is_period_dtype(dtype): diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index dc2c56ea476f9d..d2487905caced2 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -3,6 +3,7 @@ import re import numpy as np from pandas import compat +from pandas.core.dtypes.generic import ABCIndexClass class ExtensionDtype(object): @@ -110,37 +111,161 @@ class CategoricalDtypeType(type): class CategoricalDtype(ExtensionDtype): """ - A np.dtype duck-typed class, suitable for holding a custom categorical - dtype. - - THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.object + Type for categorical data with the categories and orderedness + + .. versionchanged:: 0.21.0 + + Parameters + ---------- + categories : sequence, optional + Must be unique, and must not contain any nulls. + ordered : bool, default False + + Notes + ----- + This class is useful for specifying the type of a ``Categorical`` + independent of the values. See :ref:`categorical.categoricaldtype` + for more. + + Examples + -------- + >>> t = CategoricalDtype(categories=['b', 'a'], ordered=True) + >>> pd.Series(['a', 'b', 'a', 'c'], dtype=t) + 0 a + 1 b + 2 a + 3 NaN + dtype: category + Categories (2, object): [b < a] + + See Also + -------- + Categorical """ + # TODO: Document public vs. private API name = 'category' type = CategoricalDtypeType kind = 'O' str = '|O08' base = np.dtype('O') - _metadata = [] + _metadata = ['categories', 'ordered'] _cache = {} - def __new__(cls): + def __init__(self, categories=None, ordered=False): + self._finalize(categories, ordered, fastpath=False) - try: - return cls._cache[cls.name] - except KeyError: - c = object.__new__(cls) - cls._cache[cls.name] = c - return c + @classmethod + def _from_fastpath(cls, categories=None, ordered=False): + self = cls.__new__(cls) + self._finalize(categories, ordered, fastpath=True) + return self + + @classmethod + def _from_categorical_dtype(cls, dtype, categories=None, ordered=None): + if categories is ordered is None: + return dtype + if categories is None: + categories = dtype.categories + if ordered is None: + ordered = dtype.ordered + return cls(categories, ordered) + + def _finalize(self, categories, ordered, fastpath=False): + from pandas.core.indexes.base import Index + + if ordered is None: + ordered = False + + if categories is not None: + categories = Index(categories, tupleize_cols=False) + # validation + self._validate_categories(categories) + self._validate_ordered(ordered) + self._categories = categories + self._ordered = ordered + + def __setstate__(self, state): + self._categories = state.pop('categories', None) + self._ordered = state.pop('ordered', False) def __hash__(self): - # make myself hashable - return hash(str(self)) + # _hash_categories returns a uint64, so use the negative + # space for when we have unknown categories to avoid a conflict + if self.categories is None: + if self.ordered: + return -1 + else: + return -2 + # We *do* want to include the real self.ordered here + return int(self._hash_categories(self.categories, self.ordered)) def __eq__(self, other): if isinstance(other, compat.string_types): return other == self.name - return 
isinstance(other, CategoricalDtype) + if not (hasattr(other, 'ordered') and hasattr(other, 'categories')): + return False + elif self.categories is None or other.categories is None: + # We're forced into a suboptimal corner thanks to math and + # backwards compatibility. We require that `CDT(...) == 'category'` + # for all CDTs **including** `CDT(None, ...)`. Therefore, *all* + # CDT(., .) = CDT(None, False) and *all* + # CDT(., .) = CDT(None, True). + return True + elif self.ordered: + return other.ordered and self.categories.equals(other.categories) + elif other.ordered: + return False + else: + # both unordered; this could probably be optimized / cached + return hash(self) == hash(other) + + def __unicode__(self): + tpl = u'CategoricalDtype(categories={}ordered={})' + if self.categories is None: + data = u"None, " + else: + data = self.categories._format_data(name=self.__class__.__name__) + return tpl.format(data, self.ordered) + + @staticmethod + def _hash_categories(categories, ordered=True): + from pandas.core.util.hashing import ( + hash_array, _combine_hash_arrays, hash_tuples + ) + + if len(categories) and isinstance(categories[0], tuple): + # assumes if any individual category is a tuple, then all our. ATM + # I don't really want to support just some of the categories being + # tuples. + categories = list(categories) # breaks if a np.array of categories + cat_array = hash_tuples(categories) + else: + if categories.dtype == 'O': + types = [type(x) for x in categories] + if not len(set(types)) == 1: + # TODO: hash_array doesn't handle mixed types. It casts + # everything to a str first, which means we treat + # {'1', '2'} the same as {'1', 2} + # find a better solution + cat_array = np.array([hash(x) for x in categories]) + hashed = hash((tuple(categories), ordered)) + return hashed + cat_array = hash_array(np.asarray(categories), categorize=False) + if ordered: + cat_array = np.vstack([ + cat_array, np.arange(len(cat_array), dtype=cat_array.dtype) + ]) + else: + cat_array = [cat_array] + hashed = _combine_hash_arrays(iter(cat_array), + num_items=len(cat_array)) + if len(hashed) == 0: + # bug in Numpy<1.12 for length 0 arrays. Just return the correct + # value of 0 + return 0 + else: + return np.bitwise_xor.reduce(hashed) @classmethod def construct_from_string(cls, string): @@ -154,6 +279,68 @@ def construct_from_string(cls, string): raise TypeError("cannot construct a CategoricalDtype") + @staticmethod + def _validate_ordered(ordered): + """ + Validates that we have a valid ordered parameter. If + it is not a boolean, a TypeError will be raised. + + Parameters + ---------- + ordered : object + The parameter to be verified. + + Raises + ------ + TypeError + If 'ordered' is not a boolean. 
+ """ + from pandas.core.dtypes.common import is_bool + if not is_bool(ordered): + raise TypeError("'ordered' must either be 'True' or 'False'") + + @staticmethod + def _validate_categories(categories, fastpath=False): + """ + Validates that we have good categories + + Parameters + ---------- + categories : array-like + fastpath : bool + Whether to skip nan and uniqueness checks + + Returns + ------- + categories : Index + """ + from pandas import Index + + if not isinstance(categories, ABCIndexClass): + categories = Index(categories) + + if not fastpath: + + if categories.hasnans: + raise ValueError('Categorial categories cannot be null') + + if not categories.is_unique: + raise ValueError('Categorical categories must be unique') + + return categories + + @property + def categories(self): + """ + An ``Index`` containing the unique categories allowed. + """ + return self._categories + + @property + def ordered(self): + """Whether the categories have an ordered relationship""" + return self._ordered + class DatetimeTZDtypeType(type): """ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ca145eeaaa7b89..562a758f83edc7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -27,6 +27,7 @@ is_integer, is_float, is_dtype_equal, + is_dtype_union_equal, is_object_dtype, is_categorical_dtype, is_interval_dtype, @@ -847,7 +848,7 @@ def _formatter_func(self): """ return default_pprint - def _format_data(self): + def _format_data(self, name=None): """ Return the formatted data as a unicode string """ @@ -856,9 +857,11 @@ def _format_data(self): display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 + if name is None: + name = self.__class__.__name__ - space1 = "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) - space2 = "\n%s" % (' ' * (len(self.__class__.__name__) + 2)) + space1 = "\n%s" % (' ' * (len(name) + 1)) + space2 = "\n%s" % (' ' * (len(name) + 2)) n = len(self) sep = ',' @@ -2170,7 +2173,11 @@ def union(self, other): if len(self) == 0: return other._get_consensus_name(self) - if not is_dtype_equal(self.dtype, other.dtype): + # TODO: is_dtype_union_equal is a hack around + # 1. buggy set ops with duplicates (GH #13432) + # 2. 
CategoricalIndex lacking setops (GH #10186) + # Once those are fixed, this workaround can be removed + if not is_dtype_union_equal(self.dtype, other.dtype): this = self.astype('O') other = other.astype('O') return this.union(other) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 447087d3c75637..9a055afccd7997 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -58,16 +58,18 @@ def __new__(cls, data=None, categories=None, ordered=None, dtype=None, copy=False, name=None, fastpath=False, **kwargs): if fastpath: - return cls._simple_new(data, name=name) + return cls._simple_new(data, name=name, dtype=dtype) if name is None and hasattr(data, 'name'): name = data.name if isinstance(data, ABCCategorical): - data = cls._create_categorical(cls, data, categories, ordered) + data = cls._create_categorical(cls, data, categories, ordered, + dtype) elif isinstance(data, CategoricalIndex): data = data._data - data = cls._create_categorical(cls, data, categories, ordered) + data = cls._create_categorical(cls, data, categories, ordered, + dtype) else: # don't allow scalars @@ -114,7 +116,8 @@ def _create_from_codes(self, codes, categories=None, ordered=None, return CategoricalIndex(cat, name=name) @staticmethod - def _create_categorical(self, data, categories=None, ordered=None): + def _create_categorical(self, data, categories=None, ordered=None, + dtype=None): """ *this is an internal non-public method* @@ -125,6 +128,7 @@ def _create_categorical(self, data, categories=None, ordered=None): data : data for new Categorical categories : optional categories, defaults to existing ordered : optional ordered attribute, defaults to existing + dtype : CategoricalDtype, defaults to existing Returns ------- @@ -135,22 +139,30 @@ def _create_categorical(self, data, categories=None, ordered=None): data = data.values if not isinstance(data, ABCCategorical): - ordered = False if ordered is None else ordered + if ordered is None and dtype is None: + ordered = False from pandas.core.categorical import Categorical - data = Categorical(data, categories=categories, ordered=ordered) + data = Categorical(data, categories=categories, ordered=ordered, + dtype=dtype) else: + from pandas.core.dtypes.dtypes import CategoricalDtype + if categories is not None: - data = data.set_categories(categories) - if ordered is not None: + data = data.set_categories(categories, ordered=ordered) + elif ordered is not None and ordered != data.ordered: data = data.set_ordered(ordered) + if isinstance(dtype, CategoricalDtype): + # we want to silently ignore dtype='category' + data = data._set_dtype(dtype) return data @classmethod def _simple_new(cls, values, name=None, categories=None, ordered=None, - **kwargs): + dtype=None, **kwargs): result = object.__new__(cls) - values = cls._create_categorical(cls, values, categories, ordered) + values = cls._create_categorical(cls, values, categories, ordered, + dtype=dtype) result._data = values result.name = name for k, v in compat.iteritems(kwargs): @@ -161,16 +173,28 @@ def _simple_new(cls, values, name=None, categories=None, ordered=None, @Appender(_index_shared_docs['_shallow_copy']) def _shallow_copy(self, values=None, categories=None, ordered=None, - **kwargs): + dtype=None, **kwargs): # categories and ordered can't be part of attributes, # as these are properties + # we want to reuse self.dtype if possible, i.e. neither are + # overridden. 
+ if dtype is not None and (categories is not None or + ordered is not None): + raise TypeError("Cannot specify both `dtype` and `categories` " + "or `ordered`") + + if categories is None and ordered is None: + dtype = self.dtype if dtype is None else dtype + return super(CategoricalIndex, self)._shallow_copy( + values=values, dtype=dtype, **kwargs) if categories is None: categories = self.categories if ordered is None: ordered = self.ordered - return super(CategoricalIndex, - self)._shallow_copy(values=values, categories=categories, - ordered=ordered, **kwargs) + + return super(CategoricalIndex, self)._shallow_copy( + values=values, categories=categories, + ordered=ordered, **kwargs) def _is_dtype_compat(self, other): """ @@ -236,7 +260,7 @@ def _format_attrs(self): ('ordered', self.ordered)] if self.name is not None: attrs.append(('name', ibase.default_pprint(self.name))) - attrs.append(('dtype', "'%s'" % self.dtype)) + attrs.append(('dtype', "'%s'" % self.dtype.name)) max_seq_items = get_option('display.max_seq_items') or len(self) if len(self) > max_seq_items: attrs.append(('length', len(self))) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 8120c93ad33643..55ed2342571ab8 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -985,9 +985,10 @@ def _format_native_types(self, na_rep='', quoting=None, **kwargs): na_rep=na_rep, justify='all').get_result() - def _format_data(self): + def _format_data(self, name=None): # TODO: integrate with categorical and make generic + # name argument is unused here; just for compat with base / categorical n = len(self) max_seq_items = min((get_option( 'display.max_seq_items') or n) // 10, 10) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 66209ecd3a0303..0b7c5f414b1789 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -490,7 +490,7 @@ def _format_attrs(self): def _format_space(self): return "\n%s" % (' ' * (len(self.__class__.__name__) + 1)) - def _format_data(self): + def _format_data(self, name=None): # we are formatting thru the attributes return None diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 16523257c2f77c..a3b899d58255b1 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -189,7 +189,7 @@ def _format_attrs(self): attrs.append(('name', ibase.default_pprint(self.name))) return attrs - def _format_data(self): + def _format_data(self, name=None): # we are formatting thru the attributes return None diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 6799d3b5746d0d..2046bae759b9ab 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -139,14 +139,14 @@ def is_categorical_astype(self, dtype): validate that we have a astypeable to categorical, returns a boolean if we are a categorical """ - if is_categorical_dtype(dtype): - if dtype == CategoricalDtype(): - return True - + if dtype is Categorical or dtype is CategoricalDtype: # this is a pd.Categorical, but is not # a valid type for astypeing raise TypeError("invalid type {0} for astype".format(dtype)) + elif is_categorical_dtype(dtype): + return True + return False def external_values(self, dtype=None): @@ -548,6 +548,18 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, # may need to convert to categorical # this is only called for non-categoricals if self.is_categorical_astype(dtype): + if (('categories' in kwargs or 'ordered' in kwargs) and + 
isinstance(dtype, CategoricalDtype)): + raise TypeError("Cannot specify a CategoricalDtype and also " + "`categories` or `ordered`. Use " + "`dtype=CategoricalDtype(categories, ordered)`" + " instead.") + kwargs = kwargs.copy() + categories = getattr(dtype, 'categories', None) + ordered = getattr(dtype, 'ordered', False) + + kwargs.setdefault('categories', categories) + kwargs.setdefault('ordered', ordered) return self.make_block(Categorical(self.values, **kwargs)) # astype processing diff --git a/pandas/core/series.py b/pandas/core/series.py index 02690dec3e1c4c..ea9aeefe3b6651 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2985,7 +2985,8 @@ def _try_cast(arr, take_fast_path): subarr = np.array(subarr, dtype=dtype, copy=copy) except (ValueError, TypeError): if is_categorical_dtype(dtype): - subarr = Categorical(arr) + subarr = Categorical(arr, dtype.categories, + ordered=dtype.ordered) elif dtype is not None and raise_cast_failure: raise else: diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 12e8d8aba91779..27252b9616a445 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -2,7 +2,6 @@ import numpy as np from pandas.compat import long, string_types, PY3 -from pandas.core.categorical import Categorical from pandas.core.dtypes.common import ( _ensure_platform_int, _ensure_int64, @@ -183,6 +182,8 @@ def indexer_from_factorized(labels, shape, compress=True): def lexsort_indexer(keys, orders=None, na_position='last'): + from pandas.core.categorical import Categorical + labels = [] shape = [] if isinstance(orders, bool): diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 07e993d7ef5092..0c82773b75c289 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -260,7 +260,7 @@ def hash_array(vals, encoding='utf8', hash_key=None, categorize=True): # For categoricals, we hash the categories, then remap the codes to the # hash values. (This check is above the complex check so that we don't ask - # numpy if categorical is a subdtype of complex, as it will choke. + # numpy if categorical is a subdtype of complex, as it will choke). 
if is_categorical_dtype(dtype): return _hash_categorical(vals, encoding, hash_key) diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 8a36f234484b4a..e0be34b14a97de 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -545,10 +545,12 @@ def test_is_complex_dtype(): (pd.Index([1, 2]), np.dtype('int64')), (pd.Index(['a', 'b']), np.dtype(object)), ('category', 'category'), - (pd.Categorical(['a', 'b']).dtype, CategoricalDtype()), - (pd.Categorical(['a', 'b']), CategoricalDtype()), - (pd.CategoricalIndex(['a', 'b']).dtype, CategoricalDtype()), - (pd.CategoricalIndex(['a', 'b']), CategoricalDtype()), + (pd.Categorical(['a', 'b']).dtype, CategoricalDtype(['a', 'b'])), + (pd.Categorical(['a', 'b']), CategoricalDtype(['a', 'b'])), + (pd.CategoricalIndex(['a', 'b']).dtype, CategoricalDtype(['a', 'b'])), + (pd.CategoricalIndex(['a', 'b']), CategoricalDtype(['a', 'b'])), + (CategoricalDtype(), CategoricalDtype()), + (CategoricalDtype(['a', 'b']), CategoricalDtype()), (pd.DatetimeIndex([1, 2]), np.dtype(' Date: Sat, 23 Sep 2017 12:42:54 -0700 Subject: [PATCH 133/188] Dont check for NaTType, just NaT (#17564) --- pandas/core/indexes/timedeltas.py | 2 +- pandas/io/packers.py | 5 ++--- pandas/tests/scalar/test_timedelta.py | 6 +++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index d7b7d56d74a3a9..12b7936503ad70 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -847,7 +847,7 @@ def insert(self, loc, item): pass freq = None - if isinstance(item, (Timedelta, libts.NaTType)): + if isinstance(item, Timedelta) or item is NaT: # check freq can be preserved on edge cases if self.freq is not None: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index a2fc4db23700c2..92270b39f56ef5 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -56,7 +56,6 @@ Index, MultiIndex, Float64Index, Int64Index, Panel, RangeIndex, PeriodIndex, DatetimeIndex, NaT, Categorical, CategoricalIndex) -from pandas._libs.tslib import NaTType from pandas.core.sparse.api import SparseSeries, SparseDataFrame from pandas.core.sparse.array import BlockIndex, IntIndex from pandas.core.generic import NDFrame @@ -470,7 +469,7 @@ def encode(obj): } elif isinstance(obj, (datetime, date, np.datetime64, timedelta, - np.timedelta64, NaTType)): + np.timedelta64)) or obj is NaT: if isinstance(obj, Timestamp): tz = obj.tzinfo if tz is not None: @@ -482,7 +481,7 @@ def encode(obj): u'value': obj.value, u'freq': freq, u'tz': tz} - if isinstance(obj, NaTType): + if obj is NaT: return {u'typ': u'nat'} elif isinstance(obj, np.timedelta64): return {u'typ': u'timedelta64', diff --git a/pandas/tests/scalar/test_timedelta.py b/pandas/tests/scalar/test_timedelta.py index 2cabbfacf64161..b5a8ce24fa4f81 100644 --- a/pandas/tests/scalar/test_timedelta.py +++ b/pandas/tests/scalar/test_timedelta.py @@ -9,7 +9,7 @@ from pandas.core.tools.timedeltas import _coerce_scalar_to_timedelta_type as ct from pandas import (Timedelta, TimedeltaIndex, timedelta_range, Series, to_timedelta, compat) -from pandas._libs.tslib import iNaT, NaTType +from pandas._libs.tslib import iNaT, NaT class TestTimedeltas(object): @@ -579,7 +579,7 @@ def test_implementation_limits(self): assert max_td.value == np.iinfo(np.int64).max # Beyond lower limit, a NAT before the Overflow - assert isinstance(min_td - Timedelta(1, 'ns'), NaTType) + assert (min_td - Timedelta(1, 'ns')) is 
NaT with pytest.raises(OverflowError): min_td - Timedelta(2, 'ns') @@ -589,7 +589,7 @@ def test_implementation_limits(self): # Same tests using the internal nanosecond values td = Timedelta(min_td.value - 1, 'ns') - assert isinstance(td, NaTType) + assert td is NaT with pytest.raises(OverflowError): Timedelta(min_td.value - 2, 'ns') From 87e2f549a28874955b741c782ad99232e9669ad9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 23 Sep 2017 12:44:52 -0700 Subject: [PATCH 134/188] Remove unused cimports (#17585) --- pandas/_libs/groupby.pyx | 2 - pandas/_libs/join.pyx | 2 - pandas/_libs/period.pyx | 30 ++- pandas/_libs/reshape.pyx | 2 - pandas/_libs/src/offsets.pyx | 367 ----------------------------------- setup.py | 4 +- 6 files changed, 15 insertions(+), 392 deletions(-) delete mode 100644 pandas/_libs/src/offsets.pyx diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 9500e685367c86..1cb7b18fa4f61b 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -7,8 +7,6 @@ cimport cython cnp.import_array() -cimport util - from numpy cimport (ndarray, double_t, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 503bdda75875f7..33c3650fa04250 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -8,8 +8,6 @@ from cython cimport Py_ssize_t np.import_array() -cimport util - from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float16_t, float32_t, float64_t) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 49353f7b0491c0..75164748128e26 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -27,13 +27,12 @@ from datetime cimport ( INT32_MIN) -cimport util, lib +cimport util from util cimport is_period_object, is_string_object -from lib cimport is_null_datetimelike, is_period -from pandas._libs import tslib, lib -from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, - NaT) +from lib cimport is_null_datetimelike +from pandas._libs import tslib +from pandas._libs.tslib import Timestamp, iNaT, NaT from tslibs.timezones cimport ( is_utc, is_tzlocal, get_utcoffset, _get_dst_info, maybe_get_tz) from tslib cimport _nat_scalar_rules @@ -485,7 +484,7 @@ def extract_freq(ndarray[object] values): try: # now Timestamp / NaT has freq attr - if is_period(p): + if is_period_object(p): return p.freq except AttributeError: pass @@ -728,8 +727,7 @@ cdef class _Period(object): return hash((self.ordinal, self.freqstr)) def _add_delta(self, other): - if isinstance(other, (timedelta, np.timedelta64, - offsets.Tick, Timedelta)): + if isinstance(other, (timedelta, np.timedelta64, offsets.Tick)): offset = frequencies.to_offset(self.freq.rule_code) if isinstance(offset, offsets.Tick): nanos = tslib._delta_to_nanoseconds(other) @@ -754,12 +752,11 @@ cdef class _Period(object): def __add__(self, other): if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, - offsets.DateOffset, - Timedelta)): + offsets.DateOffset)): return self._add_delta(other) elif other is NaT: return NaT - elif lib.is_integer(other): + elif util.is_integer_object(other): ordinal = self.ordinal + other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) else: # pragma: no cover @@ -772,11 +769,10 @@ cdef class _Period(object): def __sub__(self, other): if is_period_object(self): if isinstance(other, (timedelta, np.timedelta64, - offsets.DateOffset, - Timedelta)): + offsets.DateOffset)): neg_other = -other return self + 
neg_other - elif lib.is_integer(other): + elif util.is_integer_object(other): ordinal = self.ordinal - other * self.freq.n return Period(ordinal=ordinal, freq=self.freq) elif is_period_object(other): @@ -1159,7 +1155,7 @@ class Period(_Period): raise ValueError(("Only value or ordinal but not both should be " "given but not both")) elif ordinal is not None: - if not lib.is_integer(ordinal): + if not util.is_integer_object(ordinal): raise ValueError("Ordinal must be an integer") if freq is None: raise ValueError('Must supply freq for ordinal value') @@ -1196,8 +1192,8 @@ class Period(_Period): elif is_null_datetimelike(value) or value in tslib._nat_strings: ordinal = iNaT - elif is_string_object(value) or lib.is_integer(value): - if lib.is_integer(value): + elif is_string_object(value) or util.is_integer_object(value): + if util.is_integer_object(value): value = str(value) value = value.upper() dt, _, reso = parse_time_string(value, freq) diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx index d6996add374a95..db2e8b43d1ead7 100644 --- a/pandas/_libs/reshape.pyx +++ b/pandas/_libs/reshape.pyx @@ -8,8 +8,6 @@ from cython cimport Py_ssize_t np.import_array() -cimport util - from numpy cimport (ndarray, int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, uint32_t, uint64_t, float16_t, float32_t, float64_t) diff --git a/pandas/_libs/src/offsets.pyx b/pandas/_libs/src/offsets.pyx deleted file mode 100644 index c963e256d0aa5b..00000000000000 --- a/pandas/_libs/src/offsets.pyx +++ /dev/null @@ -1,367 +0,0 @@ - -ctypedef enum time_res: - r_min = 0 - r_microsecond - r_second - r_minute - r_hour - r_day - r_month - r_year - r_max = 98 - r_invalid = 99 - - -cdef conversion_factor(time_res res1, time_res res2): - cdef: - time_res min_res, max_res - int64_t factor - - min_res = min(res1, res2) - max_res = max(res1, res2) - factor = 1 - - if min_res == max_res: - return factor - - while min_res < max_res: - if min_res < r_microsecond: - raise "Cannot convert from less than us" - elif min_res == r_microsecond: - factor *= 1000000 - min_res = r_second - elif min_res == r_second: - factor *= 60 - min_res = r_minute - elif min_res == r_minute: - factor *= 60 - min_res = r_hour - elif min_res == r_hour: - factor *= 24 - min_res = r_day - else: - raise "Cannot convert to month or year" - - return factor - -# Logic to generate ranges -# ----------------------------------------------------------------------------- - -cdef inline int64_t weekend_adjustment(int64_t dow, int bkwd): - if dow > 4: # sat or sun? - if bkwd: # roll back 1 or 2 days - return (4 - dow) - else: # roll forward 2 or 1 days - return (7 - dow) - return 0 - -cdef int64_t us_in_day = conversion_factor(r_microsecond, r_day) - -cdef class _Offset: - """ - Base class to generate timestamps. Set the anchor, and then move offsets - with next & prev. Retrieve timestamp with ts attribute. - """ - cdef: - int64_t t, dow, biz, dayoffset - object start - _TSObject ts - - def __cinit__(self): - self.t=0 - self.dow=0 - self.biz=0 - self.dayoffset=0 - - cpdef anchor(self, object start=None): - if start is not None: - self.start = start - self.ts = convert_to_tsobject(self.start, None, None) - self._setup() - - cdef _setup(self): - pass - - cpdef next(self): - pass - - cpdef __next__(self): - """wrapper around next""" - return self.next() - - cpdef prev(self): - pass - - cdef int64_t _ts(self): - """ - Access the current timestamp value, with a possible weekday - adjustment. 
- """ - cdef int64_t adj - - if self.biz != 0: - adj = weekend_adjustment(self.dow, self.biz < 0) - return self.t + us_in_day * adj - else: - return self.t - - cdef int64_t _get_anchor(self): - """ - Retrieve an anchor relating to current offset we're on. - """ - return self.t - self.dayoffset * us_in_day - - property ts: - def __get__(self): - return self._ts() - -cdef class YearOffset(_Offset): - """ - Generate annual timestamps from provided start time; apply dayoffset to - each timestamp. If biz > 0, we choose the next business day at each time; - previous if < 0. - - Parameters - ---------- - dayoffset : int - biz : int - """ - cdef: - int64_t y, ly - - def __init__(self, int64_t dayoffset=0, int64_t biz=0, object anchor=None): - self.dayoffset = dayoffset - self.biz = biz - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - self.t = ts.value + self.dayoffset * us_in_day - self.y = ts.dts.year - - self.ly = (ts.dts.month > 2 or - ts.dts.month == 2 and ts.dts.day == 29) - - if self.biz != 0: - self.dow = (ts_dayofweek(ts) + self.dayoffset) % 7 - - cpdef next(self): - cdef int64_t days - - days = 365 + is_leapyear(self.y + self.ly) - - self.t += days * us_in_day - self.y += 1 - - if self.biz != 0: - self.dow = (self.dow + days) % 7 - - cpdef prev(self): - cdef int64_t days - - days = 365 + is_leapyear(self.y - (1 - self.ly)) - - self.t -= days * us_in_day - self.y -= 1 - - if self.biz != 0: - self.dow = (self.dow - days) % 7 - -cdef class MonthOffset(_Offset): - """ - Generate monthly timestamps from provided start time, and apply dayoffset - to each timestamp. Stride to construct strided timestamps (eg quarterly). - If biz > 0, we choose the next business day at each time; previous if < 0. - - Parameters - ---------- - dayoffset : int - stride : int, > 0 - biz : int - """ - cdef: - Py_ssize_t stride, ly, m - int64_t y - - def __init__(self, int64_t dayoffset=0, Py_ssize_t stride=1, - int64_t biz=0, object anchor=None): - self.dayoffset = dayoffset - self.stride = stride - self.biz = biz - - if stride <= 0: - raise ValueError("Stride must be positive") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - self.t = ts.value + (self.dayoffset * us_in_day) - - # for day counting - self.m = ts.dts.month - 1 - self.y = ts.dts.year - self.ly = is_leapyear(self.y) - - if self.biz != 0: - self.dow = (ts_dayofweek(ts) + self.dayoffset) % 7 - - cpdef next(self): - cdef: - int64_t tmp, days - Py_ssize_t j - - days = 0 - for j in range(0, self.stride): - if self.m >= 12: - self.m -= 12 - self.y += 1 - self.ly = is_leapyear(self.y) - days += days_per_month_table[self.ly][self.m] - self.m += 1 - - self.t += days * us_in_day - - if self.biz != 0: - self.dow = (self.dow + days) % 7 - - cpdef prev(self): - cdef: - int64_t tmp, days - Py_ssize_t j - - days = 0 - for j in range(0, self.stride): - self.m -= 1 - if self.m < 0: - self.m += 12 - self.y -= 1 - self.ly = is_leapyear(self.y) - days += days_per_month_table[self.ly][self.m] - - self.t -= days * us_in_day - - if self.biz != 0: - self.dow = (self.dow - days) % 7 - -cdef class DayOfMonthOffset(_Offset): - """ - Generate relative monthly timestamps from month & year of provided start - time. For example, fridays of the third week of each month (week=3, day=4); - or, thursdays of the last week of each month (week=-1, day=3). 
- - Parameters - ---------- - week : int - day : int, 0 to 6 - """ - cdef: - Py_ssize_t ly, m - int64_t y, day, week - - def __init__(self, int64_t week=0, int64_t day=0, object anchor=None): - self.week = week - self.day = day - - if self.day < 0 or self.day > 6: - raise ValueError("Day offset must be 0 to 6") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - - # rewind to beginning of month - self.t = ts.value - (ts.dts.day - 1) * us_in_day - self.dow = dayofweek(ts.dts.year, ts.dts.month, 1) - - # for day counting - self.m = ts.dts.month - 1 - self.y = ts.dts.year - self.ly = is_leapyear(self.y) - - cpdef next(self): - cdef: - int64_t tmp, days - - days = days_per_month_table[self.ly][self.m] - self.t += days * us_in_day - self.dow = (self.dow + days) % 7 - - self.m += 1 - if self.m >= 12: - self.m -= 12 - self.y += 1 - self.ly = is_leapyear(self.y) - - cpdef prev(self): - cdef: - int64_t tmp, days - - days = days_per_month_table[self.ly][(self.m - 1) % 12] - self.t -= days * us_in_day - self.dow = (self.dow - days) % 7 - - self.m -= 1 - if self.m < 0: - self.m += 12 - self.y -= 1 - self.ly = is_leapyear(self.y) - - cdef int64_t _ts(self): - """ - Overwrite default adjustment - """ - cdef int64_t adj = (self.week * 7) + (self.day - self.dow) % 7 - return self.t + us_in_day * adj - -cdef class DayOffset(_Offset): - """ - Generate daily timestamps beginning with first valid time >= start time. If - biz != 0, we skip weekends. Stride, to construct weekly timestamps. - - Parameters - ---------- - stride : int, > 0 - biz : boolean - """ - cdef: - Py_ssize_t stride - - def __init__(self, int64_t stride=1, int64_t biz=0, object anchor=None): - self.stride = stride - self.biz = biz - - if self.stride <= 0: - raise ValueError("Stride must be positive") - - if anchor is not None: - self.anchor(anchor) - - cdef _setup(self): - cdef _TSObject ts = self.ts - self.t = ts.value - if self.biz != 0: - self.dow = ts_dayofweek(ts) - - cpdef next(self): - self.t += (self.stride * us_in_day) - if self.biz != 0: - self.dow = (self.dow + self.stride) % 7 - if self.dow >= 5: - self.t += (7 - self.dow) * us_in_day - self.dow = 0 - - cpdef prev(self): - self.t -= (self.stride * us_in_day) - if self.biz != 0: - self.dow = (self.dow - self.stride) % 7 - if self.dow >= 5: - self.t += (4 - self.dow) * us_in_day - self.dow = 4 diff --git a/setup.py b/setup.py index d28c4ba8be5b00..555cf9dc4a9b39 100755 --- a/setup.py +++ b/setup.py @@ -512,7 +512,7 @@ def pxd(name): 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], 'depends': _pxi_dep['join']}, '_libs.reshape': {'pyxfile': '_libs/reshape', - 'depends': _pxi_dep['reshape']}, + 'depends': _pxi_dep['reshape'], 'include': []}, '_libs.interval': {'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, @@ -528,7 +528,7 @@ def pxd(name): 'pandas/_libs/src/parser/io.c']}, '_libs.sparse': {'pyxfile': '_libs/sparse', 'depends': (['pandas/_libs/sparse.pyx'] + - _pxi_dep['sparse'])}, + _pxi_dep['sparse']), 'include': []}, '_libs.testing': {'pyxfile': '_libs/testing', 'depends': ['pandas/_libs/testing.pyx']}, '_libs.hashing': {'pyxfile': '_libs/hashing', From d43aba82e218cd8187769a07c487709aa86de693 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sat, 23 Sep 2017 12:46:05 -0700 Subject: [PATCH 135/188] de-privatize timezone functions (#17543) --- pandas/_libs/period.pyx | 6 ++-- pandas/_libs/tslib.pyx | 32 ++++++++----------- pandas/_libs/tslibs/timezones.pxd | 4 +-- 
pandas/_libs/tslibs/timezones.pyx | 26 +++++++-------- pandas/core/indexes/datetimes.py | 20 ++++++------ pandas/core/tools/datetimes.py | 3 +- pandas/io/pytables.py | 7 ++-- .../indexes/datetimes/test_date_range.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 4 +-- pandas/tests/io/test_pytables.py | 2 +- pandas/tests/scalar/test_period.py | 12 +++---- pandas/tests/scalar/test_timestamp.py | 4 +-- pandas/tests/series/test_indexing.py | 2 +- pandas/tests/tseries/test_offsets.py | 3 +- pandas/tests/tseries/test_timezones.py | 22 +++++++------ 15 files changed, 75 insertions(+), 74 deletions(-) diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 75164748128e26..943f925ec5b04a 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -34,7 +34,7 @@ from lib cimport is_null_datetimelike from pandas._libs import tslib from pandas._libs.tslib import Timestamp, iNaT, NaT from tslibs.timezones cimport ( - is_utc, is_tzlocal, get_utcoffset, _get_dst_info, maybe_get_tz) + is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz) from tslib cimport _nat_scalar_rules from tslibs.frequencies cimport get_freq_code @@ -556,7 +556,7 @@ cdef _reso_local(ndarray[int64_t] stamps, object tz): reso = curr_reso else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz) _pos = trans.searchsorted(stamps, side='right') - 1 if _pos.dtype != np.int64: @@ -623,7 +623,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, dts.us, dts.ps, freq) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz) _pos = trans.searchsorted(stamps, side='right') - 1 if _pos.dtype != np.int64: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 077603af96947c..c629ccbd8e1fd4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -100,16 +100,10 @@ iNaT = NPY_NAT from tslibs.timezones cimport ( - is_utc, is_tzlocal, _is_fixed_offset, + is_utc, is_tzlocal, is_fixed_offset, treat_tz_as_dateutil, treat_tz_as_pytz, get_timezone, get_utcoffset, maybe_get_tz, - _get_dst_info - ) -from tslibs.timezones import ( # noqa - get_timezone, get_utcoffset, maybe_get_tz, - _p_tz_cache_key, dst_cache, - _unbox_utcoffsets, - _dateutil_gettz + get_dst_info ) @@ -168,7 +162,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): pandas_datetime_to_datetimestruct( value, PANDAS_FR_ns, &dts) result[i] = func_create(value, dts, tz, freq) - elif is_tzlocal(tz) or _is_fixed_offset(tz): + elif is_tzlocal(tz) or is_fixed_offset(tz): for i in range(n): value = arr[i] if value == NPY_NAT: @@ -182,7 +176,7 @@ def ints_to_pydatetime(ndarray[int64_t] arr, tz=None, freq=None, box=False): dt = Timestamp(dt) result[i] = dt else: - trans, deltas, typ = _get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz) for i in range(n): @@ -1641,12 +1635,12 @@ cdef inline void _localize_tso(_TSObject obj, object tz): obj.tzinfo = tz else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz) pos = trans.searchsorted(obj.value, side='right') - 1 # static/pytz/dateutil specific code - if _is_fixed_offset(tz): + if is_fixed_offset(tz): # statictzinfo if len(deltas) > 0 and obj.value != NPY_NAT: pandas_datetime_to_datetimestruct(obj.value + deltas[0], @@ -4066,7 +4060,7 @@ def tz_convert(ndarray[int64_t] vals, object 
tz1, object tz2): * 1000000000) utc_dates[i] = v - delta else: - trans, deltas, typ = _get_dst_info(tz1) + trans, deltas, typ = get_dst_info(tz1) # all-NaT tt = vals[vals!=NPY_NAT] @@ -4108,7 +4102,7 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return result # Convert UTC to other timezone - trans, deltas, typ = _get_dst_info(tz2) + trans, deltas, typ = get_dst_info(tz2) # use first non-NaT element # if all-NaT, return all-NaT @@ -4172,7 +4166,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): delta = int(get_utcoffset(tz1, dt).total_seconds()) * 1000000000 utc_date = val - delta elif get_timezone(tz1) != 'UTC': - trans, deltas, typ = _get_dst_info(tz1) + trans, deltas, typ = get_dst_info(tz1) pos = trans.searchsorted(val, side='right') - 1 if pos < 0: raise ValueError('First time before start of DST info') @@ -4191,7 +4185,7 @@ cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2): return utc_date + delta # Convert UTC to other timezone - trans, deltas, typ = _get_dst_info(tz2) + trans, deltas, typ = get_dst_info(tz2) pos = trans.searchsorted(utc_date, side='right') - 1 if pos < 0: @@ -4261,7 +4255,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, "Length of ambiguous bool-array must be the same size as vals") ambiguous_array = np.asarray(ambiguous) - trans, deltas, typ = _get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz) tdata = trans.data ntrans = len(trans) @@ -4967,7 +4961,7 @@ cdef _normalize_local(ndarray[int64_t] stamps, object tz): result[i] = _normalized_stamp(&dts) else: # Adjust datetime64 timestamp, recompute datetimestruct - trans, deltas, typ = _get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz) _pos = trans.searchsorted(stamps, side='right') - 1 if _pos.dtype != np.int64: @@ -5023,7 +5017,7 @@ def dates_normalized(ndarray[int64_t] stamps, tz=None): if (dt.hour + dt.minute + dt.second + dt.microsecond) > 0: return False else: - trans, deltas, typ = _get_dst_info(tz) + trans, deltas, typ = get_dst_info(tz) for i in range(n): # Adjust datetime64 timestamp, recompute datetimestruct diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index fac0018a78bc2e..e5d1343e1c9843 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -13,6 +13,6 @@ cpdef object get_timezone(object tz) cpdef object maybe_get_tz(object tz) cpdef get_utcoffset(tzinfo, obj) -cdef bint _is_fixed_offset(object tz) +cdef bint is_fixed_offset(object tz) -cdef object _get_dst_info(object tz) +cdef object get_dst_info(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 346da41e7073be..48d82996a0bd0f 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -13,9 +13,9 @@ from dateutil.tz import ( import sys if sys.platform == 'win32' or sys.platform == 'cygwin': # equiv pd.compat.is_platform_windows() - from dateutil.zoneinfo import gettz as _dateutil_gettz + from dateutil.zoneinfo import gettz as dateutil_gettz else: - from dateutil.tz import gettz as _dateutil_gettz + from dateutil.tz import gettz as dateutil_gettz from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo @@ -100,7 +100,7 @@ cpdef inline object maybe_get_tz(object tz): tz = _dateutil_tzlocal() elif tz.startswith('dateutil/'): zone = tz[9:] - tz = _dateutil_gettz(zone) + tz = dateutil_gettz(zone) # On Python 3 on Windows, the filename is not always set correctly. 
if isinstance(tz, _dateutil_tzfile) and '.tar.gz' in tz._filename: tz._filename = zone @@ -113,14 +113,14 @@ cpdef inline object maybe_get_tz(object tz): def _p_tz_cache_key(tz): """ Python interface for cache function to facilitate testing.""" - return _tz_cache_key(tz) + return tz_cache_key(tz) # Timezone data caches, key is the pytz string or dateutil file name. dst_cache = {} -cdef inline object _tz_cache_key(object tz): +cdef inline object tz_cache_key(object tz): """ Return the key in the cache for the timezone info object or None if unknown. @@ -163,7 +163,7 @@ cpdef get_utcoffset(tzinfo, obj): return tzinfo.utcoffset(obj) -cdef inline bint _is_fixed_offset(object tz): +cdef inline bint is_fixed_offset(object tz): if treat_tz_as_dateutil(tz): if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0: return 1 @@ -178,7 +178,7 @@ cdef inline bint _is_fixed_offset(object tz): return 1 -cdef object _get_utc_trans_times_from_dateutil_tz(object tz): +cdef object get_utc_trans_times_from_dateutil_tz(object tz): """ Transition times in dateutil timezones are stored in local non-dst time. This code converts them to UTC. It's the reverse of the code @@ -193,7 +193,7 @@ cdef object _get_utc_trans_times_from_dateutil_tz(object tz): return new_trans -cpdef ndarray _unbox_utcoffsets(object transinfo): +cpdef ndarray unbox_utcoffsets(object transinfo): cdef: Py_ssize_t i, sz ndarray[int64_t] arr @@ -211,7 +211,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo): # Daylight Savings -cdef object _get_dst_info(object tz): +cdef object get_dst_info(object tz): """ return a tuple of : (UTC times of DST transitions, @@ -219,7 +219,7 @@ cdef object _get_dst_info(object tz): string of type of transitions) """ - cache_key = _tz_cache_key(tz) + cache_key = tz_cache_key(tz) if cache_key is None: num = int(get_utcoffset(tz, None).total_seconds()) * 1000000000 return (np.array([NPY_NAT + 1], dtype=np.int64), @@ -235,13 +235,13 @@ cdef object _get_dst_info(object tz): trans[0] = NPY_NAT + 1 except Exception: pass - deltas = _unbox_utcoffsets(tz._transition_info) + deltas = unbox_utcoffsets(tz._transition_info) typ = 'pytz' elif treat_tz_as_dateutil(tz): if len(tz._trans_list): # get utc trans times - trans_list = _get_utc_trans_times_from_dateutil_tz(tz) + trans_list = get_utc_trans_times_from_dateutil_tz(tz) trans = np.hstack([ np.array([0], dtype='M8[s]'), # place holder for first item np.array(trans_list, dtype='M8[s]')]).astype( @@ -255,7 +255,7 @@ cdef object _get_dst_info(object tz): deltas *= 1000000000 typ = 'dateutil' - elif _is_fixed_offset(tz): + elif is_fixed_offset(tz): trans = np.array([NPY_NAT + 1], dtype=np.int64) deltas = np.array([tz._ttinfo_std.offset], dtype='i8') * 1000000000 diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 6b1b61c2798f4f..39dc24642235ba 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -50,6 +50,7 @@ from pandas._libs import (lib, index as libindex, tslib as libts, algos as libalgos, join as libjoin, Timestamp, period as libperiod) +from pandas._libs.tslibs import timezones def _utc(): @@ -372,7 +373,7 @@ def __new__(cls, data=None, tz = subarr.tz else: if tz is not None: - tz = libts.maybe_get_tz(tz) + tz = timezones.maybe_get_tz(tz) if (not isinstance(data, DatetimeIndex) or getattr(data, 'tz', None) is None): @@ -447,17 +448,18 @@ def _generate(cls, start, end, periods, name, offset, raise TypeError('Start and end cannot both be tz-aware with ' 'different timezones') - inferred_tz = 
libts.maybe_get_tz(inferred_tz) + inferred_tz = timezones.maybe_get_tz(inferred_tz) # these may need to be localized - tz = libts.maybe_get_tz(tz) + tz = timezones.maybe_get_tz(tz) if tz is not None: date = start or end if date.tzinfo is not None and hasattr(tz, 'localize'): tz = tz.localize(date.replace(tzinfo=None)).tzinfo if tz is not None and inferred_tz is not None: - if not libts.get_timezone(inferred_tz) == libts.get_timezone(tz): + if not (timezones.get_timezone(inferred_tz) == + timezones.get_timezone(tz)): raise AssertionError("Inferred time zone not equal to passed " "time zone") @@ -593,7 +595,7 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, result._data = values result.name = name result.offset = freq - result.tz = libts.maybe_get_tz(tz) + result.tz = timezones.maybe_get_tz(tz) result._reset_identity() return result @@ -607,7 +609,7 @@ def tzinfo(self): @cache_readonly def _timezone(self): """ Comparable timezone both for pytz / dateutil""" - return libts.get_timezone(self.tzinfo) + return timezones.get_timezone(self.tzinfo) def _has_same_tz(self, other): zzone = self._timezone @@ -616,7 +618,7 @@ def _has_same_tz(self, other): if isinstance(other, np.datetime64): # convert to Timestamp as np.datetime64 doesn't have tz attr other = Timestamp(other) - vzone = libts.get_timezone(getattr(other, 'tzinfo', '__no_tz__')) + vzone = timezones.get_timezone(getattr(other, 'tzinfo', '__no_tz__')) return zzone == vzone @classmethod @@ -1779,7 +1781,7 @@ def tz_convert(self, tz): TypeError If DatetimeIndex is tz-naive. """ - tz = libts.maybe_get_tz(tz) + tz = timezones.maybe_get_tz(tz) if self.tz is None: # tz naive, use tz_localize @@ -1839,7 +1841,7 @@ def tz_localize(self, tz, ambiguous='raise', errors='raise'): else: raise TypeError("Already tz-aware, use tz_convert to convert.") else: - tz = libts.maybe_get_tz(tz) + tz = timezones.maybe_get_tz(tz) # Convert to UTC new_dates = libts.tz_localize_to_utc(self.asi8, tz, diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 9dde26f43ad337..95fe3ab83c2abf 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -3,6 +3,7 @@ from collections import MutableMapping from pandas._libs import lib, tslib +from pandas._libs.tslibs.timezones import get_timezone from pandas.core.dtypes.common import ( _ensure_object, @@ -44,7 +45,7 @@ def _infer_tzinfo(start, end): def _infer(a, b): tz = a.tzinfo if b and b.tzinfo: - if not (tslib.get_timezone(tz) == tslib.get_timezone(b.tzinfo)): + if not (get_timezone(tz) == get_timezone(b.tzinfo)): raise AssertionError('Inputs must both have the same timezone,' ' {timezone1} != {timezone2}' .format(timezone1=tz, timezone2=b.tzinfo)) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9f819a4463bed4..4d300b200971ac 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -46,7 +46,8 @@ from pandas.core.config import get_option from pandas.core.computation.pytables import Expr, maybe_expression -from pandas._libs import tslib, algos, lib +from pandas._libs import algos, lib +from pandas._libs.tslibs import timezones from distutils.version import LooseVersion @@ -4379,7 +4380,7 @@ def _get_info(info, name): def _get_tz(tz): """ for a tz-aware type, return an encoded zone """ - zone = tslib.get_timezone(tz) + zone = timezones.get_timezone(tz) if zone is None: zone = tz.utcoffset().total_seconds() return zone @@ -4401,7 +4402,7 @@ def _set_tz(values, tz, preserve_UTC=False, coerce=False): if tz is not None: name = 
getattr(values, 'name', None) values = values.ravel() - tz = tslib.get_timezone(_ensure_decoded(tz)) + tz = timezones.get_timezone(_ensure_decoded(tz)) values = DatetimeIndex(values, name=name) if values.tz is None: values = values.tz_localize('UTC').tz_convert(tz) diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 8d86bebdd4d5e4..c373942cb4c63c 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -394,7 +394,7 @@ def test_range_tz_dateutil(self): # see gh-2906 # Use maybe_get_tz to fix filename in tz under dateutil. - from pandas._libs.tslib import maybe_get_tz + from pandas._libs.tslibs.timezones import maybe_get_tz tz = lambda x: maybe_get_tz('dateutil/' + x) start = datetime(2011, 1, 1, tzinfo=tz('US/Eastern')) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index f43c010f59b9e7..4ffd2e1cd1e615 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -325,8 +325,8 @@ def test_month_range_union_tz_pytz(self): def test_month_range_union_tz_dateutil(self): tm._skip_if_windows_python_3() - from pandas._libs.tslib import _dateutil_gettz as timezone - tz = timezone('US/Eastern') + from pandas._libs.tslibs.timezones import dateutil_gettz + tz = dateutil_gettz('US/Eastern') early_start = datetime(2011, 1, 1) early_end = datetime(2011, 3, 1) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2a6d16fb39cc33..ff21afc11d2205 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5427,7 +5427,7 @@ def test_append_with_timezones_dateutil(self): # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows # filename issues. 
- from pandas._libs.tslib import maybe_get_tz + from pandas._libs.tslibs.timezones import maybe_get_tz gettz = lambda x: maybe_get_tz('dateutil/' + x) # as columns diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index a167c9c738b0bf..c17a216df44cbd 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -245,29 +245,29 @@ def test_timestamp_tz_arg(self): assert p.tz == exp.tz def test_timestamp_tz_arg_dateutil(self): - from pandas._libs.tslib import _dateutil_gettz as gettz - from pandas._libs.tslib import maybe_get_tz + from pandas._libs.tslibs.timezones import dateutil_gettz + from pandas._libs.tslibs.timezones import maybe_get_tz for case in ['dateutil/Europe/Brussels', 'dateutil/Asia/Tokyo', 'dateutil/US/Pacific']: p = Period('1/1/2005', freq='M').to_timestamp( tz=maybe_get_tz(case)) exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) assert p == exp - assert p.tz == gettz(case.split('/', 1)[1]) + assert p.tz == dateutil_gettz(case.split('/', 1)[1]) assert p.tz == exp.tz p = Period('1/1/2005', freq='M').to_timestamp(freq='3H', tz=maybe_get_tz(case)) exp = Timestamp('1/1/2005', tz='UTC').tz_convert(case) assert p == exp - assert p.tz == gettz(case.split('/', 1)[1]) + assert p.tz == dateutil_gettz(case.split('/', 1)[1]) assert p.tz == exp.tz def test_timestamp_tz_arg_dateutil_from_string(self): - from pandas._libs.tslib import _dateutil_gettz as gettz + from pandas._libs.tslibs.timezones import dateutil_gettz p = Period('1/1/2005', freq='M').to_timestamp(tz='dateutil/Europe/Brussels') - assert p.tz == gettz('Europe/Brussels') + assert p.tz == dateutil_gettz('Europe/Brussels') def test_timestamp_mult(self): p = pd.Period('2011-01', freq='M') diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 8d47ce4802ac65..c1b9f858a08de3 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -17,7 +17,7 @@ import pandas.util.testing as tm from pandas.tseries import offsets, frequencies from pandas._libs import tslib, period -from pandas._libs.tslib import get_timezone +from pandas._libs.tslibs.timezones import get_timezone from pandas.compat import lrange, long from pandas.util.testing import assert_series_equal @@ -1295,7 +1295,7 @@ def test_timestamp_to_datetime_explicit_pytz(self): def test_timestamp_to_datetime_explicit_dateutil(self): tm._skip_if_windows_python_3() - from pandas._libs.tslib import _dateutil_gettz as gettz + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz rng = date_range('20090415', '20090519', tz=gettz('US/Eastern')) stamp = rng[0] diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 45a92f6d6f50b0..91187b709463aa 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -387,7 +387,7 @@ def test_getitem_setitem_datetime_tz_pytz(self): def test_getitem_setitem_datetime_tz_dateutil(self): from dateutil.tz import tzutc - from pandas._libs.tslib import _dateutil_gettz as gettz + from pandas._libs.tslibs.timezones import dateutil_gettz as gettz tz = lambda x: tzutc() if x == 'UTC' else gettz( x) # handle special case for utc in dateutil diff --git a/pandas/tests/tseries/test_offsets.py b/pandas/tests/tseries/test_offsets.py index cd2c29ffe3ac6b..543d21e162f048 100644 --- a/pandas/tests/tseries/test_offsets.py +++ b/pandas/tests/tseries/test_offsets.py @@ -33,6 +33,7 @@ to_datetime, DateParseError) import pandas.tseries.offsets 
as offsets from pandas.io.pickle import read_pickle +from pandas._libs.tslibs import timezones from pandas._libs.tslib import normalize_date, NaT, Timestamp, Timedelta import pandas._libs.tslib as tslib import pandas.util.testing as tm @@ -288,7 +289,7 @@ def _check_offsetfunc_works(self, offset, funcname, dt, expected, for tz in self.timezones: expected_localize = expected.tz_localize(tz) - tz_obj = tslib.maybe_get_tz(tz) + tz_obj = timezones.maybe_get_tz(tz) dt_tz = tslib._localize_pydatetime(dt, tz_obj) result = func(dt_tz) diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index ac1a338d2844d4..e7b470e01e2af3 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -18,6 +18,7 @@ from pandas.core.indexes.datetimes import bdate_range, date_range from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas._libs import tslib +from pandas._libs.tslibs import timezones from pandas import (Index, Series, DataFrame, isna, Timestamp, NaT, DatetimeIndex, to_datetime) from pandas.util.testing import (assert_frame_equal, assert_series_equal, @@ -943,7 +944,7 @@ def tz(self, tz): Use tslib.maybe_get_tz so that we get the filename on the tz right on windows. See #7337. """ - return tslib.maybe_get_tz('dateutil/' + tz) + return timezones.maybe_get_tz('dateutil/' + tz) def tzstr(self, tz): """ Construct a timezone string from a string. Overridden in subclass @@ -962,7 +963,7 @@ def test_utc_with_system_utc(self): # Skipped on win32 due to dateutil bug tm._skip_if_windows() - from pandas._libs.tslib import maybe_get_tz + from pandas._libs.tslibs.timezones import maybe_get_tz # from system utc to real utc ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC')) @@ -1133,7 +1134,7 @@ def test_tzlocal(self): assert ts.tz == dateutil.tz.tzlocal() assert "tz='tzlocal()')" in repr(ts) - tz = tslib.maybe_get_tz('tzlocal()') + tz = timezones.maybe_get_tz('tzlocal()') assert tz == dateutil.tz.tzlocal() # get offset using normal datetime for test @@ -1176,12 +1177,13 @@ def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self): if tz_name == 'UTC': # skip utc as it's a special case in dateutil continue - tz_p = tslib.maybe_get_tz(tz_name) - tz_d = tslib.maybe_get_tz('dateutil/' + tz_name) + tz_p = timezones.maybe_get_tz(tz_name) + tz_d = timezones.maybe_get_tz('dateutil/' + tz_name) if tz_d is None: # skip timezones that dateutil doesn't know about. 
continue - assert tslib._p_tz_cache_key(tz_p) != tslib._p_tz_cache_key(tz_d) + assert (timezones._p_tz_cache_key(tz_p) != + timezones._p_tz_cache_key(tz_d)) class TestTimeZones(object): @@ -1764,13 +1766,13 @@ def compare_local_to_utc(tz_didx, utc_didx): # Check empty array result = tslib.tz_convert(np.array([], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) + timezones.maybe_get_tz('US/Eastern'), + timezones.maybe_get_tz('Asia/Tokyo')) tm.assert_numpy_array_equal(result, np.array([], dtype=np.int64)) # Check all-NaT array result = tslib.tz_convert(np.array([tslib.iNaT], dtype=np.int64), - tslib.maybe_get_tz('US/Eastern'), - tslib.maybe_get_tz('Asia/Tokyo')) + timezones.maybe_get_tz('US/Eastern'), + timezones.maybe_get_tz('Asia/Tokyo')) tm.assert_numpy_array_equal(result, np.array( [tslib.iNaT], dtype=np.int64)) From c95eb3897f2f4d0893f7baa381b2b151805a960e Mon Sep 17 00:00:00 2001 From: Daniel Himmelstein Date: Sun, 24 Sep 2017 05:56:12 -0400 Subject: [PATCH 136/188] DOC: revise What's New for inferring compression from non-string paths (#17338) Refs https://github.com/pandas-dev/pandas/issues/17262 Refs https://github.com/pandas-dev/pandas/pull/17206#issuecomment-322586996 --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 261e12b8245094..32dbeb32154e68 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -137,7 +137,7 @@ Other Enhancements - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) - Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. (:issue:`15838`, :issue:`17438`) - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) -- `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). +- Read/write methods that infer compression (:func:`read_csv`, :func:`read_table`, :func:`read_pickle`, and :meth:`~DataFrame.to_pickle`) can now infer from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). - :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). - :func:`DataFrame.items` and :func:`Series.items` is now present in both Python 2 and 3 and is lazy in all cases (:issue:`13918`, :issue:`17213`) - :func:`Styler.where` has been implemented. It is as a convenience for :func:`Styler.applymap` and enables simple DataFrame styling on the Jupyter notebook (:issue:`17474`). 
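The revised note in this patch covers ``read_csv``, ``read_table``, ``read_pickle``, and ``to_pickle``. A minimal sketch of the inferred-compression behavior it describes, assuming a writable working directory and an illustrative file name:

```python
import gzip
import pathlib

import pandas as pd

# Write a small gzipped CSV; 'data.csv.gz' is an illustrative name.
path = pathlib.Path("data.csv.gz")
with gzip.open(path, "wt") as f:
    f.write("a,b\n1,2\n3,4\n")

# compression='infer' (the default) now also works for non-string paths,
# picking gzip from the '.gz' suffix of the pathlib.Path object.
df = pd.read_csv(path)
```
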
From ae16bf99467d7d26abe506ba95079b07442860a8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 24 Sep 2017 13:48:00 +0200 Subject: [PATCH 137/188] API: harmonize drop/reindex/rename args (GH12392) - drop (#17644) * API: harmonize drop/reindex/rename args (GH12392) - drop * fixups * add versionadded --- doc/source/whatsnew/v0.21.0.txt | 18 ++++ pandas/core/generic.py | 102 +++++++++++++----- .../tests/frame/test_axis_select_reindex.py | 35 ++++++ 3 files changed, 129 insertions(+), 26 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 32dbeb32154e68..21abdccd2996c9 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -91,6 +91,24 @@ This does not raise any obvious exceptions, but also does not create a new colum Setting a list-like data structure into a new attribute now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. +``drop`` now also accepts index/columns keywords +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The :meth:`~DataFrame.drop` method has gained ``index``/``columns`` keywords as an +alternative to specify the ``axis`` and to make it similar in usage to ``reindex`` +(:issue:`12392`). + +For example: + +.. ipython:: python + + df = pd.DataFrame(np.arange(8).reshape(2,4), + columns=['A', 'B', 'C', 'D']) + df + df.drop(['B', 'C'], axis=1) + # the following is now equivalent + df.drop(columns=['B', 'C']) + .. _whatsnew_0210.enhancements.categorical_dtype: ``CategoricalDtype`` for specifying categoricals diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 241204ef555f6e..3d55e07df6eacb 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2333,14 +2333,23 @@ def reindex_like(self, other, method=None, copy=True, limit=None, return self.reindex(**d) - def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): + def drop(self, labels=None, axis=0, index=None, columns=None, level=None, + inplace=False, errors='raise'): """ Return new object with labels in requested axis removed. Parameters ---------- labels : single label or list-like + Index or column labels to drop. axis : int or axis name + Whether to drop labels from the index (0 / 'index') or + columns (1 / 'columns'). + index, columns : single label or list-like + Alternative to specifying `axis` (``labels, axis=1`` is + equivalent to ``columns=labels``). + + .. versionadded:: 0.21.0 level : int or level name, default None For MultiIndex inplace : bool, default False @@ -2354,36 +2363,80 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): Examples -------- - >>> df = pd.DataFrame([[1, 2, 3, 4], - ... [5, 6, 7, 8], - ... [9, 1, 2, 3], - ... [4, 5, 6, 7] - ... ], - ... columns=list('ABCD')) + >>> df = pd.DataFrame(np.arange(12).reshape(3,4), + columns=['A', 'B', 'C', 'D']) >>> df - A B C D - 0 1 2 3 4 - 1 5 6 7 8 - 2 9 1 2 3 - 3 4 5 6 7 + A B C D + 0 0 1 2 3 + 1 4 5 6 7 + 2 8 9 10 11 + + Drop columns + + >>> df.drop(['B', 'C'], axis=1) + A D + 0 0 3 + 1 4 7 + 2 8 11 + + >>> df.drop(columns=['B', 'C']) + A D + 0 0 3 + 1 4 7 + 2 8 11 Drop a row by index >>> df.drop([0, 1]) - A B C D - 2 9 1 2 3 - 3 4 5 6 7 + A B C D + 2 8 9 10 11 - Drop columns + Notes + ----- + Specifying both `labels` and `index` or `columns` will raise a + ValueError. 
- >>> df.drop(['A', 'B'], axis=1) - C D - 0 3 4 - 1 7 8 - 2 2 3 - 3 6 7 """ inplace = validate_bool_kwarg(inplace, 'inplace') + + if labels is not None: + if index is not None or columns is not None: + raise ValueError("Cannot specify both 'labels' and " + "'index'/'columns'") + axis_name = self._get_axis_name(axis) + axes = {axis_name: labels} + elif index is not None or columns is not None: + axes, _ = self._construct_axes_from_arguments((index, columns), {}) + else: + raise ValueError("Need to specify at least one of 'labels', " + "'index' or 'columns'") + + obj = self + + for axis, labels in axes.items(): + if labels is not None: + obj = obj._drop_axis(labels, axis, level=level, errors=errors) + + if inplace: + self._update_inplace(obj) + else: + return obj + + def _drop_axis(self, labels, axis, level=None, errors='raise'): + """ + Drop labels from specified axis. Used in the ``drop`` method + internally. + + Parameters + ---------- + labels : single label or list-like + axis : int or axis name + level : int or level name, default None + For MultiIndex + errors : {'ignore', 'raise'}, default 'raise' + If 'ignore', suppress error and existing labels are dropped. + + """ axis = self._get_axis_number(axis) axis_name = self._get_axis_name(axis) axis, axis_ = self._get_axis(axis), axis @@ -2416,10 +2469,7 @@ def drop(self, labels, axis=0, level=None, inplace=False, errors='raise'): result = self.loc[tuple(slicer)] - if inplace: - self._update_inplace(result) - else: - return result + return result def _update_inplace(self, result, verify_is_copy=True): """ diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index e76869bf6712b5..fb9b8c2ed7affe 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -146,6 +146,41 @@ def test_drop_multiindex_not_lexsorted(self): tm.assert_frame_equal(result, expected) + def test_drop_api_equivalence(self): + # equivalence of the labels/axis and index/columns API's (GH12392) + df = DataFrame([[1, 2, 3], [3, 4, 5], [5, 6, 7]], + index=['a', 'b', 'c'], + columns=['d', 'e', 'f']) + + res1 = df.drop('a') + res2 = df.drop(index='a') + tm.assert_frame_equal(res1, res2) + + res1 = df.drop('d', 1) + res2 = df.drop(columns='d') + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(labels='e', axis=1) + res2 = df.drop(columns='e') + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(['a'], axis=0) + res2 = df.drop(index=['a']) + tm.assert_frame_equal(res1, res2) + + res1 = df.drop(['a'], axis=0).drop(['d'], axis=1) + res2 = df.drop(index=['a'], columns=['d']) + tm.assert_frame_equal(res1, res2) + + with pytest.raises(ValueError): + df.drop(labels='a', index='b') + + with pytest.raises(ValueError): + df.drop(labels='a', columns='b') + + with pytest.raises(ValueError): + df.drop(axis=1) + def test_merge_join_different_levels(self): # GH 9455 From 1f5127144d9c2697445882b81505997a4a67d67e Mon Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 24 Sep 2017 06:13:37 -0700 Subject: [PATCH 138/188] API: Warn about dups in names for read_csv (#17346) xref gh-17095. 
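For illustration, a short hedged example of the new behavior, mirroring the ``names=["a", "b", "a"]`` case in the tests below (the exact warning wording may change before the future error is introduced):

```python
from io import StringIO

import pandas as pd

data = "0,1,2\n3,4,5"

# Duplicates in `names` now trigger a UserWarning before the usual
# mangling kicks in, so the result still has columns ['a', 'b', 'a.1'].
df = pd.read_csv(StringIO(data), names=["a", "b", "a"])
```
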
--- doc/source/io.rst | 4 +-- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/parsers.py | 33 ++++++++++++++++-- pandas/tests/io/parser/common.py | 14 -------- pandas/tests/io/parser/dtypes.py | 9 ++--- pandas/tests/io/parser/mangle_dupes.py | 46 ++++++++++++++++++++------ 6 files changed, 74 insertions(+), 33 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index ab1ad74ee8516b..d6abed6e9d1ad6 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -113,8 +113,8 @@ header : int or list of ints, default ``'infer'`` rather than the first line of the file. names : array-like, default ``None`` List of column names to use. If file contains no header row, then you should - explicitly pass ``header=None``. Duplicates in this list are not allowed unless - ``mangle_dupe_cols=True``, which is the default. + explicitly pass ``header=None``. Duplicates in this list will cause + a ``UserWarning`` to be issued. index_col : int or sequence or ``False``, default ``None`` Column to use as the row labels of the DataFrame. If a sequence is given, a MultiIndex is used. If you have a malformed file with delimiters at the end of diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 21abdccd2996c9..49d2c1767807c4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -467,6 +467,7 @@ Other API Changes - The Categorical constructor no longer accepts a scalar for the ``categories`` keyword. (:issue:`16022`) - Accessing a non-existent attribute on a closed :class:`~pandas.HDFStore` will now raise an ``AttributeError`` rather than a ``ClosedFileError`` (:issue:`16301`) +- :func:`read_csv` now issues a ``UserWarning`` if the ``names`` parameter contains duplicates (:issue:`17095`) - :func:`read_csv` now treats ``'null'`` strings as missing values by default (:issue:`16471`) - :func:`read_csv` now treats ``'n/a'`` strings as missing values by default (:issue:`16078`) - :class:`pandas.HDFStore`'s string representation is now faster and less detailed. For the previous behavior, use ``pandas.HDFStore.info()``. (:issue:`16503`). diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index d9e83176d0d6e3..ed15d4295d6881 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -84,8 +84,8 @@ rather than the first line of the file. names : array-like, default None List of column names to use. If file contains no header row, then you - should explicitly pass header=None. Duplicates in this list are not - allowed unless mangle_dupe_cols=True, which is the default. + should explicitly pass header=None. Duplicates in this list will cause + a ``UserWarning`` to be issued. index_col : int or sequence or False, default None Column to use as the row labels of the DataFrame. If a sequence is given, a MultiIndex is used. If you have a malformed file with delimiters at the end @@ -385,6 +385,32 @@ def _validate_integer(name, val, min_val=0): return val +def _validate_names(names): + """ + Check if the `names` parameter contains duplicates. + + If duplicates are found, we issue a warning before returning. + + Parameters + ---------- + names : array-like or None + An array containing a list of the names used for the output DataFrame. + + Returns + ------- + names : array-like or None + The original `names` parameter. + """ + + if names is not None: + if len(names) != len(set(names)): + msg = ("Duplicate names specified. 
This " + "will raise an error in the future.") + warnings.warn(msg, UserWarning, stacklevel=3) + + return names + + def _read(filepath_or_buffer, kwds): """Generic reader of line files.""" encoding = kwds.get('encoding', None) @@ -407,6 +433,9 @@ def _read(filepath_or_buffer, kwds): chunksize = _validate_integer('chunksize', kwds.get('chunksize', None), 1) nrows = _validate_integer('nrows', kwds.get('nrows', None)) + # Check for duplicates in names. + _validate_names(kwds.get("names", None)) + # Create the parser. parser = TextFileReader(filepath_or_buffer, **kwds) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index cfc4a1d7c55eb0..e85d3ad294655c 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -1357,20 +1357,6 @@ def test_euro_decimal_format(self): assert df2['Number2'].dtype == float assert df2['Number3'].dtype == float - def test_read_duplicate_names(self): - # See gh-7160 - data = "a,b,a\n0,1,2\n3,4,5" - df = self.read_csv(StringIO(data)) - expected = DataFrame([[0, 1, 2], [3, 4, 5]], - columns=['a', 'b', 'a.1']) - tm.assert_frame_equal(df, expected) - - data = "0,1,2\n3,4,5" - df = self.read_csv(StringIO(data), names=["a", "b", "a"]) - expected = DataFrame([[0, 1, 2], [3, 4, 5]], - columns=['a', 'b', 'a.1']) - tm.assert_frame_equal(df, expected) - def test_inf_parsing(self): data = """\ ,A diff --git a/pandas/tests/io/parser/dtypes.py b/pandas/tests/io/parser/dtypes.py index 7311c9200f269a..402fa0817595c7 100644 --- a/pandas/tests/io/parser/dtypes.py +++ b/pandas/tests/io/parser/dtypes.py @@ -204,10 +204,11 @@ def test_empty_with_dup_column_pass_dtype_by_indexes(self): result = self.read_csv(StringIO(data), dtype={0: 'u1', 1: 'f'}) tm.assert_frame_equal(result, expected, check_index_type=False) - data = '' - result = self.read_csv(StringIO(data), names=['one', 'one'], - dtype={0: 'u1', 1: 'f'}) - tm.assert_frame_equal(result, expected, check_index_type=False) + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + data = '' + result = self.read_csv(StringIO(data), names=['one', 'one'], + dtype={0: 'u1', 1: 'f'}) + tm.assert_frame_equal(result, expected, check_index_type=False) def test_raise_on_passed_int_dtype_with_nas(self): # see gh-2631 diff --git a/pandas/tests/io/parser/mangle_dupes.py b/pandas/tests/io/parser/mangle_dupes.py index e2efb1377f8b0a..6df69eb475bf76 100644 --- a/pandas/tests/io/parser/mangle_dupes.py +++ b/pandas/tests/io/parser/mangle_dupes.py @@ -7,6 +7,9 @@ """ from pandas.compat import StringIO +from pandas import DataFrame + +import pandas.util.testing as tm class DupeColumnTests(object): @@ -25,6 +28,21 @@ def test_basic(self): mangle_dupe_cols=True) assert list(df.columns) == expected + def test_basic_names(self): + # See gh-7160 + data = "a,b,a\n0,1,2\n3,4,5" + expected = DataFrame([[0, 1, 2], [3, 4, 5]], + columns=["a", "b", "a.1"]) + + df = self.read_csv(StringIO(data)) + tm.assert_frame_equal(df, expected) + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + data = "0,1,2\n3,4,5" + df = self.read_csv(StringIO(data), + names=["a", "b", "a"]) + tm.assert_frame_equal(df, expected) + def test_thorough_mangle_columns(self): # see gh-17060 data = "a,a,a.1\n1,2,3" @@ -45,20 +63,26 @@ def test_thorough_mangle_names(self): # see gh-17095 data = "a,b,b\n1,2,3" names = ["a.1", "a.1", "a.1.1"] - df = self.read_csv(StringIO(data), sep=",", names=names, - mangle_dupe_cols=True) - assert list(df.columns) == ["a.1", "a.1.1", "a.1.1.1"] + + with 
tm.assert_produces_warning(UserWarning, check_stacklevel=False): + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a.1", "a.1.1", "a.1.1.1"] data = "a,b,c,d,e,f\n1,2,3,4,5,6" names = ["a", "a", "a.1", "a.1.1", "a.1.1.1", "a.1.1.1.1"] - df = self.read_csv(StringIO(data), sep=",", names=names, - mangle_dupe_cols=True) - assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1", - "a.1.1.1.1", "a.1.1.1.1.1"] + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.1.1", "a.1.1.1", + "a.1.1.1.1", "a.1.1.1.1.1"] data = "a,b,c,d,e,f,g\n1,2,3,4,5,6,7" names = ["a", "a", "a.3", "a.1", "a.2", "a", "a"] - df = self.read_csv(StringIO(data), sep=",", names=names, - mangle_dupe_cols=True) - assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", - "a.2", "a.2.1", "a.3.1"] + + with tm.assert_produces_warning(UserWarning, check_stacklevel=False): + df = self.read_csv(StringIO(data), sep=",", names=names, + mangle_dupe_cols=True) + assert list(df.columns) == ["a", "a.1", "a.3", "a.1.1", + "a.2", "a.2.1", "a.3.1"] From f8bf12916e21bc03992f14b01a77355e180cdab9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 24 Sep 2017 06:15:29 -0700 Subject: [PATCH 139/188] cut/paste AccessorProperty and PandasDelegate to core.accessor (#17651) --- pandas/core/accessor.py | 95 ++++++++++++++++++++++++++++++++ pandas/core/base.py | 94 ------------------------------- pandas/core/categorical.py | 5 +- pandas/core/frame.py | 5 +- pandas/core/indexes/accessors.py | 5 +- pandas/core/indexes/base.py | 4 +- pandas/core/indexes/category.py | 3 +- pandas/core/series.py | 11 ++-- pandas/tests/test_base.py | 5 +- 9 files changed, 117 insertions(+), 110 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 9f8556d1e69616..c8476841bfce47 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -5,6 +5,7 @@ that can be mixed into or pinned onto other pandas classes. """ +from pandas.core.common import AbstractMethodError class DirNamesMixin(object): @@ -33,3 +34,97 @@ def __dir__(self): rv = set(dir(type(self))) rv = (rv - self._dir_deletions()) | self._dir_additions() return sorted(rv) + + +class AccessorProperty(object): + """Descriptor for implementing accessor properties like Series.str + """ + + def __init__(self, accessor_cls, construct_accessor=None): + self.accessor_cls = accessor_cls + self.construct_accessor = (construct_accessor or + accessor_cls._make_accessor) + self.__doc__ = accessor_cls.__doc__ + + def __get__(self, instance, owner=None): + if instance is None: + # this ensures that Series.str. 
is well defined + return self.accessor_cls + return self.construct_accessor(instance) + + def __set__(self, instance, value): + raise AttributeError("can't set attribute") + + def __delete__(self, instance): + raise AttributeError("can't delete attribute") + + +class PandasDelegate(object): + """ an abstract base class for delegating methods/properties """ + + @classmethod + def _make_accessor(cls, data): + raise AbstractMethodError("_make_accessor should be implemented" + "by subclass and return an instance" + "of `cls`.") + + def _delegate_property_get(self, name, *args, **kwargs): + raise TypeError("You cannot access the " + "property {name}".format(name=name)) + + def _delegate_property_set(self, name, value, *args, **kwargs): + raise TypeError("The property {name} cannot be set".format(name=name)) + + def _delegate_method(self, name, *args, **kwargs): + raise TypeError("You cannot call method {name}".format(name=name)) + + @classmethod + def _add_delegate_accessors(cls, delegate, accessors, typ, + overwrite=False): + """ + add accessors to cls from the delegate class + + Parameters + ---------- + cls : the class to add the methods/properties to + delegate : the class to get methods/properties & doc-strings + acccessors : string list of accessors to add + typ : 'property' or 'method' + overwrite : boolean, default False + overwrite the method/property in the target class if it exists + """ + + def _create_delegator_property(name): + + def _getter(self): + return self._delegate_property_get(name) + + def _setter(self, new_values): + return self._delegate_property_set(name, new_values) + + _getter.__name__ = name + _setter.__name__ = name + + return property(fget=_getter, fset=_setter, + doc=getattr(delegate, name).__doc__) + + def _create_delegator_method(name): + + def f(self, *args, **kwargs): + return self._delegate_method(name, *args, **kwargs) + + f.__name__ = name + f.__doc__ = getattr(delegate, name).__doc__ + + return f + + for name in accessors: + + if typ == 'property': + f = _create_delegator_property(name) + else: + f = _create_delegator_method(name) + + # don't overwrite existing methods/properties + if overwrite or not hasattr(cls, name): + setattr(cls, name, f) diff --git a/pandas/core/base.py b/pandas/core/base.py index be021f3621c735..19f67286426450 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -153,100 +153,6 @@ def __setattr__(self, key, value): object.__setattr__(self, key, value) -class PandasDelegate(PandasObject): - """ an abstract base class for delegating methods/properties """ - - @classmethod - def _make_accessor(cls, data): - raise AbstractMethodError("_make_accessor should be implemented" - "by subclass and return an instance" - "of `cls`.") - - def _delegate_property_get(self, name, *args, **kwargs): - raise TypeError("You cannot access the " - "property {name}".format(name=name)) - - def _delegate_property_set(self, name, value, *args, **kwargs): - raise TypeError("The property {name} cannot be set".format(name=name)) - - def _delegate_method(self, name, *args, **kwargs): - raise TypeError("You cannot call method {name}".format(name=name)) - - @classmethod - def _add_delegate_accessors(cls, delegate, accessors, typ, - overwrite=False): - """ - add accessors to cls from the delegate class - - Parameters - ---------- - cls : the class to add the methods/properties to - delegate : the class to get methods/properties & doc-strings - acccessors : string list of accessors to add - typ : 'property' or 'method' - overwrite : boolean, default False 
- overwrite the method/property in the target class if it exists - """ - - def _create_delegator_property(name): - - def _getter(self): - return self._delegate_property_get(name) - - def _setter(self, new_values): - return self._delegate_property_set(name, new_values) - - _getter.__name__ = name - _setter.__name__ = name - - return property(fget=_getter, fset=_setter, - doc=getattr(delegate, name).__doc__) - - def _create_delegator_method(name): - - def f(self, *args, **kwargs): - return self._delegate_method(name, *args, **kwargs) - - f.__name__ = name - f.__doc__ = getattr(delegate, name).__doc__ - - return f - - for name in accessors: - - if typ == 'property': - f = _create_delegator_property(name) - else: - f = _create_delegator_method(name) - - # don't overwrite existing methods/properties - if overwrite or not hasattr(cls, name): - setattr(cls, name, f) - - -class AccessorProperty(object): - """Descriptor for implementing accessor properties like Series.str - """ - - def __init__(self, accessor_cls, construct_accessor=None): - self.accessor_cls = accessor_cls - self.construct_accessor = (construct_accessor or - accessor_cls._make_accessor) - self.__doc__ = accessor_cls.__doc__ - - def __get__(self, instance, owner=None): - if instance is None: - # this ensures that Series.str. is well defined - return self.accessor_cls - return self.construct_accessor(instance) - - def __set__(self, instance, value): - raise AttributeError("can't set attribute") - - def __delete__(self, instance): - raise AttributeError("can't delete attribute") - - class GroupByError(Exception): pass diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 98d6d7a68017ad..743bae2fd2848c 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -30,7 +30,8 @@ from pandas.core.common import is_null_slice, _maybe_box_datetimelike from pandas.core.algorithms import factorize, take_1d, unique1d -from pandas.core.base import (PandasObject, PandasDelegate, +from pandas.core.accessor import PandasDelegate +from pandas.core.base import (PandasObject, NoNewAttributesMixin, _shared_docs) import pandas.core.common as com from pandas.core.missing import interpolate_2d @@ -2065,7 +2066,7 @@ def repeat(self, repeats, *args, **kwargs): # The Series.cat accessor -class CategoricalAccessor(PandasDelegate, NoNewAttributesMixin): +class CategoricalAccessor(PandasDelegate, PandasObject, NoNewAttributesMixin): """ Accessor object for categorical properties of the Series values. 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 346eeb8d2642cd..899ae99d5deb1f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -90,7 +90,7 @@ from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.timedeltas import TimedeltaIndex -import pandas.core.base as base +from pandas.core import accessor import pandas.core.common as com import pandas.core.nanops as nanops import pandas.core.ops as ops @@ -5897,7 +5897,8 @@ def isin(self, values): # ---------------------------------------------------------------------- # Add plotting methods to DataFrame - plot = base.AccessorProperty(gfx.FramePlotMethods, gfx.FramePlotMethods) + plot = accessor.AccessorProperty(gfx.FramePlotMethods, + gfx.FramePlotMethods) hist = gfx.hist_frame boxplot = gfx.boxplot_frame diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 88297ac70984dd..2176338574304a 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -11,7 +11,8 @@ is_timedelta64_dtype, is_categorical_dtype, is_list_like) -from pandas.core.base import PandasDelegate, NoNewAttributesMixin +from pandas.core.accessor import PandasDelegate +from pandas.core.base import NoNewAttributesMixin, PandasObject from pandas.core.indexes.datetimes import DatetimeIndex from pandas._libs.period import IncompatibleFrequency # noqa from pandas.core.indexes.period import PeriodIndex @@ -81,7 +82,7 @@ def maybe_to_datetimelike(data, copy=False): "datetimelike index".format(type(data))) -class Properties(PandasDelegate, NoNewAttributesMixin): +class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin): def __init__(self, values, index, name, orig=None): self.values = values diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 562a758f83edc7..f28ff9697e517f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -57,7 +57,7 @@ import pandas.core.sorting as sorting from pandas.io.formats.printing import pprint_thing from pandas.core.ops import _comp_method_OBJECT_ARRAY -from pandas.core import strings +from pandas.core import strings, accessor from pandas.core.config import get_option @@ -159,7 +159,7 @@ class Index(IndexOpsMixin, PandasObject): _accessors = frozenset(['str']) # String Methods - str = base.AccessorProperty(strings.StringMethods) + str = accessor.AccessorProperty(strings.StringMethods) def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False, tupleize_cols=True, **kwargs): diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 9a055afccd7997..8b680127723c32 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -19,6 +19,7 @@ from pandas.util._decorators import Appender, cache_readonly from pandas.core.config import get_option from pandas.core.indexes.base import Index, _index_shared_docs +from pandas.core import accessor import pandas.core.base as base import pandas.core.missing as missing import pandas.core.indexes.base as ibase @@ -27,7 +28,7 @@ _index_doc_kwargs.update(dict(target_klass='CategoricalIndex')) -class CategoricalIndex(Index, base.PandasDelegate): +class CategoricalIndex(Index, accessor.PandasDelegate): """ Immutable Index implementing an ordered, sliceable set. 
CategoricalIndex diff --git a/pandas/core/series.py b/pandas/core/series.py index ea9aeefe3b6651..db8ee2529ef577 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -62,6 +62,7 @@ from pandas.compat import zip, u, OrderedDict, StringIO from pandas.compat.numpy import function as nv +from pandas.core import accessor import pandas.core.ops as ops import pandas.core.algorithms as algorithms @@ -2901,19 +2902,19 @@ def to_period(self, freq=None, copy=True): # ------------------------------------------------------------------------- # Datetimelike delegation methods - dt = base.AccessorProperty(CombinedDatetimelikeProperties) + dt = accessor.AccessorProperty(CombinedDatetimelikeProperties) # ------------------------------------------------------------------------- # Categorical methods - cat = base.AccessorProperty(CategoricalAccessor) + cat = accessor.AccessorProperty(CategoricalAccessor) # String Methods - str = base.AccessorProperty(strings.StringMethods) + str = accessor.AccessorProperty(strings.StringMethods) # ---------------------------------------------------------------------- # Add plotting methods to Series - plot = base.AccessorProperty(gfx.SeriesPlotMethods, - gfx.SeriesPlotMethods) + plot = accessor.AccessorProperty(gfx.SeriesPlotMethods, + gfx.SeriesPlotMethods) hist = gfx.hist_series diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 38d78b12b31aa5..5bfd8eb7eae248 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -18,7 +18,8 @@ CategoricalIndex, Timestamp) from pandas.compat import StringIO, PYPY, long from pandas.compat.numpy import np_array_datetime64_compat -from pandas.core.base import PandasDelegate, NoNewAttributesMixin +from pandas.core.accessor import PandasDelegate +from pandas.core.base import PandasObject, NoNewAttributesMixin from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas._libs.tslib import iNaT @@ -105,7 +106,7 @@ def bar(self, *args, **kwargs): """ a test bar method """ pass - class Delegate(PandasDelegate): + class Delegate(PandasDelegate, PandasObject): def __init__(self, obj): self.obj = obj From 965c1c89b6df471d88dc0e1188fb8cbc0d89f867 Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sun, 24 Sep 2017 08:22:13 -0500 Subject: [PATCH 140/188] preserve kwargs order on assign func for py36plus - #14207 (#17632) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 23 ++++++++++++++--------- pandas/tests/frame/test_mutate_columns.py | 16 +++++++++++++--- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 49d2c1767807c4..1365901c2ce5e3 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -162,6 +162,7 @@ Other Enhancements - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`) - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`) - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`) +- :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names .. 
_whatsnew_0210.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 899ae99d5deb1f..912dbdb9de7059 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -82,6 +82,7 @@ from pandas.compat import (range, map, zip, lrange, lmap, lzip, StringIO, u, OrderedDict, raise_with_traceback) from pandas import compat +from pandas.compat import PY36 from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution from pandas.util._validators import validate_bool_kwarg @@ -2575,12 +2576,12 @@ def assign(self, **kwargs): Notes ----- - Since ``kwargs`` is a dictionary, the order of your - arguments may not be preserved. To make things predicatable, - the columns are inserted in alphabetical order, at the end of - your DataFrame. Assigning multiple columns within the same - ``assign`` is possible, but you cannot reference other columns - created within the same ``assign`` call. + For python 3.6 and above, the columns are inserted in the order of + **kwargs. For python 3.5 and earlier, since **kwargs is unordered, + the columns are inserted in alphabetical order at the end of your + DataFrame. Assigning multiple columns within the same ``assign`` + is possible, but you cannot reference other columns created within + the same ``assign`` call. Examples -------- @@ -2620,14 +2621,18 @@ def assign(self, **kwargs): data = self.copy() # do all calculations first... - results = {} + results = OrderedDict() for k, v in kwargs.items(): results[k] = com._apply_if_callable(v, data) + # preserve order for 3.6 and later, but sort by key for 3.5 and earlier + if PY36: + results = results.items() + else: + results = sorted(results.items()) # ... and then assign - for k, v in sorted(results.items()): + for k, v in results: data[k] = v - return data def _sanitize_column(self, key, value, broadcast=True): diff --git a/pandas/tests/frame/test_mutate_columns.py b/pandas/tests/frame/test_mutate_columns.py index 4462260a290d9b..0043475702f94b 100644 --- a/pandas/tests/frame/test_mutate_columns.py +++ b/pandas/tests/frame/test_mutate_columns.py @@ -4,6 +4,7 @@ import pytest from pandas.compat import range, lrange import numpy as np +from pandas.compat import PY36 from pandas import DataFrame, Series, Index, MultiIndex @@ -61,14 +62,23 @@ def test_assign_multiple(self): [3, 6, 9, 3, 6]], columns=list('ABCDE')) assert_frame_equal(result, expected) - def test_assign_alphabetical(self): + def test_assign_order(self): # GH 9818 df = DataFrame([[1, 2], [3, 4]], columns=['A', 'B']) result = df.assign(D=df.A + df.B, C=df.A - df.B) - expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], - columns=list('ABCD')) + + if PY36: + expected = DataFrame([[1, 2, 3, -1], [3, 4, 7, -1]], + columns=list('ABDC')) + else: + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) assert_frame_equal(result, expected) result = df.assign(C=df.A - df.B, D=df.A + df.B) + + expected = DataFrame([[1, 2, -1, 3], [3, 4, -1, 7]], + columns=list('ABCD')) + assert_frame_equal(result, expected) def test_assign_bad(self): From e0743a1b9725c1bb63c738f6e730f52e269095ac Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sun, 24 Sep 2017 14:42:53 -0400 Subject: [PATCH 141/188] TST: install cython from pip for 3.6_NUMPY_DEV build (#17657) --- ci/requirements-3.6_NUMPY_DEV.build | 1 - ci/requirements-3.6_NUMPY_DEV.build.sh | 3 +++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/requirements-3.6_NUMPY_DEV.build b/ci/requirements-3.6_NUMPY_DEV.build index 
900c050f1cc9ef..336fbe86b57d88 100644 --- a/ci/requirements-3.6_NUMPY_DEV.build +++ b/ci/requirements-3.6_NUMPY_DEV.build @@ -1,3 +1,2 @@ python=3.6* pytz -cython diff --git a/ci/requirements-3.6_NUMPY_DEV.build.sh b/ci/requirements-3.6_NUMPY_DEV.build.sh index 90ed04f8f0c17a..fd79142c5cebbe 100644 --- a/ci/requirements-3.6_NUMPY_DEV.build.sh +++ b/ci/requirements-3.6_NUMPY_DEV.build.sh @@ -14,4 +14,7 @@ pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy # install dateutil from master pip install -U git+git://github.com/dateutil/dateutil.git +# cython via pip +pip install cython + true From 6da85b30d989855fe2a1f5d1323189f0fc639e60 Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Mon, 25 Sep 2017 04:06:42 +0900 Subject: [PATCH 142/188] TST: Fix repeat parameter overwritten the sparse asv test (#17659) --- asv_bench/benchmarks/sparse.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 7259e8cdb7d614..b958f5e0e5c342 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -1,4 +1,4 @@ -from itertools import repeat +import itertools from .pandas_vb_common import * import scipy.sparse @@ -33,7 +33,7 @@ def time_sparse_from_scipy(self): SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005)) def time_sparse_from_dict(self): - SparseDataFrame(dict(zip(range(1000), repeat([0])))) + SparseDataFrame(dict(zip(range(1000), itertools.repeat([0])))) class sparse_series_from_coo(object): From 0d06216e9aad8572350395b524a591e93c094836 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 25 Sep 2017 09:13:10 +0200 Subject: [PATCH 143/188] DOC: fixed errors in doc string for Categorical + cleanup (#17655) --- pandas/core/categorical.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 743bae2fd2848c..8b055e9ae59c3a 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -197,34 +197,34 @@ class Categorical(PandasObject): Examples -------- - >>> from pandas import Categorical - >>> Categorical([1, 2, 3, 1, 2, 3]) + >>> pd.Categorical([1, 2, 3, 1, 2, 3]) [1, 2, 3, 1, 2, 3] - Categories (3, int64): [1 < 2 < 3] + Categories (3, int64): [1, 2, 3] - >>> Categorical(['a', 'b', 'c', 'a', 'b', 'c']) + >>> pd.Categorical(['a', 'b', 'c', 'a', 'b', 'c']) [a, b, c, a, b, c] - Categories (3, object): [a < b < c] + Categories (3, object): [a, b, c] - Only ordered `Categoricals` can be sorted (according to the order - of the categories) and have a min and max value. + Ordered `Categoricals` can be sorted according to the custom order + of the categories and can have a min and max value. - >>> a = Categorical(['a','b','c','a','b','c'], ['c', 'b', 'a'], - ordered=True) - >>> a.min() + >>> c = pd.Categorical(['a','b','c','a','b','c'], ordered=True, + ... categories=['c', 'b', 'a']) + >>> c + [a, b, c, a, b, c] + Categories (3, object): [c < b < a] + >>> c.min() 'c' Notes ----- - See the :ref:`user guide ` for more. + See the `user guide + `_ for more. 
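One point the revised docstring states but does not demonstrate: ``min``/``max`` are only defined for ordered categoricals, and an unordered one raises (a quick check; the exact message varies by version):

    import pandas as pd

    unordered = pd.Categorical(['a', 'b', 'c', 'a'])
    try:
        unordered.min()
    except TypeError as err:
        print(err)  # suggests .as_ordered() to opt in to an ordering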
See also -------- - Categorical.sort - Categorical.order - Categorical.min - Categorical.max pandas.api.types.CategoricalDtype + CategoricalIndex : An Index with an underlying ``Categorical`` """ # For comparisons, so that numpy uses our implementation if the compare From 35bcd260a6e01fdb41c0e8b73c47db286250694b Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 25 Sep 2017 10:12:34 +0200 Subject: [PATCH 144/188] DOC: Added example to MultiIndex doc string (#17653) --- pandas/core/indexes/multi.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0b7c5f414b1789..8c6b26c9070a9c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -68,6 +68,33 @@ class MultiIndex(Index): Copy the meta-data verify_integrity : boolean, default True Check that the levels/labels are consistent and valid + + Examples + --------- + A new ``MultiIndex`` is typically constructed using one of the helper + methods :meth:`MultiIndex.from_arrays``, :meth:`MultiIndex.from_product`` + and :meth:`MultiIndex.from_tuples``. For example (using ``.from_arrays``): + + >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] + >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) + MultiIndex(levels=[[1, 2], ['blue', 'red']], + labels=[[0, 0, 1, 1], [1, 0, 1, 0]], + names=['number', 'color']) + + See further examples for how to construct a MultiIndex in the doc strings + of the mentioned helper methods. + + Notes + ----- + See the `user guide + `_ for more. + + See Also + -------- + MultiIndex.from_arrays : Convert list of arrays to MultiIndex + MultiIndex.from_product : Create a MultiIndex from the cartesian product + of iterables + MultiIndex.from_tuples : Convert list of tuples to a MultiIndex """ # initialize to zero-length tuples to make everything work From 0e2ce9a6001b8b9b8e5ba7ab2e57ea9201c74e8f Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 25 Sep 2017 10:13:55 +0200 Subject: [PATCH 145/188] DOC: Change plot style to matplotlib default from ggplot (#17462) --- doc/source/10min.rst | 2 +- doc/source/computation.rst | 2 +- doc/source/cookbook.rst | 2 +- doc/source/dsintro.rst | 2 +- doc/source/gotchas.rst | 2 +- doc/source/groupby.rst | 2 +- doc/source/missing_data.rst | 2 +- doc/source/visualization.rst | 28 ++++++++++++++++++---------- 8 files changed, 25 insertions(+), 17 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index ef6b2d6ef2c904..0a23f490e66283 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -11,7 +11,7 @@ np.random.seed(123456) np.set_printoptions(precision=4, suppress=True) import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') pd.options.display.max_rows = 15 #### portions of this were borrowed from the diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 14cfdbc3648375..466ac3c9cbf51b 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -8,7 +8,7 @@ np.set_printoptions(precision=4, suppress=True) import pandas as pd import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt plt.close('all') pd.options.display.max_rows=15 diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 5bb3ba75fe51bc..f13e5e67de07e6 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -20,7 +20,7 @@ pd.options.display.max_rows=15 import matplotlib - matplotlib.style.use('ggplot') + # 
matplotlib.style.use('default') np.set_printoptions(precision=4, suppress=True) diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index ec0a1c7a00bf74..e5c7637ddb4993 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -10,7 +10,7 @@ pd.options.display.max_rows = 15 import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt plt.close('all') diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 9e6f98923fca6c..8ae830d7fd76b2 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -14,7 +14,7 @@ Frequently Asked Questions (FAQ) import pandas as pd pd.options.display.max_rows = 15 import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt plt.close('all') diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 91d806ca5dd4f8..175ea281226062 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -10,7 +10,7 @@ import pandas as pd pd.options.display.max_rows = 15 import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt plt.close('all') from collections import OrderedDict diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index b33b5c304853ae..07740d66a21865 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -7,7 +7,7 @@ import pandas as pd pd.options.display.max_rows=15 import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt .. _missing_data: diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 82ad8de93514e2..7db3b63fd8f08a 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -10,7 +10,7 @@ np.set_printoptions(precision=4, suppress=True) pd.options.display.max_rows = 15 import matplotlib - matplotlib.style.use('ggplot') + # matplotlib.style.use('default') import matplotlib.pyplot as plt plt.close('all') @@ -24,13 +24,6 @@ We use the standard convention for referencing the matplotlib API: import matplotlib.pyplot as plt -The plots in this document are made using matplotlib's ``ggplot`` style (new in version 1.4): - -.. code-block:: python - - import matplotlib - matplotlib.style.use('ggplot') - We provide the basics in pandas to easily create decent looking plots. See the :ref:`ecosystem ` section for visualization libraries that go beyond the basics documented here. @@ -134,7 +127,7 @@ For example, a bar plot can be created the following way: plt.figure(); @savefig bar_plot_ex.png - df.iloc[5].plot(kind='bar'); plt.axhline(0, color='k') + df.iloc[5].plot(kind='bar'); .. versionadded:: 0.17.0 @@ -154,7 +147,7 @@ and :ref:`DataFrame.boxplot() ` methods, which use a separate Finally, there are several :ref:`plotting functions ` in ``pandas.plotting`` that take a :class:`Series` or :class:`DataFrame` as an argument. These -include +include: * :ref:`Scatter Matrix ` * :ref:`Andrews Curves ` @@ -1049,6 +1042,21 @@ be colored differently. Plot Formatting --------------- +Setting the plot style +~~~~~~~~~~~~~~~~~~~~~~ + +From version 1.5 and up, matplotlib offers a range of preconfigured plotting styles. Setting the +style can be used to easily give plots the general look that you want. +Setting the style is as easy as calling ``matplotlib.style.use(my_plot_style)`` before +creating your plot. For example you could do ``matplotlib.style.use('ggplot')`` for ggplot-style +plots. 
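The new section is easy to try out interactively; a minimal sketch (the available style names vary with the installed matplotlib version):

    import matplotlib.style
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd

    print(matplotlib.style.available[:5])  # e.g. ['bmh', 'classic', ...]
    matplotlib.style.use('ggplot')         # opt back in to the old look

    pd.Series(np.random.randn(100).cumsum()).plot()
    plt.show()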
+ +You can see the various available style names at ``matplotlib.style.available`` and it's very +easy to try them out. + +General plot style arguments +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Most plotting methods have a set of keyword arguments that control the layout and formatting of the returned plot: From 42195dbdc4e3c703f336dc618aa64f6efc4e4977 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 25 Sep 2017 03:07:46 -0700 Subject: [PATCH 146/188] Separate out strptime.pyx from tslib (#17342) --- pandas/_libs/__init__.py | 1 + pandas/_libs/src/datetime.pxd | 15 + pandas/_libs/src/datetime/np_datetime.c | 6 + pandas/_libs/src/datetime/np_datetime.h | 3 + pandas/_libs/tslib.pyx | 610 +--------------------- pandas/_libs/tslibs/strptime.pyx | 640 ++++++++++++++++++++++++ pandas/core/tools/datetimes.py | 5 +- setup.py | 5 +- 8 files changed, 675 insertions(+), 610 deletions(-) create mode 100644 pandas/_libs/tslibs/strptime.pyx diff --git a/pandas/_libs/__init__.py b/pandas/_libs/__init__.py index ab3832d0292ba4..b4c3ff8008015f 100644 --- a/pandas/_libs/__init__.py +++ b/pandas/_libs/__init__.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # flake8: noqa from .tslib import iNaT, NaT, Timestamp, Timedelta, OutOfBoundsDatetime diff --git a/pandas/_libs/src/datetime.pxd b/pandas/_libs/src/datetime.pxd index 23620e790c1323..86c8f3bfc74f3b 100644 --- a/pandas/_libs/src/datetime.pxd +++ b/pandas/_libs/src/datetime.pxd @@ -94,6 +94,7 @@ cdef extern from "datetime/np_datetime.h": PANDAS_DATETIMEUNIT fr, pandas_datetimestruct *result) nogil int days_per_month_table[2][12] + pandas_datetimestruct _NS_MIN_DTS, _NS_MAX_DTS int dayofweek(int y, int m, int d) nogil int is_leapyear(int64_t year) nogil @@ -161,3 +162,17 @@ cdef inline int64_t _date_to_datetime64(object val, dts.hour = dts.min = dts.sec = dts.us = 0 dts.ps = dts.as = 0 return pandas_datetimestruct_to_datetime(PANDAS_FR_ns, dts) + + +cdef inline bint check_dts_bounds(pandas_datetimestruct *dts): + """Returns True if an error needs to be raised""" + cdef: + bint error = False + + if (dts.year <= 1677 and + cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1): + error = True + elif (dts.year >= 2262 and + cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1): + error = True + return error diff --git a/pandas/_libs/src/datetime/np_datetime.c b/pandas/_libs/src/datetime/np_datetime.c index 84584189888636..ffb901981f939f 100644 --- a/pandas/_libs/src/datetime/np_datetime.c +++ b/pandas/_libs/src/datetime/np_datetime.c @@ -40,6 +40,12 @@ This file is derived from NumPy 1.7. 
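The ``_NS_MIN_DTS``/``_NS_MAX_DTS`` structs that the new ``check_dts_bounds`` helper compares against correspond to ``Timestamp.min``/``Timestamp.max`` on the Python side (values shown are for the default nanosecond resolution):

    import pandas as pd

    print(pd.Timestamp.min)  # 1677-09-21 00:12:43.145225
    print(pd.Timestamp.max)  # 2262-04-11 23:47:16.854775807

    try:
        pd.Timestamp('1500-01-01')
    except ValueError as err:  # OutOfBoundsDatetime subclasses ValueError
        print(err)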
See NUMPY_LICENSE.txt #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask #endif +const pandas_datetimestruct _NS_MIN_DTS = { + 1677, 9, 21, 0, 12, 43, 145225, 0, 0}; +const pandas_datetimestruct _NS_MAX_DTS = { + 2262, 4, 11, 23, 47, 16, 854775, 807000, 0}; + + const int days_per_month_table[2][12] = { {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}, {31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31}}; diff --git a/pandas/_libs/src/datetime/np_datetime.h b/pandas/_libs/src/datetime/np_datetime.h index 97ec5782b625b5..a20bff60126aac 100644 --- a/pandas/_libs/src/datetime/np_datetime.h +++ b/pandas/_libs/src/datetime/np_datetime.h @@ -54,6 +54,9 @@ typedef struct { int num; } pandas_datetime_metadata; +extern const pandas_datetimestruct _NS_MIN_DTS; +extern const pandas_datetimestruct _NS_MAX_DTS; + // stuff pandas needs // ---------------------------------------------------------------------------- diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c629ccbd8e1fd4..d4ca5af09367eb 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -50,6 +50,7 @@ from datetime cimport ( npy_datetime, is_leapyear, dayofweek, + check_dts_bounds, PANDAS_FR_ns, PyDateTime_Check, PyDate_Check, PyDateTime_IMPORT, @@ -69,6 +70,7 @@ from khash cimport ( cimport cython import re +import time # dateutil compat from dateutil.tz import (tzoffset, tzlocal as _dateutil_tzlocal, @@ -1691,21 +1693,10 @@ class OutOfBoundsDatetime(ValueError): pass cdef inline _check_dts_bounds(pandas_datetimestruct *dts): - cdef: - bint error = False - - if dts.year <= 1677 and cmp_pandas_datetimestruct(dts, &_NS_MIN_DTS) == -1: - error = True - elif ( - dts.year >= 2262 and - cmp_pandas_datetimestruct(dts, &_NS_MAX_DTS) == 1): - error = True - - if error: + if check_dts_bounds(dts): fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec) - raise OutOfBoundsDatetime( 'Out of bounds nanosecond timestamp: %s' % fmt) @@ -3515,284 +3506,6 @@ cpdef convert_to_timedelta64(object ts, object unit): return ts.astype('timedelta64[ns]') -def array_strptime(ndarray[object] values, object fmt, - bint exact=True, errors='raise'): - """ - Parameters - ---------- - values : ndarray of string-like objects - fmt : string-like regex - exact : matches must be exact if True, search if False - coerce : if invalid values found, coerce to NaT - """ - - cdef: - Py_ssize_t i, n = len(values) - pandas_datetimestruct dts - ndarray[int64_t] iresult - int year, month, day, minute, hour, second, weekday, julian, tz - int week_of_year, week_of_year_start - int64_t us, ns - object val, group_key, ampm, found - dict found_key - bint is_raise = errors=='raise' - bint is_ignore = errors=='ignore' - bint is_coerce = errors=='coerce' - - assert is_raise or is_ignore or is_coerce - - global _TimeRE_cache, _regex_cache - with _cache_lock: - if _getlang() != _TimeRE_cache.locale_time.lang: - _TimeRE_cache = TimeRE() - _regex_cache.clear() - if len(_regex_cache) > _CACHE_MAX_SIZE: - _regex_cache.clear() - locale_time = _TimeRE_cache.locale_time - format_regex = _regex_cache.get(fmt) - if not format_regex: - try: - format_regex = _TimeRE_cache.compile(fmt) - # KeyError raised when a bad format is found; can be specified as - # \\, in which case it was a stray % but with a space after it - except KeyError, err: - bad_directive = err.args[0] - if bad_directive == "\\": - bad_directive = "%" - del err - raise ValueError("'%s' is a bad directive in format '%s'" % - (bad_directive, fmt)) - # IndexError 
only occurs when the format string is "%" - except IndexError: - raise ValueError("stray %% in format '%s'" % fmt) - _regex_cache[fmt] = format_regex - - result = np.empty(n, dtype='M8[ns]') - iresult = result.view('i8') - - dts.us = dts.ps = dts.as = 0 - - cdef dict _parse_code_table = { - 'y': 0, - 'Y': 1, - 'm': 2, - 'B': 3, - 'b': 4, - 'd': 5, - 'H': 6, - 'I': 7, - 'M': 8, - 'S': 9, - 'f': 10, - 'A': 11, - 'a': 12, - 'w': 13, - 'j': 14, - 'U': 15, - 'W': 16, - 'Z': 17, - 'p': 18 # just an additional key, works only with I - } - cdef int parse_code - - for i in range(n): - val = values[i] - if util.is_string_object(val): - if val in _nat_strings: - iresult[i] = NPY_NAT - continue - else: - if _checknull_with_nat(val): - iresult[i] = NPY_NAT - continue - else: - val = str(val) - - # exact matching - if exact: - found = format_regex.match(val) - if not found: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise ValueError("time data %r does not match " - "format %r (match)" % (values[i], fmt)) - if len(val) != found.end(): - if is_coerce: - iresult[i] = NPY_NAT - continue - raise ValueError("unconverted data remains: %s" % - values[i][found.end():]) - - # search - else: - found = format_regex.search(val) - if not found: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise ValueError("time data %r does not match format " - "%r (search)" % (values[i], fmt)) - - year = 1900 - month = day = 1 - hour = minute = second = ns = us = 0 - tz = -1 - # Default to -1 to signify that values not known; not critical to have, - # though - week_of_year = -1 - week_of_year_start = -1 - # weekday and julian defaulted to -1 so as to signal need to calculate - # values - weekday = julian = -1 - found_dict = found.groupdict() - for group_key in found_dict.iterkeys(): - # Directives not explicitly handled below: - # c, x, X - # handled by making out of other directives - # U, W - # worthless without day of the week - parse_code = _parse_code_table[group_key] - - if parse_code == 0: - year = int(found_dict['y']) - # Open Group specification for strptime() states that a %y - #value in the range of [00, 68] is in the century 2000, while - #[69,99] is in the century 1900 - if year <= 68: - year += 2000 - else: - year += 1900 - elif parse_code == 1: - year = int(found_dict['Y']) - elif parse_code == 2: - month = int(found_dict['m']) - elif parse_code == 3: - # elif group_key == 'B': - month = locale_time.f_month.index(found_dict['B'].lower()) - elif parse_code == 4: - # elif group_key == 'b': - month = locale_time.a_month.index(found_dict['b'].lower()) - elif parse_code == 5: - # elif group_key == 'd': - day = int(found_dict['d']) - elif parse_code == 6: - # elif group_key == 'H': - hour = int(found_dict['H']) - elif parse_code == 7: - hour = int(found_dict['I']) - ampm = found_dict.get('p', '').lower() - # If there was no AM/PM indicator, we'll treat this like AM - if ampm in ('', locale_time.am_pm[0]): - # We're in AM so the hour is correct unless we're - # looking at 12 midnight. - # 12 midnight == 12 AM == hour 0 - if hour == 12: - hour = 0 - elif ampm == locale_time.am_pm[1]: - # We're in PM so we need to add 12 to the hour unless - # we're looking at 12 noon. 
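The 12-hour-clock handling being removed here reappears verbatim in the new module later in this patch; the standard library follows the same rule, which makes it easy to sanity-check:

    from datetime import datetime

    # %I is the 12-hour clock and %p the AM/PM marker: 12 AM -> hour 0,
    # 12 PM -> hour 12, and other PM hours get 12 added.
    for s in ('12:30 AM', '01:15 AM', '12:00 PM', '07:45 PM'):
        print(s, '->', datetime.strptime(s, '%I:%M %p').time())
    # 00:30:00  01:15:00  12:00:00  19:45:00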
- # 12 noon == 12 PM == hour 12 - if hour != 12: - hour += 12 - elif parse_code == 8: - minute = int(found_dict['M']) - elif parse_code == 9: - second = int(found_dict['S']) - elif parse_code == 10: - s = found_dict['f'] - # Pad to always return nanoseconds - s += "0" * (9 - len(s)) - us = long(s) - ns = us % 1000 - us = us / 1000 - elif parse_code == 11: - weekday = locale_time.f_weekday.index(found_dict['A'].lower()) - elif parse_code == 12: - weekday = locale_time.a_weekday.index(found_dict['a'].lower()) - elif parse_code == 13: - weekday = int(found_dict['w']) - if weekday == 0: - weekday = 6 - else: - weekday -= 1 - elif parse_code == 14: - julian = int(found_dict['j']) - elif parse_code == 15 or parse_code == 16: - week_of_year = int(found_dict[group_key]) - if group_key == 'U': - # U starts week on Sunday. - week_of_year_start = 6 - else: - # W starts week on Monday. - week_of_year_start = 0 - elif parse_code == 17: - # Since -1 is default value only need to worry about setting tz - # if it can be something other than -1. - found_zone = found_dict['Z'].lower() - for value, tz_values in enumerate(locale_time.timezone): - if found_zone in tz_values: - # Deal w/ bad locale setup where timezone names are the - # same and yet time.daylight is true; too ambiguous to - # be able to tell what timezone has daylight savings - if (time.tzname[0] == time.tzname[1] and - time.daylight and found_zone not in ( - "utc", "gmt")): - break - else: - tz = value - break - # If we know the wk of the year and what day of that wk, we can figure - # out the Julian day of the year. - if julian == -1 and week_of_year != -1 and weekday != -1: - week_starts_Mon = True if week_of_year_start == 0 else False - julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, - week_starts_Mon) - # Cannot pre-calculate datetime_date() since can change in Julian - # calculation and thus could have different value for the day of the wk - # calculation. - try: - if julian == -1: - # Need to add 1 to result since first day of the year is 1, not - # 0. - julian = datetime_date(year, month, day).toordinal() - \ - datetime_date(year, 1, 1).toordinal() + 1 - else: # Assume that if they bothered to include Julian day it will - # be accurate. - datetime_result = datetime_date.fromordinal( - (julian - 1) + datetime_date(year, 1, 1).toordinal()) - year = datetime_result.year - month = datetime_result.month - day = datetime_result.day - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - if weekday == -1: - weekday = datetime_date(year, month, day).weekday() - - dts.year = year - dts.month = month - dts.day = day - dts.hour = hour - dts.min = minute - dts.sec = second - dts.us = us - dts.ps = ns * 1000 - - iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) - try: - _check_dts_bounds(&dts) - except ValueError: - if is_coerce: - iresult[i] = NPY_NAT - continue - raise - - return result - - #---------------------------------------------------------------------- # NaT methods/property setups @@ -5176,320 +4889,3 @@ def shift_months(int64_t[:] dtindex, int months, object day=None): raise ValueError("day must be None, 'start' or 'end'") return np.asarray(out) - -#---------------------------------------------------------------------- -# Don't even ask - -"""Strptime-related classes and functions. 
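In practice all of this vendored machinery is reached through ``pd.to_datetime`` with an explicit ``format``; for example (output abridged):

    import pandas as pd

    # errors='coerce' maps unparseable strings to NaT rather than raising,
    # mirroring the is_coerce branches in array_strptime.
    idx = pd.to_datetime(['2017-09-25 10:00', 'not a date'],
                         format='%Y-%m-%d %H:%M', errors='coerce')
    print(idx)  # DatetimeIndex(['2017-09-25 10:00:00', 'NaT'], ...)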
- -CLASSES: - LocaleTime -- Discovers and stores locale-specific time information - TimeRE -- Creates regexes for pattern matching a string of text containing - time information - -FUNCTIONS: - _getlang -- Figure out what language is being used for the locale - strptime -- Calculates the time struct represented by the passed-in string - -""" -import time -import locale -import calendar -from re import compile as re_compile -from re import IGNORECASE -from re import escape as re_escape -from datetime import date as datetime_date - -# Python 2 vs Python 3 -try: - from thread import allocate_lock as _thread_allocate_lock -except: - try: - from _thread import allocate_lock as _thread_allocate_lock - except: - try: - from dummy_thread import allocate_lock as _thread_allocate_lock - except: - from _dummy_thread import allocate_lock as _thread_allocate_lock - -__all__ = [] - - -def _getlang(): - # Figure out what the current language is set to. - return locale.getlocale(locale.LC_TIME) - - -class LocaleTime(object): - """Stores and handles locale-specific information related to time. - - ATTRIBUTES: - f_weekday -- full weekday names (7-item list) - a_weekday -- abbreviated weekday names (7-item list) - f_month -- full month names (13-item list; dummy value in [0], which - is added by code) - a_month -- abbreviated month names (13-item list, dummy value in - [0], which is added by code) - am_pm -- AM/PM representation (2-item list) - LC_date_time -- format string for date/time representation (string) - LC_date -- format string for date representation (string) - LC_time -- format string for time representation (string) - timezone -- daylight- and non-daylight-savings timezone representation - (2-item list of sets) - lang -- Language used by instance (2-item tuple) - """ - - def __init__(self): - """Set all attributes. - - Order of methods called matters for dependency reasons. - - The locale language is set at the offset and then checked again before - exiting. This is to make sure that the attributes were not set with a - mix of information from more than one locale. This would most likely - happen when using threads where one thread calls a locale-dependent - function while another thread changes the locale while the function in - the other thread is still running. Proper coding would call for - locks to prevent changing the locale while locale-dependent code is - running. The check here is done in case someone does not think about - doing this. - - Only other possible issue is if someone changed the timezone and did - not call tz.tzset . That is an issue for the programmer, though, - since changing the timezone is worthless without that call. - - """ - self.lang = _getlang() - self.__calc_weekday() - self.__calc_month() - self.__calc_am_pm() - self.__calc_timezone() - self.__calc_date_time() - if _getlang() != self.lang: - raise ValueError("locale changed during initialization") - - def __pad(self, seq, front): - # Add '' to seq to either the front (is True), else the back. - seq = list(seq) - if front: - seq.insert(0, '') - else: - seq.append('') - return seq - - def __calc_weekday(self): - # Set self.a_weekday and self.f_weekday using the calendar - # module. - a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] - f_weekday = [calendar.day_name[i].lower() for i in range(7)] - self.a_weekday = a_weekday - self.f_weekday = f_weekday - - def __calc_month(self): - # Set self.f_month and self.a_month using the calendar module. 
- a_month = [calendar.month_abbr[i].lower() for i in range(13)] - f_month = [calendar.month_name[i].lower() for i in range(13)] - self.a_month = a_month - self.f_month = f_month - - def __calc_am_pm(self): - # Set self.am_pm by using time.strftime(). - - # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that - # magical; just happened to have used it everywhere else where a - # static date was needed. - am_pm = [] - for hour in (01, 22): - time_tuple = time.struct_time( - (1999, 3, 17, hour, 44, 55, 2, 76, 0)) - am_pm.append(time.strftime("%p", time_tuple).lower()) - self.am_pm = am_pm - - def __calc_date_time(self): - # Set self.date_time, self.date, & self.time by using - # time.strftime(). - - # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of - # overloaded numbers is minimized. The order in which searches for - # values within the format string is very important; it eliminates - # possible ambiguity for what something represents. - time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0)) - date_time = [None, None, None] - date_time[0] = time.strftime("%c", time_tuple).lower() - date_time[1] = time.strftime("%x", time_tuple).lower() - date_time[2] = time.strftime("%X", time_tuple).lower() - replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), - (self.f_month[3], - '%B'), (self.a_weekday[2], '%a'), - (self.a_month[3], '%b'), (self.am_pm[1], '%p'), - ('1999', '%Y'), ('99', '%y'), ('22', '%H'), - ('44', '%M'), ('55', '%S'), ('76', '%j'), - ('17', '%d'), ('03', '%m'), ('3', '%m'), - # '3' needed for when no leading zero. - ('2', '%w'), ('10', '%I')] - replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone - for tz in tz_values]) - for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')): - current_format = date_time[offset] - for old, new in replacement_pairs: - # Must deal with possible lack of locale info - # manifesting itself as the empty string (e.g., Swedish's - # lack of AM/PM info) or a platform returning a tuple of empty - # strings (e.g., MacOS 9 having timezone as ('','')). - if old: - current_format = current_format.replace(old, new) - # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since - # 2005-01-03 occurs before the first Monday of the year. Otherwise - # %U is used. - time_tuple = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0)) - if '00' in time.strftime(directive, time_tuple): - U_W = '%W' - else: - U_W = '%U' - date_time[offset] = current_format.replace('11', U_W) - self.LC_date_time = date_time[0] - self.LC_date = date_time[1] - self.LC_time = date_time[2] - - def __calc_timezone(self): - # Set self.timezone by using time.tzname. - # Do not worry about possibility of time.tzname[0] == timetzname[1] - # and time.daylight; handle that in strptime . - try: - time.tzset() - except AttributeError: - pass - no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()]) - if time.daylight: - has_saving = frozenset([time.tzname[1].lower()]) - else: - has_saving = frozenset() - self.timezone = (no_saving, has_saving) - - -class TimeRE(dict): - """Handle conversion from format directives to regexes.""" - - def __init__(self, locale_time=None): - """Create keys/values. - - Order of execution is important for dependency reasons. 
- - """ - if locale_time: - self.locale_time = locale_time - else: - self.locale_time = LocaleTime() - base = super(TimeRE, self) - base.__init__({ - # The " \d" part of the regex is to make %c from ANSI C work - 'd': r"(?P3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", - 'f': r"(?P[0-9]{1,9})", - 'H': r"(?P2[0-3]|[0-1]\d|\d)", - 'I': r"(?P1[0-2]|0[1-9]|[1-9])", - 'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" - r"[1-9]\d|0[1-9]|[1-9])"), - 'm': r"(?P1[0-2]|0[1-9]|[1-9])", - 'M': r"(?P[0-5]\d|\d)", - 'S': r"(?P6[0-1]|[0-5]\d|\d)", - 'U': r"(?P5[0-3]|[0-4]\d|\d)", - 'w': r"(?P[0-6])", - # W is set below by using 'U' - 'y': r"(?P\d\d)", - #XXX: Does 'Y' need to worry about having less or more than - # 4 digits? - 'Y': r"(?P\d\d\d\d)", - 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), - 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), - 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), - 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), - 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), - 'Z': self.__seqToRE([tz for tz_names in self.locale_time.timezone - for tz in tz_names], - 'Z'), - '%': '%'}) - base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) - base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) - base.__setitem__('x', self.pattern(self.locale_time.LC_date)) - base.__setitem__('X', self.pattern(self.locale_time.LC_time)) - - def __seqToRE(self, to_convert, directive): - """Convert a list to a regex string for matching a directive. - - Want possible matching values to be from longest to shortest. This - prevents the possibility of a match occuring for a value that also - a substring of a larger value that should have matched (e.g., 'abc' - matching when 'abcdef' should have been the match). - - """ - to_convert = sorted(to_convert, key=len, reverse=True) - for value in to_convert: - if value != '': - break - else: - return '' - regex = '|'.join([re_escape(stuff) for stuff in to_convert]) - regex = '(?P<%s>%s' % (directive, regex) - return '%s)' % regex - - def pattern(self, format): - """Return regex pattern for the format string. - - Need to make sure that any characters that might be interpreted as - regex syntax are escaped. - - """ - processed_format = '' - # The sub() call escapes all characters that might be misconstrued - # as regex syntax. Cannot use re.escape since we have to deal with - # format directives (%m, etc.). - regex_chars = re_compile(r"([\\.^$*+?\(\){}\[\]|])") - format = regex_chars.sub(r"\\\1", format) - whitespace_replacement = re_compile(r'\s+') - format = whitespace_replacement.sub(r'\\s+', format) - while '%' in format: - directive_index = format.index('%') +1 - processed_format = "%s%s%s" % (processed_format, - format[:directive_index -1], - self[format[directive_index]]) - format = format[directive_index +1:] - return "%s%s" % (processed_format, format) - - def compile(self, format): - """Return a compiled re object for the format string.""" - return re_compile(self.pattern(format), IGNORECASE) - -_cache_lock = _thread_allocate_lock() -# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock -# first! 
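The lock-plus-bounded-dict caching idiom used here (and carried over unchanged into the new module below) is worth spelling out; a simplified stand-alone sketch with the same crude eviction policy (names are illustrative):

    import re
    from threading import Lock

    _cache_lock = Lock()
    _regex_cache = {}
    _CACHE_MAX_SIZE = 5

    def get_cached_regex(pattern):
        # Take the lock for every lookup so a concurrent clear() cannot
        # race with the insert; eviction is a wholesale clear, as in tslib.
        with _cache_lock:
            regex = _regex_cache.get(pattern)
            if regex is None:
                if len(_regex_cache) >= _CACHE_MAX_SIZE:
                    _regex_cache.clear()
                regex = re.compile(pattern, re.IGNORECASE)
                _regex_cache[pattern] = regex
            return regex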
-_TimeRE_cache = TimeRE() -_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache -_regex_cache = {} - -cdef _calc_julian_from_U_or_W(int year, int week_of_year, - int day_of_week, int week_starts_Mon): - """Calculate the Julian day based on the year, week of the year, and day of - the week, with week_start_day representing whether the week of the year - assumes the week starts on Sunday or Monday (6 or 0).""" - - cdef: - int first_weekday, week_0_length, days_to_week - - first_weekday = datetime_date(year, 1, 1).weekday() - # If we are dealing with the %U directive (week starts on Sunday), it's - # easier to just shift the view to Sunday being the first day of the - # week. - if not week_starts_Mon: - first_weekday = (first_weekday + 1) % 7 - day_of_week = (day_of_week + 1) % 7 - # Need to watch out for a week 0 (when the first day of the year is not - # the same as that specified by %U or %W). - week_0_length = (7 - first_weekday) % 7 - if week_of_year == 0: - return 1 + day_of_week - first_weekday - else: - days_to_week = week_0_length + (7 * (week_of_year - 1)) - return 1 + days_to_week + day_of_week - -# def _strptime_time(data_string, format="%a %b %d %H:%M:%S %Y"): -# return _strptime(data_string, format)[0] diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx new file mode 100644 index 00000000000000..20b24d6be9a581 --- /dev/null +++ b/pandas/_libs/tslibs/strptime.pyx @@ -0,0 +1,640 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +"""Strptime-related classes and functions. +""" +import time +import locale +import calendar +import re + + +# Python 2 vs Python 3 +try: + from thread import allocate_lock as _thread_allocate_lock +except: + try: + from _thread import allocate_lock as _thread_allocate_lock + except: + try: + from dummy_thread import allocate_lock as _thread_allocate_lock + except: + from _dummy_thread import allocate_lock as _thread_allocate_lock + + +from cython cimport Py_ssize_t +from cpython cimport PyFloat_Check + +cimport cython + +import numpy as np +cimport numpy as np +from numpy cimport ndarray, int64_t + +from datetime import date as datetime_date +from datetime cimport datetime + +# This is src/datetime.pxd +from datetime cimport ( + PANDAS_FR_ns, + check_dts_bounds, + pandas_datetimestruct, + pandas_datetimestruct_to_datetime) + +from util cimport is_string_object, get_nat + +cdef int64_t NPY_NAT = get_nat() + +cdef set _nat_strings = set(['NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN']) + + +# TODO: Consolidate with other implementations +cdef inline bint _checknull_with_nat(object val): + """ utility to check if a value is a nat or not """ + return (val is None or + (PyFloat_Check(val) and val != val) or + (isinstance(val, datetime) and not val == val)) + + +def array_strptime(ndarray[object] values, object fmt, + bint exact=True, errors='raise'): + """ + Calculates the datetime structs represented by the passed array of strings + + Parameters + ---------- + values : ndarray of string-like objects + fmt : string-like regex + exact : matches must be exact if True, search if False + coerce : if invalid values found, coerce to NaT + """ + + cdef: + Py_ssize_t i, n = len(values) + pandas_datetimestruct dts + ndarray[int64_t] iresult + int year, month, day, minute, hour, second, weekday, julian, tz + int week_of_year, week_of_year_start + int64_t us, ns + object val, group_key, ampm, found + dict found_key + bint is_raise = errors=='raise' + bint is_ignore = errors=='ignore' + bint is_coerce = errors=='coerce' + + assert 
is_raise or is_ignore or is_coerce + + global _TimeRE_cache, _regex_cache + with _cache_lock: + if _getlang() != _TimeRE_cache.locale_time.lang: + _TimeRE_cache = TimeRE() + _regex_cache.clear() + if len(_regex_cache) > _CACHE_MAX_SIZE: + _regex_cache.clear() + locale_time = _TimeRE_cache.locale_time + format_regex = _regex_cache.get(fmt) + if not format_regex: + try: + format_regex = _TimeRE_cache.compile(fmt) + # KeyError raised when a bad format is found; can be specified as + # \\, in which case it was a stray % but with a space after it + except KeyError, err: + bad_directive = err.args[0] + if bad_directive == "\\": + bad_directive = "%" + del err + raise ValueError("'%s' is a bad directive in format '%s'" % + (bad_directive, fmt)) + # IndexError only occurs when the format string is "%" + except IndexError: + raise ValueError("stray %% in format '%s'" % fmt) + _regex_cache[fmt] = format_regex + + result = np.empty(n, dtype='M8[ns]') + iresult = result.view('i8') + + dts.us = dts.ps = dts.as = 0 + + cdef dict _parse_code_table = { + 'y': 0, + 'Y': 1, + 'm': 2, + 'B': 3, + 'b': 4, + 'd': 5, + 'H': 6, + 'I': 7, + 'M': 8, + 'S': 9, + 'f': 10, + 'A': 11, + 'a': 12, + 'w': 13, + 'j': 14, + 'U': 15, + 'W': 16, + 'Z': 17, + 'p': 18 # just an additional key, works only with I + } + cdef int parse_code + + for i in range(n): + val = values[i] + if is_string_object(val): + if val in _nat_strings: + iresult[i] = NPY_NAT + continue + else: + if _checknull_with_nat(val): + iresult[i] = NPY_NAT + continue + else: + val = str(val) + + # exact matching + if exact: + found = format_regex.match(val) + if not found: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError("time data %r does not match " + "format %r (match)" % (values[i], fmt)) + if len(val) != found.end(): + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError("unconverted data remains: %s" % + values[i][found.end():]) + + # search + else: + found = format_regex.search(val) + if not found: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise ValueError("time data %r does not match format " + "%r (search)" % (values[i], fmt)) + + year = 1900 + month = day = 1 + hour = minute = second = ns = us = 0 + tz = -1 + # Default to -1 to signify that values not known; not critical to have, + # though + week_of_year = -1 + week_of_year_start = -1 + # weekday and julian defaulted to -1 so as to signal need to calculate + # values + weekday = julian = -1 + found_dict = found.groupdict() + for group_key in found_dict.iterkeys(): + # Directives not explicitly handled below: + # c, x, X + # handled by making out of other directives + # U, W + # worthless without day of the week + parse_code = _parse_code_table[group_key] + + if parse_code == 0: + year = int(found_dict['y']) + # Open Group specification for strptime() states that a %y + #value in the range of [00, 68] is in the century 2000, while + #[69,99] is in the century 1900 + if year <= 68: + year += 2000 + else: + year += 1900 + elif parse_code == 1: + year = int(found_dict['Y']) + elif parse_code == 2: + month = int(found_dict['m']) + elif parse_code == 3: + # elif group_key == 'B': + month = locale_time.f_month.index(found_dict['B'].lower()) + elif parse_code == 4: + # elif group_key == 'b': + month = locale_time.a_month.index(found_dict['b'].lower()) + elif parse_code == 5: + # elif group_key == 'd': + day = int(found_dict['d']) + elif parse_code == 6: + # elif group_key == 'H': + hour = int(found_dict['H']) + elif parse_code == 7: + hour = int(found_dict['I']) 
+ ampm = found_dict.get('p', '').lower() + # If there was no AM/PM indicator, we'll treat this like AM + if ampm in ('', locale_time.am_pm[0]): + # We're in AM so the hour is correct unless we're + # looking at 12 midnight. + # 12 midnight == 12 AM == hour 0 + if hour == 12: + hour = 0 + elif ampm == locale_time.am_pm[1]: + # We're in PM so we need to add 12 to the hour unless + # we're looking at 12 noon. + # 12 noon == 12 PM == hour 12 + if hour != 12: + hour += 12 + elif parse_code == 8: + minute = int(found_dict['M']) + elif parse_code == 9: + second = int(found_dict['S']) + elif parse_code == 10: + s = found_dict['f'] + # Pad to always return nanoseconds + s += "0" * (9 - len(s)) + us = long(s) + ns = us % 1000 + us = us / 1000 + elif parse_code == 11: + weekday = locale_time.f_weekday.index(found_dict['A'].lower()) + elif parse_code == 12: + weekday = locale_time.a_weekday.index(found_dict['a'].lower()) + elif parse_code == 13: + weekday = int(found_dict['w']) + if weekday == 0: + weekday = 6 + else: + weekday -= 1 + elif parse_code == 14: + julian = int(found_dict['j']) + elif parse_code == 15 or parse_code == 16: + week_of_year = int(found_dict[group_key]) + if group_key == 'U': + # U starts week on Sunday. + week_of_year_start = 6 + else: + # W starts week on Monday. + week_of_year_start = 0 + elif parse_code == 17: + # Since -1 is default value only need to worry about setting tz + # if it can be something other than -1. + found_zone = found_dict['Z'].lower() + for value, tz_values in enumerate(locale_time.timezone): + if found_zone in tz_values: + # Deal w/ bad locale setup where timezone names are the + # same and yet time.daylight is true; too ambiguous to + # be able to tell what timezone has daylight savings + if (time.tzname[0] == time.tzname[1] and + time.daylight and found_zone not in ( + "utc", "gmt")): + break + else: + tz = value + break + # If we know the wk of the year and what day of that wk, we can figure + # out the Julian day of the year. + if julian == -1 and week_of_year != -1 and weekday != -1: + week_starts_Mon = True if week_of_year_start == 0 else False + julian = _calc_julian_from_U_or_W(year, week_of_year, weekday, + week_starts_Mon) + # Cannot pre-calculate datetime_date() since can change in Julian + # calculation and thus could have different value for the day of the wk + # calculation. + try: + if julian == -1: + # Need to add 1 to result since first day of the year is 1, not + # 0. + julian = datetime_date(year, month, day).toordinal() - \ + datetime_date(year, 1, 1).toordinal() + 1 + else: # Assume that if they bothered to include Julian day it will + # be accurate. 
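(The ``else`` branch of this ``try`` block continues just below.) The ordinal round-trip it relies on is easy to verify with the standard library:

    from datetime import date

    year, month, day = 1999, 3, 17
    julian = (date(year, month, day).toordinal() -
              date(year, 1, 1).toordinal() + 1)
    print(julian)  # 76 -- 1-based day of the year

    # and back again, as in the %j branch:
    print(date.fromordinal(julian - 1 + date(year, 1, 1).toordinal()))
    # 1999-03-17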
+ datetime_result = datetime_date.fromordinal( + (julian - 1) + datetime_date(year, 1, 1).toordinal()) + year = datetime_result.year + month = datetime_result.month + day = datetime_result.day + except ValueError: + if is_coerce: + iresult[i] = NPY_NAT + continue + raise + if weekday == -1: + weekday = datetime_date(year, month, day).weekday() + + dts.year = year + dts.month = month + dts.day = day + dts.hour = hour + dts.min = minute + dts.sec = second + dts.us = us + dts.ps = ns * 1000 + + iresult[i] = pandas_datetimestruct_to_datetime(PANDAS_FR_ns, &dts) + if check_dts_bounds(&dts): + if is_coerce: + iresult[i] = NPY_NAT + continue + else: + from pandas._libs.tslib import OutOfBoundsDatetime + fmt = '%d-%.2d-%.2d %.2d:%.2d:%.2d' % (dts.year, dts.month, + dts.day, dts.hour, + dts.min, dts.sec) + raise OutOfBoundsDatetime( + 'Out of bounds nanosecond timestamp: %s' % fmt) + + return result + + +"""_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored +from the standard library, see +https://github.com/python/cpython/blob/master/Lib/_strptime.py +The original module-level docstring follows. + +Strptime-related classes and functions. +CLASSES: + LocaleTime -- Discovers and stores locale-specific time information + TimeRE -- Creates regexes for pattern matching a string of text containing + time information +FUNCTIONS: + _getlang -- Figure out what language is being used for the locale + strptime -- Calculates the time struct represented by the passed-in string +""" + + +def _getlang(): + """Figure out what language is being used for the locale""" + return locale.getlocale(locale.LC_TIME) + + +class LocaleTime(object): + """Stores and handles locale-specific information related to time. + + ATTRIBUTES: + f_weekday -- full weekday names (7-item list) + a_weekday -- abbreviated weekday names (7-item list) + f_month -- full month names (13-item list; dummy value in [0], which + is added by code) + a_month -- abbreviated month names (13-item list, dummy value in + [0], which is added by code) + am_pm -- AM/PM representation (2-item list) + LC_date_time -- format string for date/time representation (string) + LC_date -- format string for date representation (string) + LC_time -- format string for time representation (string) + timezone -- daylight- and non-daylight-savings timezone representation + (2-item list of sets) + lang -- Language used by instance (2-item tuple) + """ + + def __init__(self): + """Set all attributes. + + Order of methods called matters for dependency reasons. + + The locale language is set at the offset and then checked again before + exiting. This is to make sure that the attributes were not set with a + mix of information from more than one locale. This would most likely + happen when using threads where one thread calls a locale-dependent + function while another thread changes the locale while the function in + the other thread is still running. Proper coding would call for + locks to prevent changing the locale while locale-dependent code is + running. The check here is done in case someone does not think about + doing this. + + Only other possible issue is if someone changed the timezone and did + not call tz.tzset . That is an issue for the programmer, though, + since changing the timezone is worthless without that call. 
+ + """ + self.lang = _getlang() + self.__calc_weekday() + self.__calc_month() + self.__calc_am_pm() + self.__calc_timezone() + self.__calc_date_time() + if _getlang() != self.lang: + raise ValueError("locale changed during initialization") + + def __pad(self, seq, front): + # Add '' to seq to either the front (is True), else the back. + seq = list(seq) + if front: + seq.insert(0, '') + else: + seq.append('') + return seq + + def __calc_weekday(self): + # Set self.a_weekday and self.f_weekday using the calendar + # module. + a_weekday = [calendar.day_abbr[i].lower() for i in range(7)] + f_weekday = [calendar.day_name[i].lower() for i in range(7)] + self.a_weekday = a_weekday + self.f_weekday = f_weekday + + def __calc_month(self): + # Set self.f_month and self.a_month using the calendar module. + a_month = [calendar.month_abbr[i].lower() for i in range(13)] + f_month = [calendar.month_name[i].lower() for i in range(13)] + self.a_month = a_month + self.f_month = f_month + + def __calc_am_pm(self): + # Set self.am_pm by using time.strftime(). + + # The magic date (1999,3,17,hour,44,55,2,76,0) is not really that + # magical; just happened to have used it everywhere else where a + # static date was needed. + am_pm = [] + for hour in (01, 22): + time_tuple = time.struct_time( + (1999, 3, 17, hour, 44, 55, 2, 76, 0)) + am_pm.append(time.strftime("%p", time_tuple).lower()) + self.am_pm = am_pm + + def __calc_date_time(self): + # Set self.date_time, self.date, & self.time by using + # time.strftime(). + + # Use (1999,3,17,22,44,55,2,76,0) for magic date because the amount of + # overloaded numbers is minimized. The order in which searches for + # values within the format string is very important; it eliminates + # possible ambiguity for what something represents. + time_tuple = time.struct_time((1999, 3, 17, 22, 44, 55, 2, 76, 0)) + date_time = [None, None, None] + date_time[0] = time.strftime("%c", time_tuple).lower() + date_time[1] = time.strftime("%x", time_tuple).lower() + date_time[2] = time.strftime("%X", time_tuple).lower() + replacement_pairs = [('%', '%%'), (self.f_weekday[2], '%A'), + (self.f_month[3], + '%B'), (self.a_weekday[2], '%a'), + (self.a_month[3], '%b'), (self.am_pm[1], '%p'), + ('1999', '%Y'), ('99', '%y'), ('22', '%H'), + ('44', '%M'), ('55', '%S'), ('76', '%j'), + ('17', '%d'), ('03', '%m'), ('3', '%m'), + # '3' needed for when no leading zero. + ('2', '%w'), ('10', '%I')] + replacement_pairs.extend([(tz, "%Z") for tz_values in self.timezone + for tz in tz_values]) + for offset, directive in ((0, '%c'), (1, '%x'), (2, '%X')): + current_format = date_time[offset] + for old, new in replacement_pairs: + # Must deal with possible lack of locale info + # manifesting itself as the empty string (e.g., Swedish's + # lack of AM/PM info) or a platform returning a tuple of empty + # strings (e.g., MacOS 9 having timezone as ('','')). + if old: + current_format = current_format.replace(old, new) + # If %W is used, then Sunday, 2005-01-03 will fall on week 0 since + # 2005-01-03 occurs before the first Monday of the year. Otherwise + # %U is used. + time_tuple = time.struct_time((1999, 1, 3, 1, 1, 1, 6, 3, 0)) + if '00' in time.strftime(directive, time_tuple): + U_W = '%W' + else: + U_W = '%U' + date_time[offset] = current_format.replace('11', U_W) + self.LC_date_time = date_time[0] + self.LC_date = date_time[1] + self.LC_time = date_time[2] + + def __calc_timezone(self): + # Set self.timezone by using time.tzname. 
+ # Do not worry about possibility of time.tzname[0] == timetzname[1] + # and time.daylight; handle that in strptime . + try: + time.tzset() + except AttributeError: + pass + no_saving = frozenset(["utc", "gmt", time.tzname[0].lower()]) + if time.daylight: + has_saving = frozenset([time.tzname[1].lower()]) + else: + has_saving = frozenset() + self.timezone = (no_saving, has_saving) + + +class TimeRE(dict): + """ + Handle conversion from format directives to regexes. + + Creates regexes for pattern matching a string of text containing + time information + """ + + def __init__(self, locale_time=None): + """Create keys/values. + + Order of execution is important for dependency reasons. + + """ + if locale_time: + self.locale_time = locale_time + else: + self.locale_time = LocaleTime() + base = super(TimeRE, self) + base.__init__({ + # The " \d" part of the regex is to make %c from ANSI C work + 'd': r"(?P3[0-1]|[1-2]\d|0[1-9]|[1-9]| [1-9])", + 'f': r"(?P[0-9]{1,9})", + 'H': r"(?P2[0-3]|[0-1]\d|\d)", + 'I': r"(?P1[0-2]|0[1-9]|[1-9])", + 'j': (r"(?P36[0-6]|3[0-5]\d|[1-2]\d\d|0[1-9]\d|00[1-9]|" + r"[1-9]\d|0[1-9]|[1-9])"), + 'm': r"(?P1[0-2]|0[1-9]|[1-9])", + 'M': r"(?P[0-5]\d|\d)", + 'S': r"(?P6[0-1]|[0-5]\d|\d)", + 'U': r"(?P5[0-3]|[0-4]\d|\d)", + 'w': r"(?P[0-6])", + # W is set below by using 'U' + 'y': r"(?P\d\d)", + #XXX: Does 'Y' need to worry about having less or more than + # 4 digits? + 'Y': r"(?P\d\d\d\d)", + 'A': self.__seqToRE(self.locale_time.f_weekday, 'A'), + 'a': self.__seqToRE(self.locale_time.a_weekday, 'a'), + 'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'), + 'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'), + 'p': self.__seqToRE(self.locale_time.am_pm, 'p'), + 'Z': self.__seqToRE([tz for tz_names in self.locale_time.timezone + for tz in tz_names], + 'Z'), + '%': '%'}) + base.__setitem__('W', base.__getitem__('U').replace('U', 'W')) + base.__setitem__('c', self.pattern(self.locale_time.LC_date_time)) + base.__setitem__('x', self.pattern(self.locale_time.LC_date)) + base.__setitem__('X', self.pattern(self.locale_time.LC_time)) + + def __seqToRE(self, to_convert, directive): + """Convert a list to a regex string for matching a directive. + + Want possible matching values to be from longest to shortest. This + prevents the possibility of a match occuring for a value that also + a substring of a larger value that should have matched (e.g., 'abc' + matching when 'abcdef' should have been the match). + + """ + to_convert = sorted(to_convert, key=len, reverse=True) + for value in to_convert: + if value != '': + break + else: + return '' + regex = '|'.join([re.escape(stuff) for stuff in to_convert]) + regex = '(?P<%s>%s' % (directive, regex) + return '%s)' % regex + + def pattern(self, format): + """Return regex pattern for the format string. + + Need to make sure that any characters that might be interpreted as + regex syntax are escaped. + + """ + processed_format = '' + # The sub() call escapes all characters that might be misconstrued + # as regex syntax. Cannot use re.escape since we have to deal with + # format directives (%m, etc.). 
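(The escaping code itself follows below.) The net effect of ``pattern()`` can be seen via the stdlib's private ``_strptime`` module, which carries the same vendored logic (for illustration only; output abridged):

    import _strptime

    time_re = _strptime.TimeRE()
    print(time_re.pattern('%Y-%m-%d'))
    # roughly: (?P<Y>\d\d\d\d)-(?P<m>1[0-2]|0[1-9]|[1-9])-(?P<d>3[0-1]|...)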
+ regex_chars = re.compile(r"([\\.^$*+?\(\){}\[\]|])") + format = regex_chars.sub(r"\\\1", format) + whitespace_replacement = re.compile(r'\s+') + format = whitespace_replacement.sub(r'\\s+', format) + while '%' in format: + directive_index = format.index('%') +1 + processed_format = "%s%s%s" % (processed_format, + format[:directive_index -1], + self[format[directive_index]]) + format = format[directive_index +1:] + return "%s%s" % (processed_format, format) + + def compile(self, format): + """Return a compiled re object for the format string.""" + return re.compile(self.pattern(format), re.IGNORECASE) + + +_cache_lock = _thread_allocate_lock() +# DO NOT modify _TimeRE_cache or _regex_cache without acquiring the cache lock +# first! +_TimeRE_cache = TimeRE() +_CACHE_MAX_SIZE = 5 # Max number of regexes stored in _regex_cache +_regex_cache = {} + + +cdef _calc_julian_from_U_or_W(int year, int week_of_year, + int day_of_week, int week_starts_Mon): + """Calculate the Julian day based on the year, week of the year, and day of + the week, with week_start_day representing whether the week of the year + assumes the week starts on Sunday or Monday (6 or 0).""" + + cdef: + int first_weekday, week_0_length, days_to_week + + first_weekday = datetime_date(year, 1, 1).weekday() + # If we are dealing with the %U directive (week starts on Sunday), it's + # easier to just shift the view to Sunday being the first day of the + # week. + if not week_starts_Mon: + first_weekday = (first_weekday + 1) % 7 + day_of_week = (day_of_week + 1) % 7 + + # Need to watch out for a week 0 (when the first day of the year is not + # the same as that specified by %U or %W). + week_0_length = (7 - first_weekday) % 7 + if week_of_year == 0: + return 1 + day_of_week - first_weekday + else: + days_to_week = week_0_length + (7 * (week_of_year - 1)) + return 1 + days_to_week + day_of_week diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 95fe3ab83c2abf..bf89509fd17467 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -3,6 +3,7 @@ from collections import MutableMapping from pandas._libs import lib, tslib +from pandas._libs.tslibs.strptime import array_strptime from pandas._libs.tslibs.timezones import get_timezone from pandas.core.dtypes.common import ( @@ -416,8 +417,8 @@ def _convert_listlike(arg, box, format, name=None, tz=tz): # fallback if result is None: try: - result = tslib.array_strptime(arg, format, exact=exact, - errors=errors) + result = array_strptime(arg, format, exact=exact, + errors=errors) except tslib.OutOfBoundsDatetime: if errors == 'raise': raise diff --git a/setup.py b/setup.py index 555cf9dc4a9b39..25a4924dad0bc8 100755 --- a/setup.py +++ b/setup.py @@ -471,7 +471,6 @@ def pxd(name): 'pandas/_libs/src/datetime/np_datetime_strings.h', 'pandas/_libs/src/datetime.pxd'] - # some linux distros require it libraries = ['m'] if not is_platform_windows() else [] @@ -483,6 +482,10 @@ def pxd(name): 'pxdfiles': ['_libs/hashtable'], 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, + '_libs.tslibs.strptime': {'pyxfile': '_libs/tslibs/strptime', + 'depends': tseries_depends, + 'sources': ['pandas/_libs/src/datetime/np_datetime.c', + 'pandas/_libs/src/datetime/np_datetime_strings.c']}, '_libs.tslib': {'pyxfile': '_libs/tslib', 'pxdfiles': ['_libs/src/util', '_libs/lib'], 'depends': tseries_depends, From 4c9e98d1ded9660812e62c4a4ecadeaf9d3f0e6b Mon Sep 17 00:00:00 2001 From: cbertinato Date: Mon, 25 Sep 2017 06:10:05 -0400 
Subject: [PATCH 147/188] CLN: replace %s syntax with .format in io (#17660) Progress toward issue #16130. Converted old string formatting to new string formatting in io/html.py, io/excel.py, msgpack/_packer.pyx, msgpack/_unpacker.pyx, clipboard/exceptions.py, json/json.py, json/normalize.py, sas/sas.pyx --- pandas/io/clipboard/exceptions.py | 2 +- pandas/io/excel.py | 72 ++++++++++++++++++------------- pandas/io/html.py | 46 +++++++++++--------- pandas/io/json/json.py | 23 +++++----- pandas/io/json/normalize.py | 7 +-- pandas/io/msgpack/_packer.pyx | 2 +- pandas/io/msgpack/_unpacker.pyx | 7 +-- pandas/io/sas/sas.pyx | 16 ++++--- 8 files changed, 101 insertions(+), 74 deletions(-) diff --git a/pandas/io/clipboard/exceptions.py b/pandas/io/clipboard/exceptions.py index 413518e53660af..d948ad414327ca 100644 --- a/pandas/io/clipboard/exceptions.py +++ b/pandas/io/clipboard/exceptions.py @@ -8,5 +8,5 @@ class PyperclipException(RuntimeError): class PyperclipWindowsException(PyperclipException): def __init__(self, message): - message += " (%s)" % ctypes.WinError() + message += " ({err})".format(err=ctypes.WinError()) super(PyperclipWindowsException, self).__init__(message) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index faafdba435ff21..afecd76c498efa 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -165,7 +165,7 @@ def register_writer(klass): if ext.startswith('.'): ext = ext[1:] if ext not in _writer_extensions: - config.register_option("io.excel.%s.writer" % ext, + config.register_option("io.excel.{ext}.writer".format(ext=ext), engine_name, validator=str) _writer_extensions.append(ext) @@ -190,7 +190,8 @@ def get_writer(engine_name): try: return _writers[engine_name] except KeyError: - raise ValueError("No Excel writer '%s'" % engine_name) + raise ValueError("No Excel writer '{engine}'" + .format(engine=engine_name)) @Appender(_read_excel_doc) @@ -259,7 +260,7 @@ def __init__(self, io, **kwds): engine = kwds.pop('engine', None) if engine is not None and engine != 'xlrd': - raise ValueError("Unknown engine: %s" % engine) + raise ValueError("Unknown engine: {engine}".format(engine=engine)) # If io is a url, want to keep the data as bytes so can't pass # to get_filepath_or_buffer() @@ -445,7 +446,7 @@ def _parse_cell(cell_contents, cell_typ): for asheetname in sheets: if verbose: - print("Reading sheet %s" % asheetname) + print("Reading sheet {sheet}".format(sheet=asheetname)) if isinstance(asheetname, compat.string_types): sheet = self.book.sheet_by_name(asheetname) @@ -634,7 +635,7 @@ def _conv_value(val): elif is_bool(val): val = bool(val) elif isinstance(val, Period): - val = "%s" % val + val = "{val}".format(val=val) elif is_list_like(val): val = str(val) @@ -697,9 +698,11 @@ def __new__(cls, path, engine=None, **kwargs): ext = 'xlsx' try: - engine = config.get_option('io.excel.%s.writer' % ext) + engine = config.get_option('io.excel.{ext}.writer' + .format(ext=ext)) except KeyError: - error = ValueError("No engine for filetype: '%s'" % ext) + error = ValueError("No engine for filetype: '{ext}'" + .format(ext=ext)) raise error cls = get_writer(engine) @@ -787,8 +790,9 @@ def check_extension(cls, ext): if ext.startswith('.'): ext = ext[1:] if not any(ext in extension for extension in cls.supported_extensions): - msg = (u("Invalid extension for engine '%s': '%s'") % - (pprint_thing(cls.engine), pprint_thing(ext))) + msg = (u("Invalid extension for engine '{engine}': '{ext}'") + .format(engine=pprint_thing(cls.engine), + ext=pprint_thing(ext))) raise ValueError(msg) else: 
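+            # the extension matched one of this engine's supported
+            # extensions, so the writer can handle this file type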
return True @@ -813,8 +817,8 @@ class _Openpyxl1Writer(ExcelWriter): def __init__(self, path, engine=None, **engine_kwargs): if not openpyxl_compat.is_compat(major_ver=self.openpyxl_majorver): raise ValueError('Installed openpyxl is not supported at this ' - 'time. Use {0}.x.y.' - .format(self.openpyxl_majorver)) + 'time. Use {majorver}.x.y.' + .format(majorver=self.openpyxl_majorver)) # Use the openpyxl module as the Excel writer. from openpyxl.workbook import Workbook @@ -854,7 +858,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, for cell in cells: colletter = get_column_letter(startcol + cell.col + 1) - xcell = wks.cell("%s%s" % (colletter, startrow + cell.row + 1)) + xcell = wks.cell("{col}{row}".format(col=colletter, + row=startrow + cell.row + 1)) if (isinstance(cell.val, compat.string_types) and xcell.data_type_for_value(cell.val) != xcell.TYPE_STRING): xcell.set_value_explicit(cell.val) @@ -876,10 +881,12 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, cletterstart = get_column_letter(startcol + cell.col + 1) cletterend = get_column_letter(startcol + cell.mergeend + 1) - wks.merge_cells('%s%s:%s%s' % (cletterstart, - startrow + cell.row + 1, - cletterend, - startrow + cell.mergestart + 1)) + wks.merge_cells('{start}{row}:{end}{mergestart}' + .format(start=cletterstart, + row=startrow + cell.row + 1, + end=cletterend, + mergestart=startrow + + cell.mergestart + 1)) # Excel requires that the format of the first cell in a merged # range is repeated in the rest of the merged range. @@ -895,7 +902,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, # Ignore first cell. It is already handled. continue colletter = get_column_letter(col) - xcell = wks.cell("%s%s" % (colletter, row)) + xcell = wks.cell("{col}{row}" + .format(col=colletter, row=row)) for field in style.__fields__: xcell.style.__setattr__( field, style.__getattribute__(field)) @@ -955,7 +963,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, for cell in cells: colletter = get_column_letter(startcol + cell.col + 1) - xcell = wks["%s%s" % (colletter, startrow + cell.row + 1)] + xcell = wks["{col}{row}" + .format(col=colletter, row=startrow + cell.row + 1)] xcell.value = _conv_value(cell.val) style_kwargs = {} @@ -977,10 +986,12 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, cletterstart = get_column_letter(startcol + cell.col + 1) cletterend = get_column_letter(startcol + cell.mergeend + 1) - wks.merge_cells('%s%s:%s%s' % (cletterstart, - startrow + cell.row + 1, - cletterend, - startrow + cell.mergestart + 1)) + wks.merge_cells('{start}{row}:{end}{mergestart}' + .format(start=cletterstart, + row=startrow + cell.row + 1, + end=cletterend, + mergestart=startrow + + cell.mergestart + 1)) # Excel requires that the format of the first cell in a merged # range is repeated in the rest of the merged range. @@ -996,7 +1007,8 @@ def write_cells(self, cells, sheet_name=None, startrow=0, startcol=0, # Ignore first cell. It is already handled. 
continue colletter = get_column_letter(col) - xcell = wks["%s%s" % (colletter, row)] + xcell = wks["{col}{row}" + .format(col=colletter, row=row)] xcell.style = xcell.style.copy(**style_kwargs) @classmethod @@ -1030,7 +1042,7 @@ def _convert_to_style_kwargs(cls, style_dict): for k, v in style_dict.items(): if k in _style_key_map: k = _style_key_map[k] - _conv_to_x = getattr(cls, '_convert_to_{0}'.format(k), + _conv_to_x = getattr(cls, '_convert_to_{k}'.format(k=k), lambda x: None) new_v = _conv_to_x(v) if new_v: @@ -1505,17 +1517,19 @@ def _style_to_xlwt(cls, item, firstlevel=True, field_sep=',', """ if hasattr(item, 'items'): if firstlevel: - it = ["%s: %s" % (key, cls._style_to_xlwt(value, False)) + it = ["{key}: {val}" + .format(key=key, val=cls._style_to_xlwt(value, False)) for key, value in item.items()] - out = "%s " % (line_sep).join(it) + out = "{sep} ".format(sep=(line_sep).join(it)) return out else: - it = ["%s %s" % (key, cls._style_to_xlwt(value, False)) + it = ["{key} {val}" + .format(key=key, val=cls._style_to_xlwt(value, False)) for key, value in item.items()] - out = "%s " % (field_sep).join(it) + out = "{sep} ".format(sep=(field_sep).join(it)) return out else: - item = "%s" % item + item = "{item}".format(item=item) item = item.replace("True", "on") item = item.replace("False", "off") return item diff --git a/pandas/io/html.py b/pandas/io/html.py index a4acb26af52590..b5aaffcf710c29 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -439,14 +439,15 @@ def _parse_tables(self, doc, match, attrs): unique_tables.add(table) if not result: - raise ValueError("No tables found matching pattern %r" % - match.pattern) + raise ValueError("No tables found matching pattern {patt!r}" + .format(patt=match.pattern)) return result def _setup_build_doc(self): raw_text = _read(self.io) if not raw_text: - raise ValueError('No text parsed from document: %s' % self.io) + raise ValueError('No text parsed from document: {doc}' + .format(doc=self.io)) return raw_text def _build_doc(self): @@ -473,8 +474,8 @@ def _build_xpath_expr(attrs): if 'class_' in attrs: attrs['class'] = attrs.pop('class_') - s = [u("@%s=%r") % (k, v) for k, v in iteritems(attrs)] - return u('[%s]') % ' and '.join(s) + s = [u("@{key}={val!r}").format(key=k, val=v) for k, v in iteritems(attrs)] + return u('[{expr}]').format(expr=' and '.join(s)) _re_namespace = {'re': 'http://exslt.org/regular-expressions'} @@ -517,8 +518,8 @@ def _parse_tables(self, doc, match, kwargs): # 1. check all descendants for the given pattern and only search tables # 2. 
go up the tree until we find a table - query = '//table//*[re:test(text(), %r)]/ancestor::table' - xpath_expr = u(query) % pattern + query = '//table//*[re:test(text(), {patt!r})]/ancestor::table' + xpath_expr = u(query).format(patt=pattern) # if any table attributes were given build an xpath expression to # search for them @@ -528,7 +529,8 @@ def _parse_tables(self, doc, match, kwargs): tables = doc.xpath(xpath_expr, namespaces=_re_namespace) if not tables: - raise ValueError("No tables found matching regex %r" % pattern) + raise ValueError("No tables found matching regex {patt!r}" + .format(patt=pattern)) return tables def _build_doc(self): @@ -574,8 +576,9 @@ def _build_doc(self): scheme = parse_url(self.io).scheme if scheme not in _valid_schemes: # lxml can't parse it - msg = ('%r is not a valid url scheme, valid schemes are ' - '%s') % (scheme, _valid_schemes) + msg = (('{invalid!r} is not a valid url scheme, valid ' + 'schemes are {valid}') + .format(invalid=scheme, valid=_valid_schemes)) raise ValueError(msg) else: # something else happened: maybe a faulty connection @@ -670,8 +673,9 @@ def _parser_dispatch(flavor): """ valid_parsers = list(_valid_parsers.keys()) if flavor not in valid_parsers: - raise ValueError('%r is not a valid flavor, valid flavors are %s' % - (flavor, valid_parsers)) + raise ValueError('{invalid!r} is not a valid flavor, valid flavors ' + 'are {valid}' + .format(invalid=flavor, valid=valid_parsers)) if flavor in ('bs4', 'html5lib'): if not _HAS_HTML5LIB: @@ -695,7 +699,7 @@ def _parser_dispatch(flavor): def _print_as_set(s): - return '{%s}' % ', '.join([pprint_thing(el) for el in s]) + return '{{arg}}'.format(arg=', '.join([pprint_thing(el) for el in s])) def _validate_flavor(flavor): @@ -705,21 +709,23 @@ def _validate_flavor(flavor): flavor = flavor, elif isinstance(flavor, collections.Iterable): if not all(isinstance(flav, string_types) for flav in flavor): - raise TypeError('Object of type %r is not an iterable of strings' % - type(flavor).__name__) + raise TypeError('Object of type {typ!r} is not an iterable of ' + 'strings' + .format(typ=type(flavor).__name__)) else: - fmt = '{0!r}' if isinstance(flavor, string_types) else '{0}' + fmt = '{flavor!r}' if isinstance(flavor, string_types) else '{flavor}' fmt += ' is not a valid flavor' - raise ValueError(fmt.format(flavor)) + raise ValueError(fmt.format(flavor=flavor)) flavor = tuple(flavor) valid_flavors = set(_valid_parsers) flavor_set = set(flavor) if not flavor_set & valid_flavors: - raise ValueError('%s is not a valid set of flavors, valid flavors are ' - '%s' % (_print_as_set(flavor_set), - _print_as_set(valid_flavors))) + raise ValueError('{invalid} is not a valid set of flavors, valid ' + 'flavors are {valid}' + .format(invalid=_print_as_set(flavor_set), + valid=_print_as_set(valid_flavors))) return flavor diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index a1d48719ba9c0f..5dae6099446d0f 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -99,7 +99,7 @@ class SeriesWriter(Writer): def _format_axes(self): if not self.obj.index.is_unique and self.orient == 'index': raise ValueError("Series index must be unique for orient=" - "'%s'" % self.orient) + "'{orient}'".format(orient=self.orient)) class FrameWriter(Writer): @@ -110,11 +110,11 @@ def _format_axes(self): if not self.obj.index.is_unique and self.orient in ( 'index', 'columns'): raise ValueError("DataFrame index must be unique for orient=" - "'%s'." 
% self.orient) + "'{orient}'.".format(orient=self.orient)) if not self.obj.columns.is_unique and self.orient in ( 'index', 'columns', 'records'): raise ValueError("DataFrame columns must be unique for orient=" - "'%s'." % self.orient) + "'{orient}'.".format(orient=self.orient)) class JSONTableWriter(FrameWriter): @@ -134,8 +134,9 @@ def __init__(self, obj, orient, date_format, double_precision, if date_format != 'iso': msg = ("Trying to write with `orient='table'` and " - "`date_format='%s'`. Table Schema requires dates " - "to be formatted with `date_format='iso'`" % date_format) + "`date_format='{fmt}'`. Table Schema requires dates " + "to be formatted with `date_format='iso'`" + .format(fmt=date_format)) raise ValueError(msg) self.schema = build_table_schema(obj) @@ -166,8 +167,8 @@ def __init__(self, obj, orient, date_format, double_precision, def write(self): data = super(JSONTableWriter, self).write() - serialized = '{{"schema": {}, "data": {}}}'.format( - dumps(self.schema), data) + serialized = '{{"schema": {schema}, "data": {data}}}'.format( + schema=dumps(self.schema), data=data) return serialized @@ -391,8 +392,8 @@ def __init__(self, json, orient, dtype=True, convert_axes=True, if date_unit is not None: date_unit = date_unit.lower() if date_unit not in self._STAMP_UNITS: - raise ValueError('date_unit must be one of %s' % - (self._STAMP_UNITS,)) + raise ValueError('date_unit must be one of {units}' + .format(units=self._STAMP_UNITS)) self.min_stamp = self._MIN_STAMPS[date_unit] else: self.min_stamp = self._MIN_STAMPS['s'] @@ -410,8 +411,8 @@ def check_keys_split(self, decoded): bad_keys = set(decoded.keys()).difference(set(self._split_keys)) if bad_keys: bad_keys = ", ".join(bad_keys) - raise ValueError(u("JSON data had unexpected key(s): %s") % - pprint_thing(bad_keys)) + raise ValueError(u("JSON data had unexpected key(s): {bad_keys}") + .format(bad_keys=pprint_thing(bad_keys))) def parse(self): diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 72776ed01de15e..e811dd1eab1420 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -249,7 +249,8 @@ def _recursive_extract(data, path, seen_meta, level=0): raise \ KeyError("Try running with " "errors='ignore' as key " - "%s is not always present", e) + "{err} is not always present" + .format(err=e)) meta_vals[key].append(meta_val) records.extend(recs) @@ -267,8 +268,8 @@ def _recursive_extract(data, path, seen_meta, level=0): k = meta_prefix + k if k in result: - raise ValueError('Conflicting metadata name %s, ' - 'need distinguishing prefix ' % k) + raise ValueError('Conflicting metadata name {name}, ' + 'need distinguishing prefix '.format(name=k)) result[k] = np.array(v).repeat(lengths) diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx index ad7ce1fb2531ae..fd3f4612fb4322 100644 --- a/pandas/io/msgpack/_packer.pyx +++ b/pandas/io/msgpack/_packer.pyx @@ -224,7 +224,7 @@ cdef class Packer(object): default_used = 1 continue else: - raise TypeError("can't serialize %r" % (o,)) + raise TypeError("can't serialize {thing!r}".format(thing=o)) return ret cpdef pack(self, object obj): diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx index 504bfed48df3ca..22401d7514f653 100644 --- a/pandas/io/msgpack/_unpacker.pyx +++ b/pandas/io/msgpack/_unpacker.pyx @@ -94,7 +94,7 @@ cdef inline init_ctx(unpack_context *ctx, def default_read_extended_type(typecode, data): raise NotImplementedError("Cannot decode extended type " - "with typecode=%d" % 
typecode) + "with typecode={code}".format(code=typecode)) def unpackb(object packed, object object_hook=None, object list_hook=None, @@ -144,7 +144,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None, buf + off, buf_len - off)) return obj else: - raise UnpackValueError("Unpack failed: error = %d" % (ret,)) + raise UnpackValueError("Unpack failed: error = {ret}".format(ret=ret)) def unpack(object stream, object object_hook=None, object list_hook=None, @@ -411,7 +411,8 @@ cdef class Unpacker(object): else: raise OutOfData("No more data to unpack.") else: - raise ValueError("Unpack failed: error = %d" % (ret,)) + raise ValueError("Unpack failed: error = {ret}" + .format(ret=ret)) def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" diff --git a/pandas/io/sas/sas.pyx b/pandas/io/sas/sas.pyx index 4396180da44cbf..41c03cb2799a3f 100644 --- a/pandas/io/sas/sas.pyx +++ b/pandas/io/sas/sas.pyx @@ -101,10 +101,12 @@ cdef np.ndarray[uint8_t, ndim=1] rle_decompress( result[rpos] = 0x00 rpos += 1 else: - raise ValueError("unknown control byte: %v", control_byte) + raise ValueError("unknown control byte: {byte}" + .format(byte=control_byte)) if len(result) != result_length: - raise ValueError("RLE: %v != %v", (len(result), result_length)) + raise ValueError("RLE: {got} != {expect}".format(got=len(result), + expect=result_length)) return np.asarray(result) @@ -185,7 +187,8 @@ cdef np.ndarray[uint8_t, ndim=1] rdc_decompress( raise ValueError("unknown RDC command") if len(outbuff) != result_length: - raise ValueError("RDC: %v != %v\n", len(outbuff), result_length) + raise ValueError("RDC: {got} != {expect}\n" + .format(got=len(outbuff), expect=result_length)) return np.asarray(outbuff) @@ -258,7 +261,8 @@ cdef class Parser(object): self.column_types[j] = column_type_string else: raise ValueError("unknown column type: " - "%s" % self.parser.columns[j].ctype) + "{typ}" + .format(typ=self.parser.columns[j].ctype)) # compression if parser.compression == const.rle_compression: @@ -378,8 +382,8 @@ cdef class Parser(object): return True return False else: - raise ValueError("unknown page type: %s", - self.current_page_type) + raise ValueError("unknown page type: {typ}" + .format(typ=self.current_page_type)) cdef void process_byte_array_with_data(self, int offset, int length): From 868389d3dbdde71df4244e53cdd79a94201db093 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 25 Sep 2017 06:14:47 -0400 Subject: [PATCH 148/188] DEPR: deprecate .as_blocks() (#17656) closes #17302 --- doc/source/10min.rst | 12 +---- doc/source/whatsnew/v0.21.0.txt | 3 +- pandas/core/computation/expressions.py | 2 +- pandas/core/generic.py | 51 ++++++++++++---------- pandas/core/internals.py | 25 +++++++++++ pandas/core/window.py | 2 +- pandas/tests/frame/test_block_internals.py | 12 ++++- pandas/tests/frame/test_constructors.py | 5 ++- pandas/tests/internals/test_internals.py | 7 +-- pandas/tests/sparse/test_frame.py | 5 ++- pandas/util/testing.py | 4 +- 11 files changed, 79 insertions(+), 49 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index 0a23f490e66283..49142311ff0576 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -95,17 +95,7 @@ will be completed: df2.append df2.combine_first df2.apply df2.compound df2.applymap df2.consolidate - df2.as_blocks df2.convert_objects - df2.asfreq df2.copy - df2.as_matrix df2.corr - df2.astype df2.corrwith - df2.at df2.count - df2.at_time df2.cov - df2.axes df2.cummax - df2.B df2.cummin 
- df2.between_time df2.cumprod - df2.bfill df2.cumsum - df2.blocks df2.D + df2.D As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically tab completed. ``E`` is there as well; the rest of the attributes have been diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1365901c2ce5e3..07cc00b3724e42 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -488,10 +488,9 @@ Other API Changes Deprecations ~~~~~~~~~~~~ - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). +- :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) .. _whatsnew_0210.prior_deprecations: diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index af068bd1f32b34..8ddc625887a511 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -165,7 +165,7 @@ def _has_bool_dtype(x): return x.dtype == bool except AttributeError: try: - return 'bool' in x.blocks + return 'bool' in x.dtypes except AttributeError: return isinstance(x, (bool, np.bool_)) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3d55e07df6eacb..b49eeed6db85f0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1650,7 +1650,7 @@ def to_xarray(self): coords=coords, ) - _shared_docs['to_latex'] = """ + _shared_docs['to_latex'] = r""" Render an object to a tabular environment table. You can splice this into a LaTeX document. Requires \\usepackage{booktabs}. @@ -3271,7 +3271,7 @@ def sample(self, n=None, frac=None, replace=False, weights=None, locs = rs.choice(axis_length, size=n, replace=replace, p=weights) return self.take(locs, axis=axis, is_copy=False) - _shared_docs['pipe'] = (""" + _shared_docs['pipe'] = (r""" Apply func(self, \*args, \*\*kwargs) Parameters @@ -3692,6 +3692,8 @@ def as_blocks(self, copy=True): Convert the frame to a dict of dtype -> Constructor Types that each has a homogeneous dtype. + .. deprecated:: 0.21.0 + NOTE: the dtypes of the blocks WILL BE PRESERVED HERE (unlike in as_matrix) @@ -3699,32 +3701,34 @@ def as_blocks(self, copy=True): ---------- copy : boolean, default True - .. versionadded: 0.16.1 - Returns ------- values : a dict of dtype -> Constructor Types """ - self._consolidate_inplace() - - bd = {} - for b in self._data.blocks: - bd.setdefault(str(b.dtype), []).append(b) - - result = {} - for dtype, blocks in bd.items(): - # Must combine even after consolidation, because there may be - # sparse items which are never consolidated into one block. - combined = self._data.combine(blocks, copy=copy) - result[dtype] = self._constructor(combined).__finalize__(self) - - return result + warnings.warn("as_blocks is deprecated and will " + "be removed in a future version", + FutureWarning, stacklevel=2) + return self._to_dict_of_blocks(copy=copy) @property def blocks(self): - """Internal property, property synonym for as_blocks()""" + """ + Internal property, property synonym for as_blocks() + + .. deprecated:: 0.21.0 + """ return self.as_blocks() + def _to_dict_of_blocks(self, copy=True): + """ + Return a dict of dtype -> Constructor Types that + each is a homogeneous dtype. 
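+
+        A minimal sketch of the result's shape (illustrative only)::
+
+            >>> df = pd.DataFrame({'a': [1.0], 'b': [2]})
+            >>> sorted(df._to_dict_of_blocks())
+            ['float64', 'int64']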
+ + Internal ONLY + """ + return {k: self._constructor(v).__finalize__(self) + for k, v, in self._data.to_dict(copy=copy).items()} + @deprecate_kwarg(old_arg_name='raise_on_error', new_arg_name='errors', mapping={True: 'raise', False: 'ignore'}) def astype(self, dtype, copy=True, errors='raise', **kwargs): @@ -3931,13 +3935,12 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, ------- converted : same as input object """ - from warnings import warn msg = ("convert_objects is deprecated. To re-infer data dtypes for " "object columns, use {klass}.infer_objects()\nFor all " "other conversions use the data-type specific converters " "pd.to_datetime, pd.to_timedelta and pd.to_numeric." ).format(klass=self.__class__.__name__) - warn(msg, FutureWarning, stacklevel=2) + warnings.warn(msg, FutureWarning, stacklevel=2) return self._constructor( self._data.convert(convert_dates=convert_dates, @@ -4310,9 +4313,9 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, raise AssertionError("'to_replace' must be 'None' if 'regex' is " "not a bool") if axis is not None: - from warnings import warn - warn('the "axis" argument is deprecated and will be removed in' - 'v0.13; this argument has no effect') + warnings.warn('the "axis" argument is deprecated ' + 'and will be removed in' + 'v0.13; this argument has no effect') self._consolidate_inplace() diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 2046bae759b9ab..e6f61a22e31373 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3583,6 +3583,31 @@ def _interleave(self): return result + def to_dict(self, copy=True): + """ + Return a dict of str(dtype) -> BlockManager + + Parameters + ---------- + copy : boolean, default True + + Returns + ------- + values : a dict of dtype -> BlockManager + + Notes + ----- + This consolidates based on str(dtype) + """ + self._consolidate_inplace() + + bd = {} + for b in self.blocks: + bd.setdefault(str(b.dtype), []).append(b) + + return {dtype: self.combine(blocks, copy=copy) + for dtype, blocks in bd.items()} + def xs(self, key, axis=1, copy=True, takeable=False): if axis < 1: raise AssertionError('Can only take xs across axis >= 1, got %d' % diff --git a/pandas/core/window.py b/pandas/core/window.py index 4bd959f52673c9..869296503225d0 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -141,7 +141,7 @@ def _create_blocks(self, how): if obj.ndim == 2: obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False) - blocks = obj.as_blocks(copy=False).values() + blocks = obj._to_dict_of_blocks(copy=False).values() return blocks, obj, index diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index afa3c4f25789ae..3ca185cf158a7c 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -320,7 +320,11 @@ def test_copy_blocks(self): column = df.columns[0] # use the default copy=True, change a column - blocks = df.as_blocks() + + # deprecated 0.21.0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + blocks = df.as_blocks() for dtype, _df in blocks.items(): if column in _df: _df.loc[:, column] = _df[column] + 1 @@ -334,7 +338,11 @@ def test_no_copy_blocks(self): column = df.columns[0] # use the copy=False, change a column - blocks = df.as_blocks(copy=False) + + # deprecated 0.21.0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + blocks = df.as_blocks(copy=False) for dtype, 
_df in blocks.items(): if column in _df: _df.loc[:, column] = _df[column] + 1 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d942330ecd8a6b..d0cd1899a0a3c5 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1766,7 +1766,7 @@ def test_from_records_sequencelike(self): # this is actually tricky to create the recordlike arrays and # have the dtypes be intact - blocks = df.blocks + blocks = df._to_dict_of_blocks() tuples = [] columns = [] dtypes = [] @@ -1841,8 +1841,9 @@ def test_from_records_dictlike(self): # columns is in a different order here than the actual items iterated # from the dict + blocks = df._to_dict_of_blocks() columns = [] - for dtype, b in compat.iteritems(df.blocks): + for dtype, b in compat.iteritems(blocks): columns.extend(b.columns) asdict = dict((x, y) for x, y in compat.iteritems(df)) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0900d21b250ede..f40fc151676da1 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -469,10 +469,11 @@ def test_set_change_dtype_slice(self): # GH8850 df = DataFrame([[1.0, 2, 3], [4.0, 5, 6]], columns=cols) df['2nd'] = df['2nd'] * 2.0 - assert sorted(df.blocks.keys()) == ['float64', 'int64'] - assert_frame_equal(df.blocks['float64'], DataFrame( + blocks = df._to_dict_of_blocks() + assert sorted(blocks.keys()) == ['float64', 'int64'] + assert_frame_equal(blocks['float64'], DataFrame( [[1.0, 4.0], [4.0, 10.0]], columns=cols[:2])) - assert_frame_equal(df.blocks['int64'], DataFrame( + assert_frame_equal(blocks['int64'], DataFrame( [[3], [6]], columns=cols[2:])) def test_copy(self, mgr): diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index 004af5066fe835..ed4a3a9e5f75f8 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -1099,7 +1099,10 @@ def test_as_blocks(self): df = SparseDataFrame({'A': [1.1, 3.3], 'B': [nan, -3.9]}, dtype='float64') - df_blocks = df.blocks + # deprecated 0.21.0 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + df_blocks = df.blocks assert list(df_blocks.keys()) == ['float64'] tm.assert_frame_equal(df_blocks['float64'], df) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 5adbd1498bb6aa..c5f73ca0e885bb 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1385,8 +1385,8 @@ def assert_frame_equal(left, right, check_dtype=True, # compare by blocks if by_blocks: - rblocks = right.blocks - lblocks = left.blocks + rblocks = right._to_dict_of_blocks() + lblocks = left._to_dict_of_blocks() for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): assert dtype in lblocks assert dtype in rblocks From d2b166885496ebf5f25cdedd27dce69379878aaa Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Mon, 25 Sep 2017 06:20:50 -0400 Subject: [PATCH 149/188] TST: Use fixtures in indexes common tests (#17622) --- pandas/tests/indexes/common.py | 332 +++++++++++++-------------- pandas/tests/indexes/conftest.py | 24 ++ pandas/tests/indexes/datetimelike.py | 4 +- pandas/tests/indexes/test_base.py | 4 +- pandas/tests/indexes/test_numeric.py | 4 +- pandas/tests/indexes/test_range.py | 4 +- 6 files changed, 186 insertions(+), 186 deletions(-) create mode 100644 pandas/tests/indexes/conftest.py diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 90618cd6e235f6..970dd7b63225ab 
100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -30,9 +30,9 @@ def setup_indices(self): for name, idx in self.indices.items(): setattr(self, name, idx) - def verify_pickle(self, index): - unpickled = tm.round_trip_pickle(index) - assert index.equals(unpickled) + def verify_pickle(self, indices): + unpickled = tm.round_trip_pickle(indices) + assert indices.equals(unpickled) def test_pickle_compat_construction(self): # this is testing for pickle compat @@ -97,7 +97,7 @@ def test_numeric_compat(self): lambda: 1 * idx) div_err = "cannot perform __truediv__" if PY3 \ - else "cannot perform __div__" + else "cannot perform __div__" tm.assert_raises_regex(TypeError, div_err, lambda: idx / 1) tm.assert_raises_regex(TypeError, div_err, lambda: 1 / idx) tm.assert_raises_regex(TypeError, "cannot perform __floordiv__", @@ -178,11 +178,10 @@ def test_str(self): assert "'foo'" in str(idx) assert idx.__class__.__name__ in str(idx) - def test_dtype_str(self): - for idx in self.indices.values(): - dtype = idx.dtype_str - assert isinstance(dtype, compat.string_types) - assert dtype == str(idx.dtype) + def test_dtype_str(self, indices): + dtype = indices.dtype_str + assert isinstance(dtype, compat.string_types) + assert dtype == str(indices.dtype) def test_repr_max_seq_item_setting(self): # GH10182 @@ -192,48 +191,43 @@ def test_repr_max_seq_item_setting(self): repr(idx) assert '...' not in str(idx) - def test_wrong_number_names(self): + def test_wrong_number_names(self, indices): def testit(ind): ind.names = ["apple", "banana", "carrot"] + tm.assert_raises_regex(ValueError, "^Length", testit, indices) - for ind in self.indices.values(): - tm.assert_raises_regex(ValueError, "^Length", testit, ind) - - def test_set_name_methods(self): + def test_set_name_methods(self, indices): new_name = "This is the new name for this index" - for ind in self.indices.values(): - - # don't tests a MultiIndex here (as its tested separated) - if isinstance(ind, MultiIndex): - continue - original_name = ind.name - new_ind = ind.set_names([new_name]) - assert new_ind.name == new_name - assert ind.name == original_name - res = ind.rename(new_name, inplace=True) - - # should return None - assert res is None - assert ind.name == new_name - assert ind.names == [new_name] - # with tm.assert_raises_regex(TypeError, "list-like"): - # # should still fail even if it would be the right length - # ind.set_names("a") - with tm.assert_raises_regex(ValueError, "Level must be None"): - ind.set_names("a", level=0) - - # rename in place just leaves tuples and other containers alone - name = ('A', 'B') - ind.rename(name, inplace=True) - assert ind.name == name - assert ind.names == [name] - - def test_hash_error(self): - for ind in self.indices.values(): - with tm.assert_raises_regex(TypeError, "unhashable type: %r" % - type(ind).__name__): - hash(ind) + # don't tests a MultiIndex here (as its tested separated) + if isinstance(indices, MultiIndex): + return + original_name = indices.name + new_ind = indices.set_names([new_name]) + assert new_ind.name == new_name + assert indices.name == original_name + res = indices.rename(new_name, inplace=True) + + # should return None + assert res is None + assert indices.name == new_name + assert indices.names == [new_name] + # with tm.assert_raises_regex(TypeError, "list-like"): + # # should still fail even if it would be the right length + # ind.set_names("a") + with tm.assert_raises_regex(ValueError, "Level must be None"): + indices.set_names("a", level=0) + + # rename in 
place just leaves tuples and other containers alone + name = ('A', 'B') + indices.rename(name, inplace=True) + assert indices.name == name + assert indices.names == [name] + + def test_hash_error(self, indices): + index = indices + tm.assert_raises_regex(TypeError, "unhashable type: %r" % + type(index).__name__, hash, indices) def test_copy_name(self): # gh-12309: Check that the "name" argument @@ -298,106 +292,87 @@ def test_ensure_copied_data(self): tm.assert_numpy_array_equal(index._values, result._values, check_same='same') - def test_copy_and_deepcopy(self): + def test_copy_and_deepcopy(self, indices): from copy import copy, deepcopy - for ind in self.indices.values(): + if isinstance(indices, MultiIndex): + return + for func in (copy, deepcopy): + idx_copy = func(indices) + assert idx_copy is not indices + assert idx_copy.equals(indices) - # don't tests a MultiIndex here (as its tested separated) - if isinstance(ind, MultiIndex): - continue + new_copy = indices.copy(deep=True, name="banana") + assert new_copy.name == "banana" - for func in (copy, deepcopy): - idx_copy = func(ind) - assert idx_copy is not ind - assert idx_copy.equals(ind) + def test_duplicates(self, indices): + if type(indices) is not self._holder: + return + if not len(indices) or isinstance(indices, MultiIndex): + return + idx = self._holder([indices[0]] * 5) + assert not idx.is_unique + assert idx.has_duplicates - new_copy = ind.copy(deep=True, name="banana") - assert new_copy.name == "banana" + def test_get_unique_index(self, indices): + # MultiIndex tested separately + if not len(indices) or isinstance(indices, MultiIndex): + return - def test_duplicates(self): - for ind in self.indices.values(): + idx = indices[[0] * 5] + idx_unique = indices[[0]] - if not len(ind): - continue - if isinstance(ind, MultiIndex): - continue - idx = self._holder([ind[0]] * 5) - assert not idx.is_unique - assert idx.has_duplicates - - # GH 10115 - # preserve names - idx.name = 'foo' - result = idx.drop_duplicates() - assert result.name == 'foo' - tm.assert_index_equal(result, Index([ind[0]], name='foo')) - - def test_get_unique_index(self): - for ind in self.indices.values(): - - # MultiIndex tested separately - if not len(ind) or isinstance(ind, MultiIndex): - continue + # We test against `idx_unique`, so first we make sure it's unique + # and doesn't contain nans. + assert idx_unique.is_unique + try: + assert not idx_unique.hasnans + except NotImplementedError: + pass - idx = ind[[0] * 5] - idx_unique = ind[[0]] + for dropna in [False, True]: + result = idx._get_unique_index(dropna=dropna) + tm.assert_index_equal(result, idx_unique) - # We test against `idx_unique`, so first we make sure it's unique - # and doesn't contain nans. 
- assert idx_unique.is_unique - try: - assert not idx_unique.hasnans - except NotImplementedError: - pass + # nans: + if not indices._can_hold_na: + return - for dropna in [False, True]: - result = idx._get_unique_index(dropna=dropna) - tm.assert_index_equal(result, idx_unique) + if needs_i8_conversion(indices): + vals = indices.asi8[[0] * 5] + vals[0] = iNaT + else: + vals = indices.values[[0] * 5] + vals[0] = np.nan - # nans: - if not ind._can_hold_na: - continue + vals_unique = vals[:2] + idx_nan = indices._shallow_copy(vals) + idx_unique_nan = indices._shallow_copy(vals_unique) + assert idx_unique_nan.is_unique - if needs_i8_conversion(ind): - vals = ind.asi8[[0] * 5] - vals[0] = iNaT - else: - vals = ind.values[[0] * 5] - vals[0] = np.nan - - vals_unique = vals[:2] - idx_nan = ind._shallow_copy(vals) - idx_unique_nan = ind._shallow_copy(vals_unique) - assert idx_unique_nan.is_unique - - assert idx_nan.dtype == ind.dtype - assert idx_unique_nan.dtype == ind.dtype - - for dropna, expected in zip([False, True], - [idx_unique_nan, idx_unique]): - for i in [idx_nan, idx_unique_nan]: - result = i._get_unique_index(dropna=dropna) - tm.assert_index_equal(result, expected) - - def test_sort(self): - for ind in self.indices.values(): - pytest.raises(TypeError, ind.sort) - - def test_mutability(self): - for ind in self.indices.values(): - if not len(ind): - continue - pytest.raises(TypeError, ind.__setitem__, 0, ind[0]) + assert idx_nan.dtype == indices.dtype + assert idx_unique_nan.dtype == indices.dtype - def test_view(self): - for ind in self.indices.values(): - i_view = ind.view() - assert i_view.name == ind.name + for dropna, expected in zip([False, True], + [idx_unique_nan, + idx_unique]): + for i in [idx_nan, idx_unique_nan]: + result = i._get_unique_index(dropna=dropna) + tm.assert_index_equal(result, expected) - def test_compat(self): - for ind in self.indices.values(): - assert ind.tolist() == list(ind) + def test_sort(self, indices): + pytest.raises(TypeError, indices.sort) + + def test_mutability(self, indices): + if not len(indices): + return + pytest.raises(TypeError, indices.__setitem__, 0, indices[0]) + + def test_view(self, indices): + assert indices.view().name == indices.name + + def test_compat(self, indices): + assert indices.tolist() == list(indices) def test_memory_usage(self): for name, index in compat.iteritems(self.indices): @@ -457,11 +432,11 @@ def test_numpy_argsort(self): tm.assert_raises_regex(ValueError, msg, np.argsort, ind, order=('a', 'b')) - def test_pickle(self): - for ind in self.indices.values(): - self.verify_pickle(ind) - ind.name = 'foo' - self.verify_pickle(ind) + def test_pickle(self, indices): + self.verify_pickle(indices) + original_name, indices.name = indices.name, 'foo' + self.verify_pickle(indices) + indices.name = original_name def test_take(self): indexer = [4, 3, 0, 2] @@ -962,46 +937,47 @@ def test_join_self_unique(self, how): joined = index.join(index, how=how) assert (index == joined).all() - def test_searchsorted_monotonic(self): + def test_searchsorted_monotonic(self, indices): # GH17271 - for index in self.indices.values(): - # not implemented for tuple searches in MultiIndex - # or Intervals searches in IntervalIndex - if isinstance(index, (MultiIndex, IntervalIndex)): - continue + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(indices, (MultiIndex, IntervalIndex)): + return - # nothing to test if the index is empty - if index.empty: - continue - value = index[0] - - # 
determine the expected results (handle dupes for 'right') - expected_left, expected_right = 0, (index == value).argmin() - if expected_right == 0: - # all values are the same, expected_right should be length - expected_right = len(index) - - # test _searchsorted_monotonic in all cases - # test searchsorted only for increasing - if index.is_monotonic_increasing: - ssm_left = index._searchsorted_monotonic(value, side='left') - assert expected_left == ssm_left - - ssm_right = index._searchsorted_monotonic(value, side='right') - assert expected_right == ssm_right - - ss_left = index.searchsorted(value, side='left') - assert expected_left == ss_left - - ss_right = index.searchsorted(value, side='right') - assert expected_right == ss_right - elif index.is_monotonic_decreasing: - ssm_left = index._searchsorted_monotonic(value, side='left') - assert expected_left == ssm_left - - ssm_right = index._searchsorted_monotonic(value, side='right') - assert expected_right == ssm_right - else: - # non-monotonic should raise. - with pytest.raises(ValueError): - index._searchsorted_monotonic(value, side='left') + # nothing to test if the index is empty + if indices.empty: + return + value = indices[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (indices == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(indices) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if indices.is_monotonic_increasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = indices.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = indices.searchsorted(value, side='right') + assert expected_right == ss_right + + elif indices.is_monotonic_decreasing: + ssm_left = indices._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = indices._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + else: + # non-monotonic should raise. 
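+            # (without a monotonic ordering there is no well-defined
+            # insertion point to search for)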
+ with pytest.raises(ValueError): + indices._searchsorted_monotonic(value, side='left') diff --git a/pandas/tests/indexes/conftest.py b/pandas/tests/indexes/conftest.py new file mode 100644 index 00000000000000..a0ee3e511ef378 --- /dev/null +++ b/pandas/tests/indexes/conftest.py @@ -0,0 +1,24 @@ +import pytest + +import pandas.util.testing as tm +from pandas.core.indexes.api import Index, MultiIndex +from pandas.compat import lzip + + +@pytest.fixture(params=[tm.makeUnicodeIndex(100), + tm.makeStringIndex(100), + tm.makeDateIndex(100), + tm.makePeriodIndex(100), + tm.makeTimedeltaIndex(100), + tm.makeIntIndex(100), + tm.makeUIntIndex(100), + tm.makeFloatIndex(100), + Index([True, False]), + tm.makeCategoricalIndex(100), + Index([]), + MultiIndex.from_tuples(lzip( + ['foo', 'bar', 'baz'], [1, 2, 3])), + Index([0, 0, 1, 1, 2, 2])], + ids=lambda x: type(x).__name__) +def indices(request): + return request.param diff --git a/pandas/tests/indexes/datetimelike.py b/pandas/tests/indexes/datetimelike.py index 114940009377c7..12b509d4aef3fe 100644 --- a/pandas/tests/indexes/datetimelike.py +++ b/pandas/tests/indexes/datetimelike.py @@ -26,8 +26,8 @@ def test_str(self): if hasattr(idx, 'freq'): assert "freq='%s'" % idx.freqstr in str(idx) - def test_view(self): - super(DatetimeLike, self).test_view() + def test_view(self, indices): + super(DatetimeLike, self).test_view(indices) i = self.create_index() diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index fa73c9fc7b7225..0bd2861e060eda 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -58,8 +58,8 @@ def test_new_axis(self): assert new_index.ndim == 2 assert isinstance(new_index, np.ndarray) - def test_copy_and_deepcopy(self): - super(TestIndex, self).test_copy_and_deepcopy() + def test_copy_and_deepcopy(self, indices): + super(TestIndex, self).test_copy_and_deepcopy(indices) new_copy2 = self.intIndex.copy(dtype=int) assert new_copy2.dtype.kind == 'i' diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 7e7e10e4aeabee..dc38b0a2b1fb7f 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -459,8 +459,8 @@ def test_take_fill_value(self): class NumericInt(Numeric): - def test_view(self): - super(NumericInt, self).test_view() + def test_view(self, indices): + super(NumericInt, self).test_view(indices) i = self._holder([], name='Foo') i_view = i.view() diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 8dc5a40ced4bfd..9fe10885186de0 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -312,8 +312,8 @@ def test_delete(self): # either depending on numpy version result = idx.delete(len(idx)) - def test_view(self): - super(TestRangeIndex, self).test_view() + def test_view(self, indices): + super(TestRangeIndex, self).test_view(indices) i = RangeIndex(0, name='Foo') i_view = i.view() From 9d0db60f75783ba5a1a036aac0485f8b760d61dc Mon Sep 17 00:00:00 2001 From: dkamm Date: Mon, 25 Sep 2017 07:12:27 -0400 Subject: [PATCH 150/188] BUG: wrap all supported inplace methods to avoid making a copy (#12962) (#17589) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/ops.py | 15 ++++++++++++--- pandas/tests/frame/test_operators.py | 27 +++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 07cc00b3724e42..36551fa30c3adc 100644 --- 
a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -646,4 +646,5 @@ PyPy Other ^^^^^ +- Bug where some inplace operators were not being wrapped and produced a copy when invoked (:issue:`12962`) - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 221f6ff8b92c68..d37acf48ed9c28 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -186,8 +186,10 @@ def add_special_arithmetic_methods(cls, arith_method=None, arith_method : function (optional) factory for special arithmetic methods, with op string: f(op, name, str_rep, default_axis=None, fill_zeros=None, **eval_kwargs) - comp_method : function, optional, + comp_method : function (optional) factory for rich comparison - signature: f(op, name, str_rep) + bool_method : function (optional) + factory for boolean methods - signature: f(op, name, str_rep) use_numexpr : bool, default True whether to accelerate with numexpr, defaults to True force : bool, default False @@ -234,9 +236,16 @@ def f(self, other): __isub__=_wrap_inplace_method(new_methods["__sub__"]), __imul__=_wrap_inplace_method(new_methods["__mul__"]), __itruediv__=_wrap_inplace_method(new_methods["__truediv__"]), - __ipow__=_wrap_inplace_method(new_methods["__pow__"]), )) + __ifloordiv__=_wrap_inplace_method(new_methods["__floordiv__"]), + __imod__=_wrap_inplace_method(new_methods["__mod__"]), + __ipow__=_wrap_inplace_method(new_methods["__pow__"]))) if not compat.PY3: - new_methods["__idiv__"] = new_methods["__div__"] + new_methods["__idiv__"] = _wrap_inplace_method(new_methods["__div__"]) + if bool_method: + new_methods.update( + dict(__iand__=_wrap_inplace_method(new_methods["__and__"]), + __ior__=_wrap_inplace_method(new_methods["__or__"]), + __ixor__=_wrap_inplace_method(new_methods["__xor__"]))) add_methods(cls, new_methods=new_methods, force=force, select=select, exclude=exclude) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 309c0f0244d7c8..10a9853b8a5b4f 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -1167,6 +1167,33 @@ def test_inplace_ops_identity(self): assert_frame_equal(df2, expected) assert df._data is df2._data + @pytest.mark.parametrize('op', ['add', 'and', 'div', 'floordiv', 'mod', + 'mul', 'or', 'pow', 'sub', 'truediv', + 'xor']) + def test_inplace_ops_identity2(self, op): + + if compat.PY3 and op == 'div': + return + + df = DataFrame({'a': [1., 2., 3.], + 'b': [1, 2, 3]}) + + operand = 2 + if op in ('and', 'or', 'xor'): + # cannot use floats for boolean ops + df['a'] = [True, False, True] + + df_copy = df.copy() + iop = '__i{}__'.format(op) + op = '__{}__'.format(op) + + # no id change and value is correct + getattr(df, iop)(operand) + expected = getattr(df_copy, op)(operand) + assert_frame_equal(df, expected) + expected = id(df) + assert id(df) == expected + def test_alignment_non_pandas(self): index = ['A', 'B', 'C'] columns = ['X', 'Y', 'Z'] From 83c9205971bd173692286187024ae30aae9ffc39 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Mon, 25 Sep 2017 13:54:02 +0200 Subject: [PATCH 151/188] Correct wrong doc string for MultiIndex.get_loc_level + added examples (#17663) --- pandas/core/indexes/multi.py | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8c6b26c9070a9c..35f738b347a3eb 100644 --- 
a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2083,16 +2083,42 @@ def _maybe_str_to_time_stamp(key, lev): def get_loc_level(self, key, level=0, drop_level=True): """ - Get integer location slice for requested label or tuple + Get both the location for the requested label(s) and the + resulting sliced index. Parameters ---------- - key : label or tuple - level : int/level name or list thereof + key : label or sequence of labels + level : int/level name or list thereof, optional + drop_level : bool, default True + if ``False``, the resulting index will not drop any level. Returns ------- - loc : int or slice object + loc : A 2-tuple where the elements are: + Element 0: int, slice object or boolean array + Element 1: The resulting sliced multiindex/index. If the key + contains all levels, this will be ``None``. + + Examples + -------- + >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')], + ... names=['A', 'B']) + + >>> mi.get_loc_level('b') + (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) + + >>> mi.get_loc_level('e', level='B') + (array([False, True, False], dtype=bool), + Index(['b'], dtype='object', name='A')) + + >>> mi.get_loc_level(['b', 'e']) + (1, None) + + See Also + --------- + MultiIndex.get_loc : Get integer location, slice or boolean mask for + requested label or tuple. """ def maybe_droplevels(indexer, levels, drop_level): From e0fe5cc60b1dc0d777223bba64b8abfc0e0e02ab Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 25 Sep 2017 10:12:45 -0400 Subject: [PATCH 152/188] COMPAT: skip 32-bit test on int repr (#17664) closes #17121 --- pandas/tests/frame/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index b3209da6449d6a..230a5806ccb2e6 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -213,8 +213,8 @@ def test_itertuples(self): assert (list(dfaa.itertuples()) == [(0, 1, 1), (1, 2, 2), (2, 3, 3)]) - # repr with be int/long on windows - if not compat.is_platform_windows(): + # repr with be int/long on 32-bit/windows + if not (compat.is_platform_windows() or compat.is_platform_32bit()): assert (repr(list(df.itertuples(name=None))) == '[(0, 1, 4), (1, 2, 5), (2, 3, 6)]') From 45a795e03c985aa3d456916879e3728b90276a7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Jonasson?= Date: Mon, 25 Sep 2017 23:50:18 +0200 Subject: [PATCH 153/188] ERR: get_indexer returns the correct indexer when Index is numeric and target is boolean (#16877) (#17343) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/indexes/base.py | 7 ++++++- pandas/tests/indexes/test_base.py | 7 +++++++ pandas/tests/series/test_indexing.py | 5 +++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 36551fa30c3adc..b6bd86bd79a1f2 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -481,7 +481,7 @@ Other API Changes - :class:`Period` is now immutable, and will now raise an ``AttributeError`` when a user tries to assign a new value to the ``ordinal`` or ``freq`` attributes (:issue:`17116`). 
- :func:`to_datetime` when passed a tz-aware ``origin=`` kwarg will now raise a more informative ``ValueError`` rather than a ``TypeError`` (:issue:`16842`) - Renamed non-functional ``index`` to ``index_col`` in :func:`read_stata` to improve API consistency (:issue:`16342`) - +- Bug in :func:`DataFrame.drop` caused boolean labels ``False`` and ``True`` to be treated as labels 0 and 1 respectively when dropping indices from a numeric index. This will now raise a ValueError (:issue:`16877`) .. _whatsnew_0210.deprecations: diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f28ff9697e517f..be26720adb0bda 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2609,6 +2609,12 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): if tolerance is not None: tolerance = self._convert_tolerance(tolerance) + # Treat boolean labels passed to a numeric index as not found. Without + # this fix False and True would be treated as 0 and 1 respectively. + # (GH #16877) + if target.is_boolean() and self.is_numeric(): + return _ensure_platform_int(np.repeat(-1, target.size)) + pself, ptarget = self._maybe_promote(target) if pself is not self or ptarget is not target: return pself.get_indexer(ptarget, method=method, limit=limit, @@ -2637,7 +2643,6 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): 'backfill or nearest reindexing') indexer = self._engine.get_indexer(target._values) - return _ensure_platform_int(indexer) def _convert_tolerance(self, tolerance): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 0bd2861e060eda..81f113d58d680a 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1141,6 +1141,13 @@ def test_get_indexer_strings(self): with pytest.raises(TypeError): idx.get_indexer(['a', 'b', 'c', 'd'], method='pad', tolerance=2) + def test_get_indexer_numeric_index_boolean_target(self): + # GH 16877 + numeric_idx = pd.Index(range(4)) + result = numeric_idx.get_indexer([True, False, True]) + expected = np.array([-1, -1, -1], dtype=np.intp) + tm.assert_numpy_array_equal(result, expected) + def test_get_loc(self): idx = pd.Index([0, 1, 2]) all_methods = [None, 'pad', 'backfill', 'nearest'] diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 91187b709463aa..2182e3fbfc2129 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1783,6 +1783,11 @@ def test_drop(self): expected = Series([3], index=[False]) assert_series_equal(result, expected) + # GH 16877 + s = Series([2, 3], index=[0, 1]) + with tm.assert_raises_regex(ValueError, 'not contained in axis'): + s.drop([False, True]) + def test_align(self): def _check_align(a, b, how='left', fill=None): aa, ab = a.align(b, join=how, fill_value=fill) From 5279a172a86ac22250c5a382708e23917df79744 Mon Sep 17 00:00:00 2001 From: JennaVergeynst Date: Tue, 26 Sep 2017 01:19:21 +0200 Subject: [PATCH 154/188] DOC: improve docstring of function where (#17665) --- pandas/core/generic.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index b49eeed6db85f0..a7be145f210833 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5825,13 +5825,15 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, _shared_docs['where'] = (""" Return an object of same shape as self and whose corresponding - 
entries are from self where cond is %(cond)s and otherwise are from - other. + entries are from self where `cond` is %(cond)s and otherwise are from + `other`. Parameters ---------- cond : boolean %(klass)s, array-like, or callable - If cond is callable, it is computed on the %(klass)s and + Where `cond` is %(cond)s, keep the original value. Where + %(cond_rev)s, replace with corresponding value from `other`. + If `cond` is callable, it is computed on the %(klass)s and should return boolean %(klass)s or array. The callable must not change input %(klass)s (though pandas doesn't check it). @@ -5839,6 +5841,8 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, A callable can be used as cond. other : scalar, %(klass)s, or callable + Entries where `cond` is %(cond_rev)s are replaced with + corresponding value from `other`. If other is callable, it is computed on the %(klass)s and should return scalar or %(klass)s. The callable must not change input %(klass)s (though pandas doesn't check it). @@ -5884,6 +5888,20 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, 3 3.0 4 4.0 + >>> s.mask(s > 0) + 0 0.0 + 1 NaN + 2 NaN + 3 NaN + 4 NaN + + >>> s.where(s > 1, 10) + 0 10.0 + 1 10.0 + 2 2.0 + 3 3.0 + 4 4.0 + >>> df = pd.DataFrame(np.arange(10).reshape(-1, 2), columns=['A', 'B']) >>> m = df %% 3 == 0 >>> df.where(m, -df) @@ -5914,7 +5932,8 @@ def _where(self, cond, other=np.nan, inplace=False, axis=None, level=None, """) @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="True", - name='where', name_other='mask')) + cond_rev="False", name='where', + name_other='mask')) def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True): @@ -5923,7 +5942,8 @@ def where(self, cond, other=np.nan, inplace=False, axis=None, level=None, raise_on_error) @Appender(_shared_docs['where'] % dict(_shared_doc_kwargs, cond="False", - name='mask', name_other='where')) + cond_rev="True", name='mask', + name_other='where')) def mask(self, cond, other=np.nan, inplace=False, axis=None, level=None, try_cast=False, raise_on_error=True): From 0d239d9ec0a38d8208269fc688a49f0c3c6a9b2a Mon Sep 17 00:00:00 2001 From: Gabe F Date: Tue, 26 Sep 2017 06:34:33 -0400 Subject: [PATCH 155/188] DOC: correct grammar in unicode section (#17678) --- doc/source/options.rst | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/doc/source/options.rst b/doc/source/options.rst index f042e4d3f51204..2da55a5a658a47 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -474,10 +474,10 @@ Unicode Formatting Enabling this option will affect the performance for printing of DataFrame and Series (about 2 times slower). Use only when it is actually required. -Some East Asian countries use Unicode characters its width is corresponding to 2 alphabets. -If DataFrame or Series contains these characters, default output cannot be aligned properly. +Some East Asian countries use Unicode characters whose width corresponds to two Latin characters. +If a DataFrame or Series contains these characters, the default output mode may not align them properly. -.. note:: Screen captures are attached for each outputs to show the actual results. +.. note:: Screen captures are attached for each output to show the actual results. .. ipython:: python @@ -486,8 +486,9 @@ If DataFrame or Series contains these characters, default output cannot be align .. 
image:: _static/option_unicode01.png -Enable ``display.unicode.east_asian_width`` allows pandas to check each character's "East Asian Width" property. -These characters can be aligned properly by checking this property, but it takes longer time than standard ``len`` function. +Enabling ``display.unicode.east_asian_width`` allows pandas to check each character's "East Asian Width" property. +These characters can be aligned properly by setting this option to ``True``. However, this will result in longer render +times than the standard ``len`` function. .. ipython:: python @@ -496,9 +497,10 @@ These characters can be aligned properly by checking this property, but it takes .. image:: _static/option_unicode02.png -In addition, Unicode contains characters which width is "Ambiguous". These character's width should be either 1 or 2 depending on terminal setting or encoding. Because this cannot be distinguished from Python, ``display.unicode.ambiguous_as_wide`` option is added to handle this. +In addition, Unicode characters whose width is "Ambiguous" can either be 1 or 2 characters wide depending on the +terminal setting or encoding. The option ``display.unicode.ambiguous_as_wide`` can be used to handle the ambiguity. -By default, "Ambiguous" character's width, "¡" (inverted exclamation) in below example, is regarded as 1. +By default, an "Ambiguous" character's width, such as "¡" (inverted exclamation) in the example below, is taken to be 1. .. ipython:: python @@ -507,7 +509,10 @@ By default, "Ambiguous" character's width, "¡" (inverted exclamation) in below .. image:: _static/option_unicode03.png -Enabling ``display.unicode.ambiguous_as_wide`` lets pandas to figure these character's width as 2. Note that this option will be effective only when ``display.unicode.east_asian_width`` is enabled. Confirm starting position has been changed, but is not aligned properly because the setting is mismatched with this environment. +Enabling ``display.unicode.ambiguous_as_wide`` makes pandas interpret these characters' widths to be 2. +(Note that this option will only be effective when ``display.unicode.east_asian_width`` is enabled.) + +However, setting this option incorrectly for your terminal will cause these characters to be aligned incorrectly: .. 
ipython:: python From 7e87385e20682184a3f5d188c8e783d63c703b83 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 26 Sep 2017 06:29:19 -0700 Subject: [PATCH 156/188] Separate parsing functions out from tslib (#17363) --- pandas/_libs/period.pyx | 4 +- pandas/_libs/src/inference.pyx | 159 ----- pandas/_libs/tslib.pyx | 274 +------- pandas/_libs/tslibs/parsing.pyx | 681 +++++++++++++++++++ pandas/core/indexes/base.py | 3 +- pandas/core/tools/datetimes.py | 204 +----- pandas/io/date_converters.py | 11 +- pandas/io/parsers.py | 10 +- pandas/tests/indexes/datetimes/test_tools.py | 9 +- pandas/tests/io/parser/parse_dates.py | 5 +- setup.py | 3 + 11 files changed, 721 insertions(+), 642 deletions(-) create mode 100644 pandas/_libs/tslibs/parsing.pyx diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 943f925ec5b04a..725da22104efcc 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -37,10 +37,10 @@ from tslibs.timezones cimport ( is_utc, is_tzlocal, get_utcoffset, get_dst_info, maybe_get_tz) from tslib cimport _nat_scalar_rules +from tslibs.parsing import parse_time_string, NAT_SENTINEL from tslibs.frequencies cimport get_freq_code from pandas.tseries import offsets -from pandas.core.tools.datetimes import parse_time_string from pandas.tseries import frequencies cdef int64_t NPY_NAT = util.get_nat() @@ -1197,6 +1197,8 @@ class Period(_Period): value = str(value) value = value.upper() dt, _, reso = parse_time_string(value, freq) + if dt is NAT_SENTINEL: + ordinal = iNaT if freq is None: try: diff --git a/pandas/_libs/src/inference.pyx b/pandas/_libs/src/inference.pyx index a2764e87eec556..ed883bf5db5bcc 100644 --- a/pandas/_libs/src/inference.pyx +++ b/pandas/_libs/src/inference.pyx @@ -1384,165 +1384,6 @@ def convert_sql_column(x): return maybe_convert_objects(x, try_float=1) -def try_parse_dates(ndarray[object] values, parser=None, - dayfirst=False, default=None): - cdef: - Py_ssize_t i, n - ndarray[object] result - - n = len(values) - result = np.empty(n, dtype='O') - - if parser is None: - if default is None: # GH2618 - date=datetime.now() - default=datetime(date.year, date.month, 1) - - try: - from dateutil.parser import parse - parse_date = lambda x: parse(x, dayfirst=dayfirst, default=default) - except ImportError: # pragma: no cover - def parse_date(s): - try: - return datetime.strptime(s, '%m/%d/%Y') - except Exception: - return s - # EAFP here - try: - for i from 0 <= i < n: - if values[i] == '': - result[i] = np.nan - else: - result[i] = parse_date(values[i]) - except Exception: - # failed - return values - else: - parse_date = parser - - try: - for i from 0 <= i < n: - if values[i] == '': - result[i] = np.nan - else: - result[i] = parse_date(values[i]) - except Exception: - # raise if passed parser and it failed - raise - - return result - - -def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times, - date_parser=None, time_parser=None, - dayfirst=False, default=None): - cdef: - Py_ssize_t i, n - ndarray[object] result - - from datetime import date, time, datetime, timedelta - - n = len(dates) - if len(times) != n: - raise ValueError('Length of dates and times must be equal') - result = np.empty(n, dtype='O') - - if date_parser is None: - if default is None: # GH2618 - date=datetime.now() - default=datetime(date.year, date.month, 1) - - try: - from dateutil.parser import parse - parse_date = lambda x: parse(x, dayfirst=dayfirst, default=default) - except ImportError: # pragma: no cover - def parse_date(s): - try: - return 
date.strptime(s, '%m/%d/%Y') - except Exception: - return s - else: - parse_date = date_parser - - if time_parser is None: - try: - from dateutil.parser import parse - parse_time = lambda x: parse(x) - except ImportError: # pragma: no cover - def parse_time(s): - try: - return time.strptime(s, '%H:%M:%S') - except Exception: - return s - - else: - parse_time = time_parser - - for i from 0 <= i < n: - d = parse_date(str(dates[i])) - t = parse_time(str(times[i])) - result[i] = datetime(d.year, d.month, d.day, - t.hour, t.minute, t.second) - - return result - - -def try_parse_year_month_day(ndarray[object] years, ndarray[object] months, - ndarray[object] days): - cdef: - Py_ssize_t i, n - ndarray[object] result - - from datetime import datetime - - n = len(years) - if len(months) != n or len(days) != n: - raise ValueError('Length of years/months/days must all be equal') - result = np.empty(n, dtype='O') - - for i from 0 <= i < n: - result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) - - return result - - -def try_parse_datetime_components(ndarray[object] years, - ndarray[object] months, - ndarray[object] days, - ndarray[object] hours, - ndarray[object] minutes, - ndarray[object] seconds): - - cdef: - Py_ssize_t i, n - ndarray[object] result - int secs - double float_secs - double micros - - from datetime import datetime - - n = len(years) - if (len(months) != n or len(days) != n or len(hours) != n or - len(minutes) != n or len(seconds) != n): - raise ValueError('Length of all datetime components must be equal') - result = np.empty(n, dtype='O') - - for i from 0 <= i < n: - float_secs = float(seconds[i]) - secs = int(float_secs) - - micros = float_secs - secs - if micros > 0: - micros = micros * 1000000 - - result[i] = datetime(int(years[i]), int(months[i]), int(days[i]), - int(hours[i]), int(minutes[i]), secs, - int(micros)) - - return result - - def sanitize_objects(ndarray[object] values, set na_values, convert_empty=True): cdef: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index d4ca5af09367eb..4c34d0fcb1e5f6 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -67,6 +67,9 @@ from khash cimport ( kh_init_int64, kh_int64_t, kh_resize_int64, kh_get_int64) +from .tslibs.parsing import parse_datetime_string +from .tslibs.parsing import DateParseError # noqa + cimport cython import re @@ -1737,26 +1740,6 @@ def datetime_to_datetime64(ndarray[object] values): return result, inferred_tz -cdef: - set _not_datelike_strings = set(['a', 'A', 'm', 'M', 'p', 'P', 't', 'T']) - -cpdef bint _does_string_look_like_datetime(object date_string): - if date_string.startswith('0'): - # Strings starting with 0 are more consistent with a - # date-like string than a number - return True - - try: - if float(date_string) < 1000: - return False - except ValueError: - pass - - if date_string in _not_datelike_strings: - return False - - return True - def format_array_from_datetime(ndarray[int64_t] values, object tz=None, object format=None, object na_rep=None): @@ -1841,257 +1824,6 @@ def format_array_from_datetime(ndarray[int64_t] values, object tz=None, return result -class DateParseError(ValueError): - pass - - -cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])') - - -def parse_datetime_string(object date_string, object freq=None, - dayfirst=False, yearfirst=False, **kwargs): - """parse datetime string, only returns datetime. - Also cares special handling matching time patterns. 
- - Returns - ------- - datetime - """ - - cdef: - object dt - - if not _does_string_look_like_datetime(date_string): - raise ValueError('Given date string not likely a datetime.') - - if _TIMEPAT.match(date_string): - # use current datetime as default, not pass _DEFAULT_DATETIME - dt = parse_date(date_string, dayfirst=dayfirst, - yearfirst=yearfirst, **kwargs) - return dt - try: - dt, _, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) - return dt - except DateParseError: - raise - except ValueError: - pass - - try: - dt = parse_date(date_string, default=_DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst, **kwargs) - except TypeError: - # following may be raised from dateutil - # TypeError: 'NoneType' object is not iterable - raise ValueError('Given date string not likely a datetime.') - - return dt - - -def parse_datetime_string_with_reso(object date_string, object freq=None, - dayfirst=False, yearfirst=False, **kwargs): - """parse datetime string, only returns datetime - - Returns - ------- - datetime - """ - - cdef: - object parsed, reso - - if not _does_string_look_like_datetime(date_string): - raise ValueError('Given date string not likely a datetime.') - - try: - return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq) - except DateParseError: - raise - except ValueError: - pass - - try: - parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME, - dayfirst=dayfirst, yearfirst=yearfirst) - except Exception as e: - # TODO: allow raise of errors within instead - raise DateParseError(e) - if parsed is None: - raise DateParseError("Could not parse %s" % date_string) - return parsed, parsed, reso - - -cdef inline object _parse_dateabbr_string(object date_string, object default, - object freq): - cdef: - object ret - int year, quarter = -1, month, mnum, date_len - - # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1 - assert util.is_string_object(date_string) - - # len(date_string) == 0 - # should be NaT??? 
- - if date_string in _nat_strings: - return NaT, NaT, '' - - date_string = date_string.upper() - date_len = len(date_string) - - if date_len == 4: - # parse year only like 2000 - try: - ret = default.replace(year=int(date_string)) - return ret, ret, 'year' - except ValueError: - pass - - try: - if 4 <= date_len <= 7: - i = date_string.index('Q', 1, 6) - if i == 1: - quarter = int(date_string[0]) - if date_len == 4 or (date_len == 5 - and date_string[i + 1] == '-'): - # r'(\d)Q-?(\d\d)') - year = 2000 + int(date_string[-2:]) - elif date_len == 6 or (date_len == 7 - and date_string[i + 1] == '-'): - # r'(\d)Q-?(\d\d\d\d)') - year = int(date_string[-4:]) - else: - raise ValueError - elif i == 2 or i == 3: - # r'(\d\d)-?Q(\d)' - if date_len == 4 or (date_len == 5 - and date_string[i - 1] == '-'): - quarter = int(date_string[-1]) - year = 2000 + int(date_string[:2]) - else: - raise ValueError - elif i == 4 or i == 5: - if date_len == 6 or (date_len == 7 - and date_string[i - 1] == '-'): - # r'(\d\d\d\d)-?Q(\d)' - quarter = int(date_string[-1]) - year = int(date_string[:4]) - else: - raise ValueError - - if not (1 <= quarter <= 4): - msg = ('Incorrect quarterly string is given, quarter must be ' - 'between 1 and 4: {0}') - raise DateParseError(msg.format(date_string)) - - if freq is not None: - # hack attack, #1228 - try: - mnum = _MONTH_NUMBERS[_get_rule_month(freq)] + 1 - except (KeyError, ValueError): - msg = ('Unable to retrieve month information from given ' - 'freq: {0}').format(freq) - raise DateParseError(msg) - - month = (mnum + (quarter - 1) * 3) % 12 + 1 - if month > mnum: - year -= 1 - else: - month = (quarter - 1) * 3 + 1 - - ret = default.replace(year=year, month=month) - return ret, ret, 'quarter' - - except DateParseError: - raise - except ValueError: - pass - - if date_len == 6 and (freq == 'M' or getattr( - freq, 'rule_code', None) == 'M'): - year = int(date_string[:4]) - month = int(date_string[4:6]) - try: - ret = default.replace(year=year, month=month) - return ret, ret, 'month' - except ValueError: - pass - - for pat in ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y']: - try: - ret = datetime.strptime(date_string, pat) - return ret, ret, 'month' - except ValueError: - pass - - raise ValueError('Unable to parse {0}'.format(date_string)) - - -def dateutil_parse(object timestr, object default, ignoretz=False, - tzinfos=None, **kwargs): - """ lifted from dateutil to get resolution""" - - cdef: - object fobj, res, attr, ret, tzdata - object reso = None - dict repl = {} - - fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(fobj, **kwargs) - - # dateutil 2.2 compat - if isinstance(res, tuple): - res, _ = res - - if res is None: - msg = "Unknown datetime string format, unable to parse: {0}" - raise ValueError(msg.format(timestr)) - - for attr in ["year", "month", "day", "hour", - "minute", "second", "microsecond"]: - value = getattr(res, attr) - if value is not None: - repl[attr] = value - reso = attr - - if reso is None: - msg = "Unable to parse datetime string: {0}" - raise ValueError(msg.format(timestr)) - - if reso == 'microsecond': - if repl['microsecond'] == 0: - reso = 'second' - elif repl['microsecond'] % 1000 == 0: - reso = 'millisecond' - - ret = default.replace(**repl) - if res.weekday is not None and not res.day: - ret = ret + relativedelta.relativedelta(weekday=res.weekday) - if not ignoretz: - if callable(tzinfos) or tzinfos and res.tzname in tzinfos: - if callable(tzinfos): - tzdata = tzinfos(res.tzname, res.tzoffset) - else: - tzdata = tzinfos.get(res.tzname) - if 
isinstance(tzdata, datetime.tzinfo):
-                    tzinfo = tzdata
-                elif isinstance(tzdata, string_types):
-                    tzinfo = _dateutil_tzstr(tzdata)
-                elif isinstance(tzdata, int):
-                    tzinfo = tzoffset(res.tzname, tzdata)
-                else:
-                    raise ValueError("offset must be tzinfo subclass, "
-                                     "tz string, or int offset")
-                ret = ret.replace(tzinfo=tzinfo)
-            elif res.tzname and res.tzname in time.tzname:
-                ret = ret.replace(tzinfo=_dateutil_tzlocal())
-            elif res.tzoffset == 0:
-                ret = ret.replace(tzinfo=_dateutil_tzutc())
-            elif res.tzoffset:
-                ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset))
-        return ret, reso
-
-
 # const for parsers
 _DEFAULT_DATETIME = datetime(1, 1, 1).replace(
diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
new file mode 100644
index 00000000000000..845d1b8dcabba3
--- /dev/null
+++ b/pandas/_libs/tslibs/parsing.pyx
@@ -0,0 +1,681 @@
+# -*- coding: utf-8 -*-
+# cython: profile=False
+# cython: linetrace=False
+# distutils: define_macros=CYTHON_TRACE=0
+# distutils: define_macros=CYTHON_TRACE_NOGIL=0
+"""
+Parsing functions for datetime and datetime-like strings.
+"""
+import sys
+import re
+
+from cpython cimport PyString_Check, PyUnicode_Check
+
+from libc.stdlib cimport free
+
+cimport cython
+from cython cimport Py_ssize_t
+
+
+from datetime import datetime
+import time
+
+import numpy as np
+cimport numpy as np
+from numpy cimport int64_t, ndarray
+np.import_array()
+
+# Avoid import from outside _libs
+if sys.version_info.major == 2:
+    string_types = basestring
+    from StringIO import StringIO
+else:
+    string_types = str
+    from io import StringIO
+
+
+# dateutil compat
+from dateutil.tz import (tzoffset,
+                         tzlocal as _dateutil_tzlocal,
+                         tzfile as _dateutil_tzfile,
+                         tzutc as _dateutil_tzutc,
+                         tzstr as _dateutil_tzstr)
+from dateutil.relativedelta import relativedelta
+from dateutil.parser import DEFAULTPARSER
+from dateutil.parser import parse as du_parse
+
+
+class DateParseError(ValueError):
+    pass
+
+_nat_strings = set(['NaT', 'nat', 'NAT', 'nan', 'NaN', 'NAN'])
+
+_DEFAULT_DATETIME = datetime(1, 1, 1).replace(hour=0, minute=0,
+                                              second=0, microsecond=0)
+_MONTHS = ['JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN', 'JUL',
+           'AUG', 'SEP', 'OCT', 'NOV', 'DEC']
+_MONTH_NUMBERS = {k: i for i, k in enumerate(_MONTHS)}
+_MONTH_ALIASES = {(k + 1): v for k, v in enumerate(_MONTHS)}
+
+cdef object _TIMEPAT = re.compile(r'^([01]?[0-9]|2[0-3]):([0-5][0-9])')
+
+cdef set _not_datelike_strings = set(['a', 'A', 'm', 'M', 'p', 'P', 't', 'T'])
+
+NAT_SENTINEL = object()
+# This allows us to reference NaT without having to import it
+
+
+def parse_datetime_string(date_string, freq=None, dayfirst=False,
+                          yearfirst=False, **kwargs):
+    """Parse a datetime string; only a datetime object is returned.
+    Strings matching recognized time patterns also get special handling.
+
+    Returns
+    -------
+    datetime
+    """
+
+    cdef:
+        object dt
+
+    if not _does_string_look_like_datetime(date_string):
+        raise ValueError('Given date string not likely a datetime.')
+
+    if _TIMEPAT.match(date_string):
+        # use current datetime as default, not pass _DEFAULT_DATETIME
+        dt = du_parse(date_string, dayfirst=dayfirst,
+                      yearfirst=yearfirst, **kwargs)
+        return dt
+
+    try:
+        dt, _, _ = _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
+        return dt
+    except DateParseError:
+        raise
+    except ValueError:
+        pass
+
+    try:
+        dt = du_parse(date_string, default=_DEFAULT_DATETIME,
+                      dayfirst=dayfirst, yearfirst=yearfirst, **kwargs)
+    except TypeError:
+        # following may be raised from dateutil
+        # TypeError: 'NoneType' object is not iterable
+        raise ValueError('Given date string not likely a datetime.')
+
+    return dt
+
+
+def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
+    """
+    Try hard to parse datetime string, leveraging dateutil plus some extra
+    goodies like quarter recognition.
+
+    Parameters
+    ----------
+    arg : compat.string_types
+    freq : str or DateOffset, default None
+        Helps with interpreting time string if supplied
+    dayfirst : bool, default None
+        If None uses default from print_config
+    yearfirst : bool, default None
+        If None uses default from print_config
+
+    Returns
+    -------
+    datetime, datetime/dateutil.parser._result, str
+    """
+    if not isinstance(arg, string_types):
+        return arg
+
+    if getattr(freq, "_typ", None) == "dateoffset":
+        freq = freq.rule_code
+
+    if dayfirst is None:
+        from pandas.core.config import get_option
+        dayfirst = get_option("display.date_dayfirst")
+    if yearfirst is None:
+        from pandas.core.config import get_option
+        yearfirst = get_option("display.date_yearfirst")
+
+    res = parse_datetime_string_with_reso(arg, freq=freq,
+                                          dayfirst=dayfirst,
+                                          yearfirst=yearfirst)
+    if res[0] is NAT_SENTINEL:
+        from pandas._libs.tslib import NaT
+        res = (NaT,) + res[1:]
+    return res
+
+
+def parse_datetime_string_with_reso(date_string, freq=None, dayfirst=False,
+                                    yearfirst=False, **kwargs):
+    """Parse a datetime string and infer its resolution.
+
+    Returns
+    -------
+    datetime, datetime, str (resolution)
+    """
+
+    cdef:
+        object parsed, reso
+
+    if not _does_string_look_like_datetime(date_string):
+        raise ValueError('Given date string not likely a datetime.')
+
+    try:
+        return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
+    except DateParseError:
+        raise
+    except ValueError:
+        pass
+
+    try:
+        parsed, reso = dateutil_parse(date_string, _DEFAULT_DATETIME,
+                                      dayfirst=dayfirst, yearfirst=yearfirst)
+    except Exception as e:
+        # TODO: allow raise of errors within instead
+        raise DateParseError(e)
+    if parsed is None:
+        raise DateParseError("Could not parse %s" % date_string)
+    return parsed, parsed, reso
+
+
+cpdef bint _does_string_look_like_datetime(object date_string):
+    if date_string.startswith('0'):
+        # Strings starting with 0 are more consistent with a
+        # date-like string than a number
+        return True
+
+    try:
+        if float(date_string) < 1000:
+            return False
+    except ValueError:
+        pass
+
+    if date_string in _not_datelike_strings:
+        return False
+
+    return True
+
+
+cdef inline object _parse_dateabbr_string(object date_string, object default,
+                                          object freq):
+    cdef:
+        object ret
+        int year, quarter = -1, month, mnum, date_len
+
+    # special handling for possibilities eg, 2Q2005, 2Q05, 2005Q1, 05Q1
+    assert isinstance(date_string, string_types)
+
+    # len(date_string) == 0
+    # should be NaT???
+ + if date_string in _nat_strings: + return NAT_SENTINEL, NAT_SENTINEL, '' + + date_string = date_string.upper() + date_len = len(date_string) + + if date_len == 4: + # parse year only like 2000 + try: + ret = default.replace(year=int(date_string)) + return ret, ret, 'year' + except ValueError: + pass + + try: + if 4 <= date_len <= 7: + i = date_string.index('Q', 1, 6) + if i == 1: + quarter = int(date_string[0]) + if date_len == 4 or (date_len == 5 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d)') + year = 2000 + int(date_string[-2:]) + elif date_len == 6 or (date_len == 7 + and date_string[i + 1] == '-'): + # r'(\d)Q-?(\d\d\d\d)') + year = int(date_string[-4:]) + else: + raise ValueError + elif i == 2 or i == 3: + # r'(\d\d)-?Q(\d)' + if date_len == 4 or (date_len == 5 + and date_string[i - 1] == '-'): + quarter = int(date_string[-1]) + year = 2000 + int(date_string[:2]) + else: + raise ValueError + elif i == 4 or i == 5: + if date_len == 6 or (date_len == 7 + and date_string[i - 1] == '-'): + # r'(\d\d\d\d)-?Q(\d)' + quarter = int(date_string[-1]) + year = int(date_string[:4]) + else: + raise ValueError + + if not (1 <= quarter <= 4): + msg = ('Incorrect quarterly string is given, quarter must be ' + 'between 1 and 4: {0}') + raise DateParseError(msg.format(date_string)) + + if freq is not None: + # hack attack, #1228 + try: + mnum = _MONTH_NUMBERS[_get_rule_month(freq)] + 1 + except (KeyError, ValueError): + msg = ('Unable to retrieve month information from given ' + 'freq: {0}').format(freq) + raise DateParseError(msg) + + month = (mnum + (quarter - 1) * 3) % 12 + 1 + if month > mnum: + year -= 1 + else: + month = (quarter - 1) * 3 + 1 + + ret = default.replace(year=year, month=month) + return ret, ret, 'quarter' + + except DateParseError: + raise + except ValueError: + pass + + if date_len == 6 and (freq == 'M' or + getattr(freq, 'rule_code', None) == 'M'): + year = int(date_string[:4]) + month = int(date_string[4:6]) + try: + ret = default.replace(year=year, month=month) + return ret, ret, 'month' + except ValueError: + pass + + for pat in ['%Y-%m', '%m-%Y', '%b %Y', '%b-%Y']: + try: + ret = datetime.strptime(date_string, pat) + return ret, ret, 'month' + except ValueError: + pass + + raise ValueError('Unable to parse {0}'.format(date_string)) + + +def dateutil_parse(object timestr, object default, ignoretz=False, + tzinfos=None, **kwargs): + """ lifted from dateutil to get resolution""" + + cdef: + object fobj, res, attr, ret, tzdata + object reso = None + dict repl = {} + + fobj = StringIO(str(timestr)) + res = DEFAULTPARSER._parse(fobj, **kwargs) + + # dateutil 2.2 compat + if isinstance(res, tuple): # PyTuple_Check + res, _ = res + + if res is None: + msg = "Unknown datetime string format, unable to parse: {0}" + raise ValueError(msg.format(timestr)) + + for attr in ["year", "month", "day", "hour", + "minute", "second", "microsecond"]: + value = getattr(res, attr) + if value is not None: + repl[attr] = value + reso = attr + + if reso is None: + msg = "Unable to parse datetime string: {0}" + raise ValueError(msg.format(timestr)) + + if reso == 'microsecond': + if repl['microsecond'] == 0: + reso = 'second' + elif repl['microsecond'] % 1000 == 0: + reso = 'millisecond' + + ret = default.replace(**repl) + if res.weekday is not None and not res.day: + ret = ret + relativedelta.relativedelta(weekday=res.weekday) + if not ignoretz: + if callable(tzinfos) or tzinfos and res.tzname in tzinfos: + if callable(tzinfos): + tzdata = tzinfos(res.tzname, res.tzoffset) + else: + tzdata = 
tzinfos.get(res.tzname) + if isinstance(tzdata, datetime.tzinfo): + tzinfo = tzdata + elif isinstance(tzdata, string_types): + tzinfo = _dateutil_tzstr(tzdata) + elif isinstance(tzdata, int): + tzinfo = tzoffset(res.tzname, tzdata) + else: + raise ValueError("offset must be tzinfo subclass, " + "tz string, or int offset") + ret = ret.replace(tzinfo=tzinfo) + elif res.tzname and res.tzname in time.tzname: + ret = ret.replace(tzinfo=_dateutil_tzlocal()) + elif res.tzoffset == 0: + ret = ret.replace(tzinfo=_dateutil_tzutc()) + elif res.tzoffset: + ret = ret.replace(tzinfo=tzoffset(res.tzname, res.tzoffset)) + return ret, reso + + +cpdef object _get_rule_month(object source, object default='DEC'): + """ + Return starting month of given freq, default is December. + + Example + ------- + >>> _get_rule_month('D') + 'DEC' + + >>> _get_rule_month('A-JAN') + 'JAN' + """ + if hasattr(source, 'freqstr'): + source = source.freqstr + source = source.upper() + if '-' not in source: + return default + else: + return source.split('-')[1] + + +#---------------------------------------------------------------------- +# Parsing for type-inference + + +def try_parse_dates(ndarray[object] values, parser=None, + dayfirst=False, default=None): + cdef: + Py_ssize_t i, n + ndarray[object] result + + n = len(values) + result = np.empty(n, dtype='O') + + if parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + # EAFP here + try: + for i from 0 <= i < n: + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + except Exception: + # failed + return values + else: + parse_date = parser + + try: + for i from 0 <= i < n: + if values[i] == '': + result[i] = np.nan + else: + result[i] = parse_date(values[i]) + except Exception: + # raise if passed parser and it failed + raise + + return result + + +def try_parse_date_and_time(ndarray[object] dates, ndarray[object] times, + date_parser=None, time_parser=None, + dayfirst=False, default=None): + cdef: + Py_ssize_t i, n + ndarray[object] result + + n = len(dates) + if len(times) != n: + raise ValueError('Length of dates and times must be equal') + result = np.empty(n, dtype='O') + + if date_parser is None: + if default is None: # GH2618 + date = datetime.now() + default = datetime(date.year, date.month, 1) + + parse_date = lambda x: du_parse(x, dayfirst=dayfirst, default=default) + + else: + parse_date = date_parser + + if time_parser is None: + parse_time = lambda x: du_parse(x) + + else: + parse_time = time_parser + + for i from 0 <= i < n: + d = parse_date(str(dates[i])) + t = parse_time(str(times[i])) + result[i] = datetime(d.year, d.month, d.day, + t.hour, t.minute, t.second) + + return result + + +def try_parse_year_month_day(ndarray[object] years, ndarray[object] months, + ndarray[object] days): + cdef: + Py_ssize_t i, n + ndarray[object] result + + n = len(years) + if len(months) != n or len(days) != n: + raise ValueError('Length of years/months/days must all be equal') + result = np.empty(n, dtype='O') + + for i from 0 <= i < n: + result[i] = datetime(int(years[i]), int(months[i]), int(days[i])) + + return result + + +def try_parse_datetime_components(ndarray[object] years, + ndarray[object] months, + ndarray[object] days, + ndarray[object] hours, + ndarray[object] minutes, + ndarray[object] seconds): + + cdef: + Py_ssize_t i, n + ndarray[object] result + int secs + double float_secs + double 
micros
+
+    n = len(years)
+    if (len(months) != n or len(days) != n or len(hours) != n or
+            len(minutes) != n or len(seconds) != n):
+        raise ValueError('Length of all datetime components must be equal')
+    result = np.empty(n, dtype='O')
+
+    for i from 0 <= i < n:
+        float_secs = float(seconds[i])
+        secs = int(float_secs)
+
+        micros = float_secs - secs
+        if micros > 0:
+            micros = micros * 1000000
+
+        result[i] = datetime(int(years[i]), int(months[i]), int(days[i]),
+                             int(hours[i]), int(minutes[i]), secs,
+                             int(micros))
+
+    return result
+
+
+#----------------------------------------------------------------------
+# Miscellaneous
+
+_DATEUTIL_LEXER_SPLIT = None
+try:
+    # Since these are private methods from dateutil, it is safely imported
+    # here so in case this interface changes, pandas will just fallback
+    # to not using the functionality
+    from dateutil.parser import _timelex
+
+    if hasattr(_timelex, 'split'):
+        def _lexer_split_from_str(dt_str):
+            # The StringIO(str(_)) is for dateutil 2.2 compatibility
+            return _timelex.split(StringIO(str(dt_str)))
+
+        _DATEUTIL_LEXER_SPLIT = _lexer_split_from_str
+except (ImportError, AttributeError):
+    pass
+
+
+def _format_is_iso(f):
+    """
+    Does format match the iso8601 set that can be handled by the C parser?
+    Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different
+    but must be consistent. Leading 0s in dates and times are optional.
+    """
+    iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S.%f'.format
+    excluded_formats = ['%Y%m%d', '%Y%m', '%Y']
+
+    for date_sep in [' ', '/', '\\', '-', '.', '']:
+        for time_sep in [' ', 'T']:
+            if (iso_template(date_sep=date_sep,
+                             time_sep=time_sep
+                             ).startswith(f) and f not in excluded_formats):
+                return True
+    return False
+
+
+def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
+                           dt_str_split=_DATEUTIL_LEXER_SPLIT):
+    """
+    Guess the datetime format of a given datetime string.
+
+    Parameters
+    ----------
+    dt_str : string, datetime string to guess the format of
+    dayfirst : boolean, default False
+        If True parses dates with the day first, eg 20/01/2005
+        Warning: dayfirst=True is not strict, but will prefer to parse
+        with day first (this is a known bug).
+    dt_str_parse : function, defaults to `du_parse` (dateutil)
+        This function should take in a datetime string and return
+        a `datetime.datetime` guess that the datetime string represents
+    dt_str_split : function, defaults to `_DATEUTIL_LEXER_SPLIT` (dateutil)
+        This function should take in a datetime string and return
+        a list of strings, the guess of the various specific parts
+        e.g.
'2011/12/30' -> ['2011', '/', '12', '/', '30']
+
+    Returns
+    -------
+    ret : datetime format string (for `strftime` or `strptime`)
+    """
+    if dt_str_parse is None or dt_str_split is None:
+        return None
+
+    if not isinstance(dt_str, string_types):
+        return None
+
+    day_attribute_and_format = (('day',), '%d', 2)
+
+    # attr name, format, padding (if any)
+    datetime_attrs_to_format = [
+        (('year', 'month', 'day'), '%Y%m%d', 0),
+        (('year',), '%Y', 0),
+        (('month',), '%B', 0),
+        (('month',), '%b', 0),
+        (('month',), '%m', 2),
+        day_attribute_and_format,
+        (('hour',), '%H', 2),
+        (('minute',), '%M', 2),
+        (('second',), '%S', 2),
+        (('microsecond',), '%f', 6),
+        (('second', 'microsecond'), '%S.%f', 0),
+    ]
+
+    if dayfirst:
+        datetime_attrs_to_format.remove(day_attribute_and_format)
+        datetime_attrs_to_format.insert(0, day_attribute_and_format)
+
+    try:
+        parsed_datetime = dt_str_parse(dt_str, dayfirst=dayfirst)
+    except:
+        # In case the datetime can't be parsed, its format cannot be guessed
+        return None
+
+    if parsed_datetime is None:
+        return None
+
+    try:
+        tokens = dt_str_split(dt_str)
+    except:
+        # In case the datetime string can't be split, its format cannot
+        # be guessed
+        return None
+
+    format_guess = [None] * len(tokens)
+    found_attrs = set()
+
+    for attrs, attr_format, padding in datetime_attrs_to_format:
+        # If a given attribute has been placed in the format string, skip
+        # over other formats for that same underlying attribute (i.e., month
+        # can be represented in multiple different ways)
+        if set(attrs) & found_attrs:
+            continue
+
+        if all(getattr(parsed_datetime, attr) is not None for attr in attrs):
+            for i, token_format in enumerate(format_guess):
+                token_filled = tokens[i].zfill(padding)
+                if (token_format is None and
+                        token_filled == parsed_datetime.strftime(attr_format)):
+                    format_guess[i] = attr_format
+                    tokens[i] = token_filled
+                    found_attrs.update(attrs)
+                    break
+
+    # Only consider it a valid guess if we have a year, month and day
+    if len(set(['year', 'month', 'day']) & found_attrs) != 3:
+        return None
+
+    output_format = []
+    for i, guess in enumerate(format_guess):
+        if guess is not None:
+            # Either fill in the format placeholder (like %Y)
+            output_format.append(guess)
+        else:
+            # Or just the token separator (i.e., the dashes in "01-01-2013")
+            try:
+                # If the token is numeric, then we likely didn't parse it
+                # properly, so our guess is wrong
+                float(tokens[i])
+                return None
+            except ValueError:
+                pass
+
+            output_format.append(tokens[i])
+
+    guessed_format = ''.join(output_format)
+
+    # rebuild string, capturing any inferred padding
+    dt_str = ''.join(tokens)
+    if parsed_datetime.strftime(guessed_format) == dt_str:
+        return guessed_format
+    else:
+        return None
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index be26720adb0bda..dba616c2d15e62 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -7,6 +7,7 @@
     algos as libalgos, join as libjoin, Timestamp, Timedelta,
 )
 from pandas._libs.lib import is_datetime_array
+from pandas._libs.tslibs import parsing

 from pandas.compat import range, u
 from pandas.compat.numpy import function as nv
@@ -1037,7 +1038,7 @@ def to_datetime(self, dayfirst=False):
         if self.inferred_type == 'string':
             from dateutil.parser import parse
             parser = lambda x: parse(x, dayfirst=dayfirst)
-            parsed = lib.try_parse_dates(self.values, parser=parser)
+            parsed = parsing.try_parse_dates(self.values, parser=parser)
             return DatetimeIndex(parsed)
         else:
             return DatetimeIndex(self.values)
diff --git
a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index bf89509fd17467..97ac8445faf4c2 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -2,9 +2,14 @@ import numpy as np from collections import MutableMapping -from pandas._libs import lib, tslib +from pandas._libs import tslib from pandas._libs.tslibs.strptime import array_strptime from pandas._libs.tslibs.timezones import get_timezone +from pandas._libs.tslibs import parsing +from pandas._libs.tslibs.parsing import ( # noqa + parse_time_string, + _format_is_iso, + _guess_datetime_format) from pandas.core.dtypes.common import ( _ensure_object, @@ -19,28 +24,10 @@ is_numeric_dtype) from pandas.core.dtypes.generic import ( ABCIndexClass, ABCSeries, - ABCDataFrame, ABCDateOffset) + ABCDataFrame) from pandas.core.dtypes.missing import notna from pandas.core import algorithms -import pandas.compat as compat - -_DATEUTIL_LEXER_SPLIT = None -try: - # Since these are private methods from dateutil, it is safely imported - # here so in case this interface changes, pandas will just fallback - # to not using the functionality - from dateutil.parser import _timelex - - if hasattr(_timelex, 'split'): - def _lexer_split_from_str(dt_str): - # The StringIO(str(_)) is for dateutil 2.2 compatibility - return _timelex.split(compat.StringIO(str(dt_str))) - - _DATEUTIL_LEXER_SPLIT = _lexer_split_from_str -except (ImportError, AttributeError): - pass - def _infer_tzinfo(start, end): def _infer(a, b): @@ -60,123 +47,6 @@ def _infer(a, b): return tz -def _guess_datetime_format(dt_str, dayfirst=False, - dt_str_parse=compat.parse_date, - dt_str_split=_DATEUTIL_LEXER_SPLIT): - """ - Guess the datetime format of a given datetime string. - - Parameters - ---------- - dt_str : string, datetime string to guess the format of - dayfirst : boolean, default False - If True parses dates with the day first, eg 20/01/2005 - Warning: dayfirst=True is not strict, but will prefer to parse - with day first (this is a known bug). - dt_str_parse : function, defaults to `compat.parse_date` (dateutil) - This function should take in a datetime string and return - a `datetime.datetime` guess that the datetime string represents - dt_str_split : function, defaults to `_DATEUTIL_LEXER_SPLIT` (dateutil) - This function should take in a datetime string and return - a list of strings, the guess of the various specific parts - e.g. 
'2011/12/30' -> ['2011', '/', '12', '/', '30'] - - Returns - ------- - ret : datetime format string (for `strftime` or `strptime`) - """ - if dt_str_parse is None or dt_str_split is None: - return None - - if not isinstance(dt_str, compat.string_types): - return None - - day_attribute_and_format = (('day',), '%d', 2) - - # attr name, format, padding (if any) - datetime_attrs_to_format = [ - (('year', 'month', 'day'), '%Y%m%d', 0), - (('year',), '%Y', 0), - (('month',), '%B', 0), - (('month',), '%b', 0), - (('month',), '%m', 2), - day_attribute_and_format, - (('hour',), '%H', 2), - (('minute',), '%M', 2), - (('second',), '%S', 2), - (('microsecond',), '%f', 6), - (('second', 'microsecond'), '%S.%f', 0), - ] - - if dayfirst: - datetime_attrs_to_format.remove(day_attribute_and_format) - datetime_attrs_to_format.insert(0, day_attribute_and_format) - - try: - parsed_datetime = dt_str_parse(dt_str, dayfirst=dayfirst) - except: - # In case the datetime can't be parsed, its format cannot be guessed - return None - - if parsed_datetime is None: - return None - - try: - tokens = dt_str_split(dt_str) - except: - # In case the datetime string can't be split, its format cannot - # be guessed - return None - - format_guess = [None] * len(tokens) - found_attrs = set() - - for attrs, attr_format, padding in datetime_attrs_to_format: - # If a given attribute has been placed in the format string, skip - # over other formats for that same underlying attribute (IE, month - # can be represented in multiple different ways) - if set(attrs) & found_attrs: - continue - - if all(getattr(parsed_datetime, attr) is not None for attr in attrs): - for i, token_format in enumerate(format_guess): - token_filled = tokens[i].zfill(padding) - if (token_format is None and - token_filled == parsed_datetime.strftime(attr_format)): - format_guess[i] = attr_format - tokens[i] = token_filled - found_attrs.update(attrs) - break - - # Only consider it a valid guess if we have a year, month and day - if len(set(['year', 'month', 'day']) & found_attrs) != 3: - return None - - output_format = [] - for i, guess in enumerate(format_guess): - if guess is not None: - # Either fill in the format placeholder (like %Y) - output_format.append(guess) - else: - # Or just the token separate (IE, the dashes in "01-01-2013") - try: - # If the token is numeric, then we likely didn't parse it - # properly, so our guess is wrong - float(tokens[i]) - return None - except ValueError: - pass - - output_format.append(tokens[i]) - - guessed_format = ''.join(output_format) - - # rebuild string, capturing any inferred padding - dt_str = ''.join(tokens) - if parsed_datetime.strftime(guessed_format) == dt_str: - return guessed_format - - def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element non_nan_elements = notna(arr).nonzero()[0] @@ -655,9 +525,9 @@ def _attempt_YYYYMMDD(arg, errors): def calc(carg): # calculate the actual result carg = carg.astype(object) - parsed = lib.try_parse_year_month_day(carg / 10000, - carg / 100 % 100, - carg % 100) + parsed = parsing.try_parse_year_month_day(carg / 10000, + carg / 100 % 100, + carg % 100) return tslib.array_to_datetime(parsed, errors=errors) def calc_with_mask(carg, mask): @@ -691,60 +561,6 @@ def calc_with_mask(carg, mask): return None -def _format_is_iso(f): - """ - Does format match the iso8601 set that can be handled by the C parser? - Generally of form YYYY-MM-DDTHH:MM:SS - date separator can be different - but must be consistent. 
Leading 0s in dates and times are optional. - """ - iso_template = '%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S.%f'.format - excluded_formats = ['%Y%m%d', '%Y%m', '%Y'] - - for date_sep in [' ', '/', '\\', '-', '.', '']: - for time_sep in [' ', 'T']: - if (iso_template(date_sep=date_sep, - time_sep=time_sep - ).startswith(f) and f not in excluded_formats): - return True - return False - - -def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): - """ - Try hard to parse datetime string, leveraging dateutil plus some extra - goodies like quarter recognition. - - Parameters - ---------- - arg : compat.string_types - freq : str or DateOffset, default None - Helps with interpreting time string if supplied - dayfirst : bool, default None - If None uses default from print_config - yearfirst : bool, default None - If None uses default from print_config - - Returns - ------- - datetime, datetime/dateutil.parser._result, str - """ - from pandas.core.config import get_option - if not isinstance(arg, compat.string_types): - return arg - - if isinstance(freq, ABCDateOffset): - freq = freq.rule_code - - if dayfirst is None: - dayfirst = get_option("display.date_dayfirst") - if yearfirst is None: - yearfirst = get_option("display.date_yearfirst") - - return tslib.parse_datetime_string_with_reso(arg, freq=freq, - dayfirst=dayfirst, - yearfirst=yearfirst) - - DateParseError = tslib.DateParseError normalize_date = tslib.normalize_date diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index 080d6c3e273a3d..377373f8a01356 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,20 +1,20 @@ """This module is designed for community supported date conversion functions""" from pandas.compat import range, map import numpy as np -import pandas._libs.lib as lib +from pandas._libs.tslibs import parsing def parse_date_time(date_col, time_col): date_col = _maybe_cast(date_col) time_col = _maybe_cast(time_col) - return lib.try_parse_date_and_time(date_col, time_col) + return parsing.try_parse_date_and_time(date_col, time_col) def parse_date_fields(year_col, month_col, day_col): year_col = _maybe_cast(year_col) month_col = _maybe_cast(month_col) day_col = _maybe_cast(day_col) - return lib.try_parse_year_month_day(year_col, month_col, day_col) + return parsing.try_parse_year_month_day(year_col, month_col, day_col) def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, @@ -25,8 +25,9 @@ def parse_all_fields(year_col, month_col, day_col, hour_col, minute_col, hour_col = _maybe_cast(hour_col) minute_col = _maybe_cast(minute_col) second_col = _maybe_cast(second_col) - return lib.try_parse_datetime_components(year_col, month_col, day_col, - hour_col, minute_col, second_col) + return parsing.try_parse_datetime_components(year_col, month_col, day_col, + hour_col, minute_col, + second_col) def generic_parser(parse_func, *cols): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index ed15d4295d6881..eeb79552477e12 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -42,7 +42,7 @@ import pandas._libs.lib as lib import pandas._libs.parsers as parsers - +from pandas._libs.tslibs import parsing # BOM character (byte order mark) # This exists at the beginning of a file to indicate endianness @@ -2981,7 +2981,7 @@ def converter(*date_cols): ) except: return tools.to_datetime( - lib.try_parse_dates(strs, dayfirst=dayfirst)) + parsing.try_parse_dates(strs, dayfirst=dayfirst)) else: try: result = tools.to_datetime( @@ -2992,9 +2992,9 @@ 
def converter(*date_cols): except Exception: try: return tools.to_datetime( - lib.try_parse_dates(_concat_date_cols(date_cols), - parser=date_parser, - dayfirst=dayfirst), + parsing.try_parse_dates(_concat_date_cols(date_cols), + parser=date_parser, + dayfirst=dayfirst), errors='ignore') except Exception: return generic_parser(date_parser, *date_cols) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index e0ccedb834adf9..bdfe6b5b09e45a 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -12,7 +12,8 @@ from distutils.version import LooseVersion import pandas as pd -from pandas._libs import tslib, lib +from pandas._libs import tslib +from pandas._libs.tslibs import parsing from pandas.core.tools import datetimes as tools from pandas.core.tools.datetimes import normalize_date from pandas.compat import lmap @@ -1063,7 +1064,7 @@ def test_does_not_convert_mixed_integer(self): bad_date_strings = ('-50000', '999', '123.1234', 'm', 'T') for bad_date_string in bad_date_strings: - assert not tslib._does_string_look_like_datetime(bad_date_string) + assert not parsing._does_string_look_like_datetime(bad_date_string) good_date_strings = ('2012-01-01', '01/01/2012', @@ -1073,7 +1074,7 @@ def test_does_not_convert_mixed_integer(self): '1-1', ) for good_date_string in good_date_strings: - assert tslib._does_string_look_like_datetime(good_date_string) + assert parsing._does_string_look_like_datetime(good_date_string) def test_parsers(self): @@ -1412,7 +1413,7 @@ class TestArrayToDatetime(object): def test_try_parse_dates(self): arr = np.array(['5/1/2000', '6/1/2000', '7/1/2000'], dtype=object) - result = lib.try_parse_dates(arr, dayfirst=True) + result = parsing.try_parse_dates(arr, dayfirst=True) expected = [parse(d, dayfirst=True) for d in arr] assert np.array_equal(result, expected) diff --git a/pandas/tests/io/parser/parse_dates.py b/pandas/tests/io/parser/parse_dates.py index e1ae1b577ea296..90103e7bf26b02 100644 --- a/pandas/tests/io/parser/parse_dates.py +++ b/pandas/tests/io/parser/parse_dates.py @@ -10,7 +10,7 @@ import pytest import numpy as np -import pandas._libs.lib as lib +from pandas._libs.tslibs import parsing from pandas._libs.lib import Timestamp import pandas as pd @@ -53,7 +53,8 @@ def test_multiple_date_col(self): """ def func(*date_cols): - return lib.try_parse_dates(parsers._concat_date_cols(date_cols)) + res = parsing.try_parse_dates(parsers._concat_date_cols(date_cols)) + return res df = self.read_csv(StringIO(data), header=None, date_parser=func, diff --git a/setup.py b/setup.py index 25a4924dad0bc8..d25ae4a5fb45ce 100755 --- a/setup.py +++ b/setup.py @@ -343,6 +343,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/parsers.pyx', 'pandas/_libs/tslibs/timezones.pyx', 'pandas/_libs/tslibs/frequencies.pyx', + 'pandas/_libs/tslibs/parsing.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -498,6 +499,8 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.parsing': {'pyxfile': '_libs/tslibs/parsing', + 'pxdfiles': ['_libs/src/util']}, '_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies', 'pxdfiles': ['_libs/src/util']}, '_libs.index': {'pyxfile': '_libs/index', From b5842bb7c8196666aa22d819652d1db7a5aeb582 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 26 Sep 2017 16:30:39 +0200 Subject: [PATCH 157/188] 
DOC: fix no autosummary for numerical index api pages (#17642) --- doc/sphinxext/numpydoc/numpydoc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/sphinxext/numpydoc/numpydoc.py b/doc/sphinxext/numpydoc/numpydoc.py index 710c3cc9842c45..f06915997c6162 100755 --- a/doc/sphinxext/numpydoc/numpydoc.py +++ b/doc/sphinxext/numpydoc/numpydoc.py @@ -43,9 +43,10 @@ def mangle_docstrings(app, what, name, obj, options, lines, ) # PANDAS HACK (to remove the list of methods/attributes for Categorical) - if what == "class" and (name.endswith(".Categorical") or - name.endswith("CategoricalIndex") or - name.endswith("IntervalIndex")): + no_autosummary = [".Categorical", "CategoricalIndex", "IntervalIndex", + "RangeIndex", "Int64Index", "UInt64Index", + "Float64Index"] + if what == "class" and any(name.endswith(n) for n in no_autosummary): cfg['class_members_list'] = False if what == 'module': From 66f4cc11725fd244c029058e9ac4743c4f36e2b4 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Tue, 26 Sep 2017 12:25:07 -0400 Subject: [PATCH 158/188] BUG: remove tab completion for deprecated functions (#17683) closes #17674 --- pandas/core/accessor.py | 3 ++- pandas/core/categorical.py | 8 +------- pandas/core/frame.py | 1 + pandas/core/generic.py | 2 ++ pandas/core/series.py | 2 ++ pandas/tests/frame/test_api.py | 11 +++++++++++ pandas/tests/series/test_api.py | 11 +++++++++++ 7 files changed, 30 insertions(+), 8 deletions(-) diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index c8476841bfce47..7a2da9655cc4a0 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -10,10 +10,11 @@ class DirNamesMixin(object): _accessors = frozenset([]) + _deprecations = frozenset([]) def _dir_deletions(self): """ delete unwanted __dir__ for this object """ - return self._accessors + return self._accessors | self._deprecations def _dir_additions(self): """ add addtional __dir__ for this object """ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 8b055e9ae59c3a..011aa746322965 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -231,6 +231,7 @@ class Categorical(PandasObject): # ops, which raise __array_priority__ = 1000 _dtype = CategoricalDtype() + _deprecations = frozenset(['labels']) _typ = 'categorical' def __init__(self, values, categories=None, ordered=None, dtype=None, @@ -412,13 +413,6 @@ def dtype(self): """The :ref:`~pandas.api.types.CategoricalDtype` for this instance""" return self._dtype - def __dir__(self): - # Avoid IPython warnings for deprecated properties - # https://github.com/pandas-dev/pandas/issues/16409 - rv = set(dir(type(self))) - rv.discard("labels") - return sorted(rv) - @property def _constructor(self): return Categorical diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 912dbdb9de7059..579d9f10d5875b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -299,6 +299,7 @@ def _constructor(self): return DataFrame _constructor_sliced = Series + _deprecations = NDFrame._deprecations | frozenset(['sortlevel']) @property def _constructor_expanddim(self): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a7be145f210833..2fb0e348c01c0b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -112,6 +112,8 @@ class NDFrame(PandasObject, SelectionMixin): '__array_interface__'] _internal_names_set = set(_internal_names) _accessors = frozenset([]) + _deprecations = frozenset(['as_blocks', 'blocks', + 'consolidate', 'convert_objects']) _metadata = 
[] is_copy = None diff --git a/pandas/core/series.py b/pandas/core/series.py index db8ee2529ef577..89add1ef4c5907 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -145,6 +145,8 @@ class Series(base.IndexOpsMixin, generic.NDFrame): """ _metadata = ['name'] _accessors = frozenset(['dt', 'cat', 'str']) + _deprecations = generic.NDFrame._deprecations | frozenset( + ['sortlevel', 'reshape']) _allow_index_ops = True def __init__(self, data=None, index=None, dtype=None, name=None, diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 230a5806ccb2e6..5ea8230ced41b9 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -438,3 +438,14 @@ def _check_f(base, f): # rename f = lambda x: x.rename({1: 'foo'}, inplace=True) _check_f(d.copy(), f) + + def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip('IPython', minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; df = pd.DataFrame()" + ip.run_code(code) + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('df.', 1)) diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index d0805e2bb54d25..56b8a90ec0c9f1 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -407,3 +407,14 @@ def test_empty_method(self): for full_series in [pd.Series([1]), pd.Series(index=[1])]: assert not full_series.empty + + def test_tab_complete_warning(self, ip): + # https://github.com/pandas-dev/pandas/issues/16409 + pytest.importorskip('IPython', minversion="6.0.0") + from IPython.core.completer import provisionalcompleter + + code = "import pandas as pd; s = pd.Series()" + ip.run_code(code) + with tm.assert_produces_warning(None): + with provisionalcompleter('ignore'): + list(ip.Completer.completions('s.', 1)) From d3be81ad595c5338781bed9963c729a9702e6611 Mon Sep 17 00:00:00 2001 From: kernc Date: Tue, 26 Sep 2017 21:01:39 +0200 Subject: [PATCH 159/188] BUG: Fix/test SparseSeries/SparseDataFrame stack/unstack (#16616) --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/categorical.py | 10 ++- pandas/core/internals.py | 97 ++++++++++++++++++++++++++++- pandas/core/reshape/reshape.py | 49 +++++---------- pandas/tests/sparse/test_reshape.py | 38 +++++++++++ 5 files changed, 159 insertions(+), 37 deletions(-) create mode 100644 pandas/tests/sparse/test_reshape.py diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b6bd86bd79a1f2..06f19782682b03 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -607,7 +607,7 @@ Sparse - Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) - Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) - +- Bug in :func:`SparseSeries.unstack` and :func:`SparseDataFrame.stack` (:issue:`16614`, :issue:`15045`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 011aa746322965..d79937829cf3fb 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -125,10 +125,16 @@ def f(self, other): return f -def maybe_to_categorical(array): - """ coerce to a categorical if a series is given """ +def _maybe_to_categorical(array): + """ + Coerce to a categorical if a series is given. 
+ + Internal use ONLY. + """ if isinstance(array, (ABCSeries, ABCCategoricalIndex)): return array._values + elif isinstance(array, np.ndarray): + return Categorical(array) return array diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e6f61a22e31373..9e348819ce5a3c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -56,7 +56,7 @@ from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.core.indexing import maybe_convert_indices, length_of_indexer -from pandas.core.categorical import Categorical, maybe_to_categorical +from pandas.core.categorical import Categorical, _maybe_to_categorical from pandas.core.indexes.datetimes import DatetimeIndex from pandas.io.formats.printing import pprint_thing @@ -1484,6 +1484,35 @@ def equals(self, other): return False return array_equivalent(self.values, other.values) + def _unstack(self, unstacker_func, new_columns): + """Return a list of unstacked blocks of self + + Parameters + ---------- + unstacker_func : callable + Partially applied unstacker. + new_columns : Index + All columns of the unstacked BlockManager. + + Returns + ------- + blocks : list of Block + New blocks of unstacked values. + mask : array_like of bool + The mask of columns of `blocks` we should keep. + """ + unstacker = unstacker_func(self.values.T) + new_items = unstacker.get_new_columns() + new_placement = new_columns.get_indexer(new_items) + new_values, mask = unstacker.get_new_values() + + mask = mask.any(0) + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + blocks = [make_block(new_values, placement=new_placement)] + return blocks, mask + def quantile(self, qs, interpolation='linear', axis=0, mgr=None): """ compute the quantiles of the @@ -1712,6 +1741,38 @@ def _slice(self, slicer): def _try_cast_result(self, result, dtype=None): return result + def _unstack(self, unstacker_func, new_columns): + """Return a list of unstacked blocks of self + + Parameters + ---------- + unstacker_func : callable + Partially applied unstacker. + new_columns : Index + All columns of the unstacked BlockManager. + + Returns + ------- + blocks : list of Block + New blocks of unstacked values. + mask : array_like of bool + The mask of columns of `blocks` we should keep. + """ + # NonConsolidatable blocks can have a single item only, so we return + # one block per item + unstacker = unstacker_func(self.values.T) + new_items = unstacker.get_new_columns() + new_placement = new_columns.get_indexer(new_items) + new_values, mask = unstacker.get_new_values() + + mask = mask.any(0) + new_values = new_values.T[mask] + new_placement = new_placement[mask] + + blocks = [self.make_block_same_class(vals, [place]) + for vals, place in zip(new_values, new_placement)] + return blocks, mask + class NumericBlock(Block): __slots__ = () @@ -2227,7 +2288,7 @@ class CategoricalBlock(NonConsolidatableMixIn, ObjectBlock): def __init__(self, values, placement, fastpath=False, **kwargs): # coerce to categorical if we can - super(CategoricalBlock, self).__init__(maybe_to_categorical(values), + super(CategoricalBlock, self).__init__(_maybe_to_categorical(values), fastpath=True, placement=placement, **kwargs) @@ -4192,6 +4253,38 @@ def canonicalize(block): return all(block.equals(oblock) for block, oblock in zip(self_blocks, other_blocks)) + def unstack(self, unstacker_func): + """Return a blockmanager with all blocks unstacked. + + Parameters + ---------- + unstacker_func : callable + A (partially-applied) ``pd.core.reshape._Unstacker`` class. 
+ + Returns + ------- + unstacked : BlockManager + """ + dummy = unstacker_func(np.empty((0, 0)), value_columns=self.items) + new_columns = dummy.get_new_columns() + new_index = dummy.get_new_index() + new_blocks = [] + columns_mask = [] + + for blk in self.blocks: + blocks, mask = blk._unstack( + partial(unstacker_func, + value_columns=self.items[blk.mgr_locs.indexer]), + new_columns) + + new_blocks.extend(blocks) + columns_mask.extend(mask) + + new_columns = new_columns[columns_mask] + + bm = BlockManager(new_blocks, [new_columns, new_index]) + return bm + class SingleBlockManager(BlockManager): """ manage a single block with """ diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index bff09be6149f32..d280c4f3f73d7b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -2,6 +2,7 @@ # pylint: disable=W0703,W0622,W0613,W0201 from pandas.compat import range, text_type, zip from pandas import compat +from functools import partial import itertools import re @@ -10,7 +11,7 @@ from pandas.core.dtypes.common import ( _ensure_platform_int, is_list_like, is_bool_dtype, - needs_i8_conversion) + needs_i8_conversion, is_sparse) from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.missing import notna import pandas.core.dtypes.concat as _concat @@ -75,10 +76,15 @@ def __init__(self, values, index, level=-1, value_columns=None, fill_value=None): self.is_categorical = None + self.is_sparse = is_sparse(values) if values.ndim == 1: if isinstance(values, Categorical): self.is_categorical = values values = np.array(values) + elif self.is_sparse: + # XXX: Makes SparseArray *dense*, but it's supposedly + # a single column at a time, so it's "doable" + values = values.values values = values[:, np.newaxis] self.values = values self.value_columns = value_columns @@ -177,7 +183,8 @@ def get_result(self): ordered=ordered) for i in range(values.shape[-1])] - return DataFrame(values, index=index, columns=columns) + klass = SparseDataFrame if self.is_sparse else DataFrame + return klass(values, index=index, columns=columns) def get_new_values(self): values = self.values @@ -469,36 +476,12 @@ def unstack(obj, level, fill_value=None): def _unstack_frame(obj, level, fill_value=None): - from pandas.core.internals import BlockManager, make_block - if obj._is_mixed_type: - unstacker = _Unstacker(np.empty(obj.shape, dtype=bool), # dummy - obj.index, level=level, - value_columns=obj.columns) - new_columns = unstacker.get_new_columns() - new_index = unstacker.get_new_index() - new_axes = [new_columns, new_index] - - new_blocks = [] - mask_blocks = [] - for blk in obj._data.blocks: - blk_items = obj._data.items[blk.mgr_locs.indexer] - bunstacker = _Unstacker(blk.values.T, obj.index, level=level, - value_columns=blk_items, - fill_value=fill_value) - new_items = bunstacker.get_new_columns() - new_placement = new_columns.get_indexer(new_items) - new_values, mask = bunstacker.get_new_values() - - mblk = make_block(mask.T, placement=new_placement) - mask_blocks.append(mblk) - - newb = make_block(new_values.T, placement=new_placement) - new_blocks.append(newb) - - result = DataFrame(BlockManager(new_blocks, new_axes)) - mask_frame = DataFrame(BlockManager(mask_blocks, new_axes)) - return result.loc[:, mask_frame.sum(0) > 0] + unstacker = partial(_Unstacker, index=obj.index, + level=level, fill_value=fill_value) + blocks = obj._data.unstack(unstacker) + klass = type(obj) + return klass(blocks) else: unstacker = _Unstacker(obj.values, obj.index, level=level, 
value_columns=obj.columns, @@ -559,7 +542,9 @@ def factorize(index): mask = notna(new_values) new_values = new_values[mask] new_index = new_index[mask] - return Series(new_values, index=new_index) + + klass = type(frame)._constructor_sliced + return klass(new_values, index=new_index) def stack_multiple(frame, level, dropna=True): diff --git a/pandas/tests/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py new file mode 100644 index 00000000000000..b492c47375bcf5 --- /dev/null +++ b/pandas/tests/sparse/test_reshape.py @@ -0,0 +1,38 @@ +import pytest +import numpy as np + +import pandas as pd +import pandas.util.testing as tm + + +@pytest.fixture +def sparse_df(): + return pd.SparseDataFrame({0: {0: 1}, 1: {1: 1}, 2: {2: 1}}) # eye + + +@pytest.fixture +def multi_index3(): + return pd.MultiIndex.from_tuples([(0, 0), (1, 1), (2, 2)]) + + +def test_sparse_frame_stack(sparse_df, multi_index3): + ss = sparse_df.stack() + expected = pd.SparseSeries(np.ones(3), index=multi_index3) + tm.assert_sp_series_equal(ss, expected) + + +def test_sparse_frame_unstack(sparse_df): + mi = pd.MultiIndex.from_tuples([(0, 0), (1, 0), (1, 2)]) + sparse_df.index = mi + arr = np.array([[1, np.nan, np.nan], + [np.nan, 1, np.nan], + [np.nan, np.nan, 1]]) + unstacked_df = pd.DataFrame(arr, index=mi).unstack() + unstacked_sdf = sparse_df.unstack() + + tm.assert_numpy_array_equal(unstacked_df.values, unstacked_sdf.values) + + +def test_sparse_series_unstack(sparse_df, multi_index3): + frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() + tm.assert_sp_frame_equal(frame, sparse_df) From 44747c8e91cee3599f96a4f154cb0323269b4ef1 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 27 Sep 2017 03:20:43 -0700 Subject: [PATCH 160/188] typo fix evalute_compare-->evaluate_compare (#17688) --- pandas/core/indexes/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dba616c2d15e62..c4e1398d0178fe 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3742,7 +3742,7 @@ def _evaluate_with_timedelta_like(self, other, op, opstr): def _evaluate_with_datetime_like(self, other, op, opstr): raise TypeError("can only perform ops with datetime like values") - def _evalute_compare(self, op): + def _evaluate_compare(self, op): raise base.AbstractMethodError(self) @classmethod From f9d88cd6b3543bbb678378fc4fe736f13497d21e Mon Sep 17 00:00:00 2001 From: Lucas Kushner Date: Wed, 27 Sep 2017 10:07:47 -0500 Subject: [PATCH 161/188] Deprecating Series.argmin and Series.argmax (#16830) (#16955) * Deprecating Series.argmin and Series.argmax (#16830) Added statements about correcting behavior in future commit Add reference to github ticket Fixing placement of github comment Made test code more explicit Fixing unrelated tests that are also throwing warnings Updating whatsnew to give more detail about deprecation Fixing whatsnew and breaking out tests to catch warnings Additional comments and more concise whatsnew Updating deprecate decorator to support custom message DOC: Update docstrings, depr message, and whatsnew * Added debug prints * Try splitting the filters * Reword whatsnew * Change sparse series test * Skip on py2 * Change to idxmin * Remove py2 skips * Catch more warnings * Final switch to idxmax * Consistent tests, refactor to_string * Fixed tests --- doc/source/whatsnew/v0.21.0.txt | 22 ++++++++++ pandas/core/series.py | 29 +++++++++---- pandas/io/formats/format.py | 4 +- pandas/tests/series/test_analytics.py 
| 60 ++++++++++++++++++++------- pandas/tests/series/test_api.py | 2 +- pandas/tests/series/test_operators.py | 28 ++++++------- pandas/tests/sparse/test_series.py | 16 ++++++- pandas/util/_decorators.py | 8 ++-- 8 files changed, 124 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 06f19782682b03..ae55b4a0aa4691 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -487,11 +487,33 @@ Other API Changes Deprecations ~~~~~~~~~~~~ + - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) +.. _whatsnew_0210.deprecations.argmin_min + +Series.argmax and Series.argmin +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +- The behavior of :func:`Series.argmax` has been deprecated in favor of :func:`Series.idxmax` (:issue:`16830`) +- The behavior of :func:`Series.argmin` has been deprecated in favor of :func:`Series.idxmin` (:issue:`16830`) + +For compatibility with NumPy arrays, ``pd.Series`` implements ``argmax`` and +``argmin``. Since pandas 0.13.0, ``argmax`` has been an alias for +:meth:`pandas.Series.idxmax`, and ``argmin`` has been an alias for +:meth:`pandas.Series.idxmin`. They return the *label* of the maximum or minimum, +rather than the *position*. + +We've deprecated the current behavior of ``Series.argmax`` and +``Series.argmin``. Using either of these will emit a ``FutureWarning``. Use +:meth:`Series.idxmax` if you want the label of the maximum. Use +``Series.values.argmax()`` if you want the position of the maximum. Likewise for +the minimum. In a future release ``Series.argmax`` and ``Series.argmin`` will +return the position of the maximum or minimum. + .. _whatsnew_0210.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/series.py b/pandas/core/series.py index 89add1ef4c5907..a05324142b223a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -69,7 +69,8 @@ import pandas.core.common as com import pandas.core.nanops as nanops import pandas.io.formats.format as fmt -from pandas.util._decorators import Appender, deprecate_kwarg, Substitution +from pandas.util._decorators import ( + Appender, deprecate, deprecate_kwarg, Substitution) from pandas.util._validators import validate_bool_kwarg from pandas._libs import index as libindex, tslib as libts, lib, iNaT @@ -1274,7 +1275,7 @@ def duplicated(self, keep='first'): def idxmin(self, axis=None, skipna=True, *args, **kwargs): """ - Index of first occurrence of minimum of values. + Index *label* of the first occurrence of minimum of values. Parameters ---------- @@ -1287,7 +1288,9 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs): Notes ----- - This method is the Series version of ``ndarray.argmin``. + This method is the Series version of ``ndarray.argmin``. This method + returns the label of the minimum, while ``ndarray.argmin`` returns + the position. To get the position, use ``series.values.argmin()``. 
 
         See Also
         --------
@@ -1302,7 +1305,7 @@ def idxmin(self, axis=None, skipna=True, *args, **kwargs):
 
     def idxmax(self, axis=None, skipna=True, *args, **kwargs):
         """
-        Index of first occurrence of maximum of values.
+        Index *label* of the first occurrence of maximum of values.
 
         Parameters
         ----------
@@ -1315,7 +1318,9 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
 
         Notes
         -----
-        This method is the Series version of ``ndarray.argmax``.
+        This method is the Series version of ``ndarray.argmax``. This method
+        returns the label of the maximum, while ``ndarray.argmax`` returns
+        the position. To get the position, use ``series.values.argmax()``.
 
         See Also
         --------
@@ -1329,8 +1334,18 @@ def idxmax(self, axis=None, skipna=True, *args, **kwargs):
         return self.index[i]
 
     # ndarray compat
-    argmin = idxmin
-    argmax = idxmax
+    argmin = deprecate('argmin', idxmin,
+                       msg="'argmin' is deprecated. Use 'idxmin' instead. "
+                           "The behavior of 'argmin' will be corrected to "
+                           "return the positional minimum in the future. "
+                           "Use 'series.values.argmin' to get the position of "
+                           "the minimum now.")
+    argmax = deprecate('argmax', idxmax,
+                       msg="'argmax' is deprecated. Use 'idxmax' instead. "
+                           "The behavior of 'argmax' will be corrected to "
+                           "return the positional maximum in the future. "
+                           "Use 'series.values.argmax' to get the position of "
+                           "the maximum now.")
 
     def round(self, decimals=0, *args, **kwargs):
         """
diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py
index 547b9676717c99..386d9c3ffe30df 100644
--- a/pandas/io/formats/format.py
+++ b/pandas/io/formats/format.py
@@ -598,9 +598,7 @@ def to_string(self):
             text = self._join_multiline(*strcols)
         else:  # max_cols == 0. Try to fit frame to terminal
             text = self.adj.adjoin(1, *strcols).split('\n')
-            row_lens = Series(text).apply(len)
-            max_len_col_ix = np.argmax(row_lens)
-            max_len = row_lens[max_len_col_ix]
+            max_len = Series(text).str.len().max()
             headers = [ele[0] for ele in strcols]
             # Size of last col determines dot col size. See
             # `self._to_str_columns
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index 914181dc941549..9f5e4f2ac4b6e6 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -1242,16 +1242,31 @@ def test_idxmin(self):
         result = s.idxmin()
         assert result == 1
 
-    def test_numpy_argmin(self):
-        # argmin is aliased to idxmin
-        data = np.random.randint(0, 11, size=10)
-        result = np.argmin(Series(data))
-        assert result == np.argmin(data)
+    def test_numpy_argmin_deprecated(self):
+        # See gh-16830
+        data = np.arange(1, 11)
+
+        s = Series(data, index=data)
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # The deprecation of Series.argmin also causes a deprecation
+            # warning when calling np.argmin. This behavior is temporary
+            # until the implementation of Series.argmin is corrected. 
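+            # (np.argmin delegates to the Series.argmin method, which is
+            # why the warning is raised here as well)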
+            result = np.argmin(s)
+
+        assert result == 1
+
+        with tm.assert_produces_warning(FutureWarning):
+            # argmin is aliased to idxmin
+            result = s.argmin()
+
+        assert result == 1
 
         if not _np_version_under1p10:
-            msg = "the 'out' parameter is not supported"
-            tm.assert_raises_regex(ValueError, msg, np.argmin,
-                                   Series(data), out=data)
+            with tm.assert_produces_warning(FutureWarning,
+                                            check_stacklevel=False):
+                msg = "the 'out' parameter is not supported"
+                tm.assert_raises_regex(ValueError, msg, np.argmin,
+                                       s, out=data)
 
     def test_idxmax(self):
         # test idxmax
@@ -1297,17 +1312,30 @@ def test_idxmax(self):
         result = s.idxmin()
         assert result == 1.1
 
-    def test_numpy_argmax(self):
+    def test_numpy_argmax_deprecated(self):
+        # See gh-16830
+        data = np.arange(1, 11)
+
+        s = Series(data, index=data)
+        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
+            # The deprecation of Series.argmax also causes a deprecation
+            # warning when calling np.argmax. This behavior is temporary
+            # until the implementation of Series.argmax is corrected.
+            result = np.argmax(s)
+        assert result == 10
+
+        with tm.assert_produces_warning(FutureWarning):
+            # argmax is aliased to idxmax
+            result = s.argmax()
 
-        # argmax is aliased to idxmax
-        data = np.random.randint(0, 11, size=10)
-        result = np.argmax(Series(data))
-        assert result == np.argmax(data)
+        assert result == 10
 
         if not _np_version_under1p10:
-            msg = "the 'out' parameter is not supported"
-            tm.assert_raises_regex(ValueError, msg, np.argmax,
-                                   Series(data), out=data)
+            with tm.assert_produces_warning(FutureWarning,
+                                            check_stacklevel=False):
+                msg = "the 'out' parameter is not supported"
+                tm.assert_raises_regex(ValueError, msg, np.argmax,
+                                       s, out=data)
 
     def test_ptp(self):
         N = 1000
diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py
index 56b8a90ec0c9f1..6b950be15ca465 100644
--- a/pandas/tests/series/test_api.py
+++ b/pandas/tests/series/test_api.py
@@ -345,7 +345,7 @@ def test_ndarray_compat(self):
                          index=date_range('1/1/2000', periods=1000))
 
         def f(x):
-            return x[x.argmax()]
+            return x[x.idxmax()]
 
         result = tsdf.apply(f)
         expected = tsdf.max()
diff --git a/pandas/tests/series/test_operators.py b/pandas/tests/series/test_operators.py
index 114a055de81953..c8cc80b1cf4b1b 100644
--- a/pandas/tests/series/test_operators.py
+++ b/pandas/tests/series/test_operators.py
@@ -1872,33 +1872,33 @@ def test_op_duplicate_index(self):
             ),
         ]
     )
-    def test_assert_argminmax_raises(self, test_input, error_type):
+    def test_assert_idxminmax_raises(self, test_input, error_type):
         """
         Cases where ``Series.argmax`` and related should raise an exception
         """
         with pytest.raises(error_type):
-            test_input.argmin()
+            test_input.idxmin()
         with pytest.raises(error_type):
-            test_input.argmin(skipna=False)
+            test_input.idxmin(skipna=False)
         with pytest.raises(error_type):
-            test_input.argmax()
+            test_input.idxmax()
         with pytest.raises(error_type):
-            test_input.argmax(skipna=False)
+            test_input.idxmax(skipna=False)
 
-    def test_argminmax_with_inf(self):
+    def test_idxminmax_with_inf(self):
         # For numeric data with NA and Inf (GH #13595)
         s = pd.Series([0, -np.inf, np.inf, np.nan])
 
-        assert s.argmin() == 1
-        assert np.isnan(s.argmin(skipna=False))
+        assert s.idxmin() == 1
+        assert np.isnan(s.idxmin(skipna=False))
 
-        assert s.argmax() == 2
-        assert np.isnan(s.argmax(skipna=False))
+        assert s.idxmax() == 2
+        assert np.isnan(s.idxmax(skipna=False))
 
         # Using old-style behavior that treats floating point nan, -inf, and
         # +inf as missing
        with 
pd.option_context('mode.use_inf_as_na', True): - assert s.argmin() == 0 - assert np.isnan(s.argmin(skipna=False)) - assert s.argmax() == 0 - np.isnan(s.argmax(skipna=False)) + assert s.idxmin() == 0 + assert np.isnan(s.idxmin(skipna=False)) + assert s.idxmax() == 0 + np.isnan(s.idxmax(skipna=False)) diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index b44314d4e733be..451f3695933470 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -1379,11 +1379,25 @@ def test_numpy_func_call(self): # numpy passes in 'axis=None' or `axis=-1' funcs = ['sum', 'cumsum', 'var', 'mean', 'prod', 'cumprod', 'std', 'argsort', - 'argmin', 'argmax', 'min', 'max'] + 'min', 'max'] for func in funcs: for series in ('bseries', 'zbseries'): getattr(np, func)(getattr(self, series)) + def test_deprecated_numpy_func_call(self): + # NOTE: These should be add to the 'test_numpy_func_call' test above + # once the behavior of argmin/argmax is corrected. + funcs = ['argmin', 'argmax'] + for func in funcs: + for series in ('bseries', 'zbseries'): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(np, func)(getattr(self, series)) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + getattr(getattr(self, series), func)() + @pytest.mark.parametrize( 'datetime_type', (np.datetime64, diff --git a/pandas/util/_decorators.py b/pandas/util/_decorators.py index 3733e4311aa732..9e4e5515a292bc 100644 --- a/pandas/util/_decorators.py +++ b/pandas/util/_decorators.py @@ -7,7 +7,7 @@ def deprecate(name, alternative, alt_name=None, klass=None, - stacklevel=2): + stacklevel=2, msg=None): """ Return a new function that emits a deprecation warning on use. @@ -21,14 +21,16 @@ def deprecate(name, alternative, alt_name=None, klass=None, Name to use in preference of alternative.__name__ klass : Warning, default FutureWarning stacklevel : int, default 2 + msg : str + The message to display in the warning. + Default is '{name} is deprecated. Use {alt_name} instead.' """ alt_name = alt_name or alternative.__name__ klass = klass or FutureWarning + msg = msg or "{} is deprecated. Use {} instead".format(name, alt_name) def wrapper(*args, **kwargs): - msg = "{name} is deprecated. 
Use {alt_name} instead".format( - name=name, alt_name=alt_name) warnings.warn(msg, klass, stacklevel=stacklevel) return alternative(*args, **kwargs) return wrapper From eaa5081e381e111bd7a8b7c277c12527d7ae52e4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 28 Sep 2017 04:59:25 -0500 Subject: [PATCH 162/188] CI: Pin miniconda version (#17700) --- appveyor.yml | 2 +- ci/install.ps1 | 4 ++-- ci/install_circle.sh | 6 ++++-- ci/install_travis.sh | 10 +++++++--- ci/requirements-2.7_SLOW.run | 2 +- ci/requirements-2.7_WIN.run | 2 +- ci/requirements_all.txt | 2 +- pandas/tests/io/test_pytables.py | 4 ++++ 8 files changed, 21 insertions(+), 11 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index a1f8886f6d068f..f1259f271ee395 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -59,7 +59,7 @@ install: # install our build environment - cmd: conda config --set show_channel_urls true --set always_yes true --set changeps1 false - - cmd: conda update -q conda + # - cmd: conda update -q conda - cmd: conda config --set ssl_verify false # add the pandas channel *before* defaults to have defaults take priority diff --git a/ci/install.ps1 b/ci/install.ps1 index 64ec7f81884cd1..b784b4ebf5e6ac 100644 --- a/ci/install.ps1 +++ b/ci/install.ps1 @@ -7,7 +7,7 @@ $MINICONDA_URL = "http://repo.continuum.io/miniconda/" function DownloadMiniconda ($python_version, $platform_suffix) { $webclient = New-Object System.Net.WebClient - $filename = "Miniconda3-latest-Windows-" + $platform_suffix + ".exe" + $filename = "Miniconda3-4.3.21-Windows-" + $platform_suffix + ".exe" $url = $MINICONDA_URL + $filename $basedir = $pwd.Path + "\" @@ -85,7 +85,7 @@ function UpdateConda ($python_home) { function main () { InstallMiniconda "3.5" $env:PYTHON_ARCH $env:CONDA_ROOT - UpdateConda $env:CONDA_ROOT + # UpdateConda $env:CONDA_ROOT InstallCondaPackages $env:CONDA_ROOT "conda-build jinja2 anaconda-client" } diff --git a/ci/install_circle.sh b/ci/install_circle.sh index fd79f907625e9d..eba98be561397d 100755 --- a/ci/install_circle.sh +++ b/ci/install_circle.sh @@ -10,7 +10,9 @@ echo "[Using clean Miniconda install]" rm -rf "$MINICONDA_DIR" # install miniconda -wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 +# wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -q -O miniconda.sh || exit 1 +# Pin miniconda +wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-Linux-x86_64.sh -q -O miniconda.sh || exit 1 bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 export PATH="$MINICONDA_DIR/bin:$PATH" @@ -18,7 +20,7 @@ export PATH="$MINICONDA_DIR/bin:$PATH" echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set always_yes true --set changeps1 false || exit 1 -conda update -q conda +# conda update -q conda # add the pandas channel to take priority # to add extra packages diff --git a/ci/install_travis.sh b/ci/install_travis.sh index b85263daa1eaca..faf404ddcd2931 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -34,9 +34,13 @@ fi # install miniconda if [ "${TRAVIS_OS_NAME}" == "osx" ]; then - time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 + # temporarily pin miniconda + # time wget http://repo.continuum.io/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh || exit 1 + time wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-MacOSX-x86_64.sh -O miniconda.sh || exit 1 else - time wget 
http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + # temporarily pin miniconda + # time wget http://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh || exit 1 + time wget https://repo.continuum.io/miniconda/Miniconda2-4.3.21-Linux-x86_64.sh -O miniconda.sh || exit 1 fi time bash miniconda.sh -b -p "$MINICONDA_DIR" || exit 1 @@ -48,7 +52,7 @@ echo echo "[update conda]" conda config --set ssl_verify false || exit 1 conda config --set quiet true --set always_yes true --set changeps1 false || exit 1 -conda update -q conda +# conda update -q conda echo echo "[add channels]" diff --git a/ci/requirements-2.7_SLOW.run b/ci/requirements-2.7_SLOW.run index f7708283ad04a0..db95a6ccb23140 100644 --- a/ci/requirements-2.7_SLOW.run +++ b/ci/requirements-2.7_SLOW.run @@ -16,4 +16,4 @@ s3fs psycopg2 pymysql html5lib -beautiful-soup +beautifulsoup4 diff --git a/ci/requirements-2.7_WIN.run b/ci/requirements-2.7_WIN.run index f953682f52d45a..a81542ee5006c7 100644 --- a/ci/requirements-2.7_WIN.run +++ b/ci/requirements-2.7_WIN.run @@ -14,5 +14,5 @@ xlsxwriter s3fs bottleneck html5lib -beautiful-soup +beautifulsoup4 jinja2=2.8 diff --git a/ci/requirements_all.txt b/ci/requirements_all.txt index b153b6989df868..e13afd619f1054 100644 --- a/ci/requirements_all.txt +++ b/ci/requirements_all.txt @@ -13,7 +13,7 @@ xlrd xlwt html5lib patsy -beautiful-soup +beautifulsoup4 numpy cython scipy diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ff21afc11d2205..c5729d421758e3 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -799,6 +799,10 @@ def test_complibs(self): # Remove lzo if its not available on this platform if not tables.which_lib_version('lzo'): all_complibs.remove('lzo') + # Remove bzip2 if its not available on this platform + if not tables.which_lib_version("bzip2"): + all_complibs.remove("bzip2") + all_levels = range(0, 10) all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels] From db1206aaf00cf0024a2ff28d828c0a78d6cbe7df Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Thu, 28 Sep 2017 07:50:32 -0400 Subject: [PATCH 163/188] DEPR: deprecate pd.TimeGrouper (#17703) closes #16747 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/api.py | 17 ++++++++++++++--- pandas/tests/api/test_api.py | 9 +++++++-- pandas/tests/groupby/test_groupby.py | 12 ++++++------ pandas/tests/groupby/test_timegrouper.py | 20 +++++++++++--------- pandas/tests/groupby/test_transform.py | 2 +- pandas/tests/test_resample.py | 12 ++++++++---- 7 files changed, 48 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ae55b4a0aa4691..dae93feb48b02f 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -492,6 +492,7 @@ Deprecations - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) +- ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) .. 
_whatsnew_0210.deprecations.argmin_min diff --git a/pandas/core/api.py b/pandas/core/api.py index 6a32d3763ffb19..a012ccce839653 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -33,7 +33,6 @@ from pandas.tseries.offsets import DateOffset from pandas.core.tools.datetimes import to_datetime from pandas.core.tools.timedeltas import to_timedelta -from pandas.core.resample import TimeGrouper # see gh-14094. from pandas.util._depr_module import _DeprecatedModule @@ -52,8 +51,8 @@ # deprecation, xref #13790 def match(*args, **kwargs): - import warnings + import warnings warnings.warn("pd.match() is deprecated and will be removed " "in a future version", FutureWarning, stacklevel=2) @@ -64,8 +63,20 @@ def match(*args, **kwargs): def groupby(*args, **kwargs): import warnings - warnings.warn("pd.groupby() is deprecated and will be removed " + warnings.warn("pd.groupby() is deprecated and will be removed; " "Please use the Series.groupby() or " "DataFrame.groupby() methods", FutureWarning, stacklevel=2) return args[0].groupby(*args[1:], **kwargs) + + +# deprecation, xref +class TimeGrouper(object): + + def __new__(cls, *args, **kwargs): + from pandas.core.resample import TimeGrouper + import warnings + warnings.warn("pd.TimeGrouper is deprecated and will be removed; " + "Please use pd.Grouper(freq=...)", + FutureWarning, stacklevel=2) + return TimeGrouper(*args, **kwargs) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index cbc73615811a2c..c593290410b961 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -47,11 +47,11 @@ class TestPDApi(Base): 'Grouper', 'HDFStore', 'Index', 'Int64Index', 'MultiIndex', 'Period', 'PeriodIndex', 'RangeIndex', 'UInt64Index', 'Series', 'SparseArray', 'SparseDataFrame', - 'SparseSeries', 'TimeGrouper', 'Timedelta', + 'SparseSeries', 'Timedelta', 'TimedeltaIndex', 'Timestamp', 'Interval', 'IntervalIndex'] # these are already deprecated; awaiting removal - deprecated_classes = ['WidePanel', 'Panel4D', + deprecated_classes = ['WidePanel', 'Panel4D', 'TimeGrouper', 'SparseList', 'Expr', 'Term'] # these should be deprecated in the future @@ -184,6 +184,11 @@ def test_groupby(self): check_stacklevel=False): pd.groupby(pd.Series([1, 2, 3]), [1, 1, 1]) + def test_TimeGrouper(self): + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + pd.TimeGrouper(freq='D') + # GH 15940 def test_get_store(self): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 8957beacab376d..d91cff436dee2e 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -3335,7 +3335,7 @@ def test_groupby_with_empty(self): index = pd.DatetimeIndex(()) data = () series = pd.Series(data, index) - grouper = pd.core.resample.TimeGrouper('D') + grouper = pd.Grouper(freq='D') grouped = series.groupby(grouper) assert next(iter(grouped), None) is None @@ -3354,7 +3354,7 @@ def test_groupby_with_small_elem(self): df = pd.DataFrame({'event': ['start', 'start'], 'change': [1234, 5678]}, index=pd.DatetimeIndex(['2014-09-10', '2013-10-10'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups @@ -3369,7 +3369,7 @@ def test_groupby_with_small_elem(self): 'change': [1234, 5678, 9123]}, index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', '2014-09-15'])) - grouped = 
df.groupby([pd.TimeGrouper(freq='M'), 'event']) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) assert len(grouped.groups) == 2 assert grouped.ngroups == 2 assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups @@ -3385,7 +3385,7 @@ def test_groupby_with_small_elem(self): 'change': [1234, 5678, 9123]}, index=pd.DatetimeIndex(['2014-09-10', '2013-10-10', '2014-08-05'])) - grouped = df.groupby([pd.TimeGrouper(freq='M'), 'event']) + grouped = df.groupby([pd.Grouper(freq='M'), 'event']) assert len(grouped.groups) == 3 assert grouped.ngroups == 3 assert (pd.Timestamp('2014-09-30'), 'start') in grouped.groups @@ -3682,9 +3682,9 @@ def test_nunique_with_timegrouper(self): Timestamp('2016-06-28 16:09:30'), Timestamp('2016-06-28 16:46:28')], 'data': ['1', '2', '3']}).set_index('time') - result = test.groupby(pd.TimeGrouper(freq='h'))['data'].nunique() + result = test.groupby(pd.Grouper(freq='h'))['data'].nunique() expected = test.groupby( - pd.TimeGrouper(freq='h') + pd.Grouper(freq='h') )['data'].apply(pd.Series.nunique) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index f83a3fcd0668d9..fafcbf947e3df7 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -52,10 +52,10 @@ def test_groupby_with_timegrouper(self): assert_frame_equal(result1, expected) df_sorted = df.sort_index() - result2 = df_sorted.groupby(pd.TimeGrouper(freq='5D')).sum() + result2 = df_sorted.groupby(pd.Grouper(freq='5D')).sum() assert_frame_equal(result2, expected) - result3 = df.groupby(pd.TimeGrouper(freq='5D')).sum() + result3 = df.groupby(pd.Grouper(freq='5D')).sum() assert_frame_equal(result3, expected) def test_groupby_with_timegrouper_methods(self): @@ -80,7 +80,7 @@ def test_groupby_with_timegrouper_methods(self): for df in [df_original, df_sorted]: df = df.set_index('Date', drop=False) - g = df.groupby(pd.TimeGrouper('6M')) + g = df.groupby(pd.Grouper(freq='6M')) assert g.group_keys assert isinstance(g.grouper, pd.core.groupby.BinGrouper) groups = g.groups @@ -265,11 +265,11 @@ def test_timegrouper_with_reg_groups(self): ['date', 'user_id']).sort_index().astype('int64') expected.name = 'whole_cost' - result1 = df.sort_index().groupby([pd.TimeGrouper(freq=freq), + result1 = df.sort_index().groupby([pd.Grouper(freq=freq), 'user_id'])['whole_cost'].sum() assert_series_equal(result1, expected) - result2 = df.groupby([pd.TimeGrouper(freq=freq), 'user_id'])[ + result2 = df.groupby([pd.Grouper(freq=freq), 'user_id'])[ 'whole_cost'].sum() assert_series_equal(result2, expected) @@ -340,7 +340,7 @@ def sumfunc_series(x): return pd.Series([x['value'].sum()], ('sum',)) expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_series) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + result = (df_dt.groupby(pd.Grouper(freq='M', key='date')) .apply(sumfunc_series)) assert_frame_equal(result.reset_index(drop=True), expected.reset_index(drop=True)) @@ -358,8 +358,10 @@ def sumfunc_value(x): return x.value.sum() expected = df.groupby(pd.Grouper(key='date')).apply(sumfunc_value) - result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) - .apply(sumfunc_value)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = (df_dt.groupby(pd.TimeGrouper(freq='M', key='date')) + .apply(sumfunc_value)) assert_series_equal(result.reset_index(drop=True), expected.reset_index(drop=True)) @@ -617,7 +619,7 @@ def 
test_nunique_with_timegrouper_and_nat(self): Timestamp('2016-06-28 16:46:28')], 'data': ['1', '2', '3']}) - grouper = pd.TimeGrouper(key='time', freq='h') + grouper = pd.Grouper(key='time', freq='h') result = test.groupby(grouper)['data'].nunique() expected = test[test.time.notnull()].groupby(grouper)['data'].nunique() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index 267b67972c6406..4b821dade6eae7 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -57,7 +57,7 @@ def demean(arr): # GH 8430 df = tm.makeTimeDataFrame() - g = df.groupby(pd.TimeGrouper('M')) + g = df.groupby(pd.Grouper(freq='M')) g.transform(lambda x: x - 1) # GH 9700 diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 28a68a0a6e36d3..7449beb8f97dfe 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -1983,8 +1983,8 @@ def test_resample_nunique(self): pd.Timestamp('2015-06-08 00:00:00'): '2015-06-08'}}) r = df.resample('D') g = df.groupby(pd.Grouper(freq='D')) - expected = df.groupby(pd.TimeGrouper('D')).ID.apply(lambda x: - x.nunique()) + expected = df.groupby(pd.Grouper(freq='D')).ID.apply(lambda x: + x.nunique()) assert expected.name == 'ID' for t in [r, g]: @@ -3075,7 +3075,9 @@ def setup_method(self, method): index=date_range('1/1/2000', periods=1000)) def test_apply(self): - grouper = TimeGrouper('A', label='right', closed='right') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') grouped = self.ts.groupby(grouper) @@ -3093,7 +3095,9 @@ def test_count(self): expected = self.ts.groupby(lambda x: x.year).count() - grouper = TimeGrouper('A', label='right', closed='right') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + grouper = pd.TimeGrouper(freq='A', label='right', closed='right') result = self.ts.groupby(grouper).count() expected.index = result.index assert_series_equal(result, expected) From 45bd47186938fcd247aad3c2dc572c1581c06f4c Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Thu, 28 Sep 2017 23:11:25 +0900 Subject: [PATCH 164/188] BUG: Fix make_sparse mask generation (#17574) --- asv_bench/benchmarks/sparse.py | 65 ++++++++++++++++++++++++++++++- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/sparse.pyx | 19 +++++++++ pandas/core/sparse/array.py | 9 ++++- pandas/tests/sparse/test_array.py | 9 +++++ 5 files changed, 101 insertions(+), 2 deletions(-) diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index b958f5e0e5c342..a46205026481e5 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -2,7 +2,7 @@ from .pandas_vb_common import * import scipy.sparse -from pandas import SparseSeries, SparseDataFrame +from pandas import SparseSeries, SparseDataFrame, SparseArray class sparse_series_to_frame(object): @@ -23,6 +23,69 @@ def time_sparse_series_to_frame(self): SparseDataFrame(self.series) +class sparse_array_constructor(object): + goal_time = 0.2 + + def setup(self): + np.random.seed(1) + self.int64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=0, dtype=np.int64) + self.int64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=0, dtype=np.int64) + + self.float64_10percent = self.make_numeric_array(length=1000000, dense_size=100000, fill_value=np.nan, dtype=np.float64) + 
self.float64_1percent = self.make_numeric_array(length=1000000, dense_size=10000, fill_value=np.nan, dtype=np.float64) + + self.object_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=np.nan) + self.object_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=np.nan) + + self.object_non_nan_fill_value_10percent = self.make_object_array(length=1000000, dense_size=100000, fill_value=0) + self.object_non_nan_fill_value_1percent = self.make_object_array(length=1000000, dense_size=10000, fill_value=0) + + def make_numeric_array(self, length, dense_size, fill_value, dtype): + arr = np.array([fill_value] * length, dtype=dtype) + indexer = np.unique(np.random.randint(0, length, dense_size)) + arr[indexer] = np.random.randint(0, 100, len(indexer)) + return (arr, fill_value, dtype) + + def make_object_array(self, length, dense_size, fill_value): + elems = np.array(['a', 0.0, False, 1, 2], dtype=np.object) + arr = np.array([fill_value] * length, dtype=np.object) + indexer = np.unique(np.random.randint(0, length, dense_size)) + arr[indexer] = np.random.choice(elems, len(indexer)) + return (arr, fill_value, np.object) + + def time_sparse_array_constructor_int64_10percent(self): + arr, fill_value, dtype = self.int64_10percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + def time_sparse_array_constructor_int64_1percent(self): + arr, fill_value, dtype = self.int64_1percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + def time_sparse_array_constructor_float64_10percent(self): + arr, fill_value, dtype = self.float64_10percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + def time_sparse_array_constructor_float64_1percent(self): + arr, fill_value, dtype = self.float64_1percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + def time_sparse_array_constructor_object_nan_fill_value_10percent(self): + arr, fill_value, dtype = self.object_nan_fill_value_10percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + def time_sparse_array_constructor_object_nan_fill_value_1percent(self): + arr, fill_value, dtype = self.object_nan_fill_value_1percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + def time_sparse_array_constructor_object_non_nan_fill_value_10percent(self): + arr, fill_value, dtype = self.object_non_nan_fill_value_10percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + def time_sparse_array_constructor_object_non_nan_fill_value_1percent(self): + arr, fill_value, dtype = self.object_non_nan_fill_value_1percent + SparseArray(arr, fill_value=fill_value, dtype=dtype) + + class sparse_frame_constructor(object): goal_time = 0.2 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index dae93feb48b02f..eeabe6cff6e30a 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -631,6 +631,7 @@ Sparse - Bug in ``SparseSeries`` raises ``AttributeError`` when a dictionary is passed in as data (:issue:`16905`) - Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) - Bug in :func:`SparseSeries.unstack` and :func:`SparseDataFrame.stack` (:issue:`16614`, :issue:`15045`) +- Bug in :func:`make_sparse` treating two numeric/boolean data, which have same bits, as same when array ``dtype`` is ``object`` (:issue:`17574`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 
1cc7f5ace95ea5..fac678e531c8be 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -848,3 +848,22 @@ def reindex_integer(ndarray[float64_t, ndim=1] values, IntIndex sparse_index, ndarray[int32_t, ndim=1] indexer): pass + + +# ----------------------------------------------------------------------------- +# SparseArray mask create operations + +def make_mask_object_ndarray(ndarray[object, ndim=1] arr, object fill_value): + cdef object value + cdef Py_ssize_t i + cdef Py_ssize_t new_length = len(arr) + cdef ndarray[int8_t, ndim=1] mask + + mask = np.ones(new_length, dtype=np.int8) + + for i in range(new_length): + value = arr[i] + if value == fill_value and type(value) == type(fill_value): + mask[i] = 0 + + return mask.view(dtype=np.bool) diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py index f965c91999a03d..3b45a013734c91 100644 --- a/pandas/core/sparse/array.py +++ b/pandas/core/sparse/array.py @@ -19,6 +19,7 @@ from pandas.core.dtypes.common import ( _ensure_platform_int, is_float, is_integer, + is_object_dtype, is_integer_dtype, is_bool_dtype, is_list_like, @@ -789,7 +790,13 @@ def make_sparse(arr, kind='block', fill_value=None): if is_string_dtype(arr): arr = arr.astype(object) - mask = arr != fill_value + if is_object_dtype(arr.dtype): + # element-wise equality check method in numpy doesn't treat + # each element type, eg. 0, 0.0, and False are treated as + # same. So we have to check the both of its type and value. + mask = splib.make_mask_object_ndarray(arr, fill_value) + else: + mask = arr != fill_value length = len(arr) if length != mask.size: diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py index b0a9182a265fe8..f653ee50982ad1 100644 --- a/pandas/tests/sparse/test_array.py +++ b/pandas/tests/sparse/test_array.py @@ -61,6 +61,15 @@ def test_constructor_object_dtype(self): assert arr.dtype == np.object assert arr.fill_value == 'A' + # GH 17574 + data = [False, 0, 100.0, 0.0] + arr = SparseArray(data, dtype=np.object, fill_value=False) + assert arr.dtype == np.object + assert arr.fill_value is False + arr_expected = np.array(data, dtype=np.object) + it = (type(x) == type(y) and x == y for x, y in zip(arr, arr_expected)) + assert np.fromiter(it, dtype=np.bool).all() + def test_constructor_spindex_dtype(self): arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2])) tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan])) From 074b4850151cd2785f670e1d18c412e46c509f60 Mon Sep 17 00:00:00 2001 From: Amol K Date: Thu, 28 Sep 2017 14:12:50 +0000 Subject: [PATCH 165/188] Fixed Value Error when doing HDFStore.Select of contiguous mixed-data (#17670) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/pytables.py | 5 ++--- pandas/tests/io/test_pytables.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index eeabe6cff6e30a..50f11c38bae236 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -603,6 +603,7 @@ I/O - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) - Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) - Bug in ``DataFrame.to_html()`` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row 
labels, respectively (:issue:`16792`) +- Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4d300b200971ac..ea69116ec363da 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2441,13 +2441,12 @@ def read_array(self, key, start=None, stop=None): """ read an array for the specified node (off of group """ import tables node = getattr(self.group, key) - data = node[start:stop] attrs = node._v_attrs transposed = getattr(attrs, 'transposed', False) if isinstance(node, tables.VLArray): - ret = data[0] + ret = node[0][start:stop] else: dtype = getattr(attrs, 'value_type', None) shape = getattr(attrs, 'shape', None) @@ -2456,7 +2455,7 @@ def read_array(self, key, start=None, stop=None): # length 0 axis ret = np.empty(shape, dtype=dtype) else: - ret = data + ret = node[start:stop] if dtype == u('datetime64'): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c5729d421758e3..2fe3cf1f34d44c 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4391,6 +4391,19 @@ def test_path_pathlib(self): lambda p: pd.read_hdf(p, 'df')) tm.assert_frame_equal(df, result) + @pytest.mark.parametrize('start, stop', [(0, 2), (1, 2), (None, None)]) + def test_contiguous_mixed_data_table(self, start, stop): + # GH 17021 + # ValueError when reading a contiguous mixed-data table ft. VLArray + df = DataFrame({'a': Series([20111010, 20111011, 20111012]), + 'b': Series(['ab', 'cd', 'ab'])}) + + with ensure_clean_store(self.path) as store: + store.append('test_dataset', df) + + result = store.select('test_dataset', start=start, stop=stop) + assert_frame_equal(df[start:stop], result) + def test_path_pathlib_hdfstore(self): df = tm.makeDataFrame() From cc58b84f423db58de9edca762f0abbe10c638efb Mon Sep 17 00:00:00 2001 From: topper-123 Date: Thu, 28 Sep 2017 21:26:38 +0200 Subject: [PATCH 166/188] DOC: Improved doc string for IntervalIndex + related changes (#17706) --- pandas/_libs/interval.pyx | 4 ++ pandas/core/indexes/interval.py | 110 ++++++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index bfbda9696ff2bb..306597031817df 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -58,6 +58,10 @@ cdef class Interval(IntervalMixin): closed : {'left', 'right', 'both', 'neither'} Whether the interval is closed on the left-side, right-side, both or neither. Defaults to 'right'. + + See Also + -------- + IntervalIndex : an Index of intervals that are all closed on the same side. """ cdef readonly object left, right diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 55ed2342571ab8..a697ed7888f90b 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -120,9 +120,42 @@ class IntervalIndex(IntervalMixin, Index): copy : boolean, default False Copy the meta-data + Examples + --------- + A new ``IntervalIndex`` is typically constructed using + :func:`interval_range`: + + >>> pd.interval_range(start=0, end=5) + IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] + closed='right', dtype='interval[int64]') + + It may also be constructed using one of the constructor + methods :meth:`IntervalIndex.from_arrays`, + :meth:`IntervalIndex.from_breaks`, :meth:`IntervalIndex.from_intervals` + and :meth:`IntervalIndex.from_tuples`. 
+ + See further examples in the doc strings of ``interval_range`` and the + mentioned constructor methods. + + Notes + ------ + See the `user guide + `_ + for more. + See Also -------- Index + Interval : A bounded slice-like interval + interval_range : Function to create a fixed frequency IntervalIndex + IntervalIndex.from_arrays : Construct an IntervalIndex from a left and + right array + IntervalIndex.from_breaks : Construct an IntervalIndex from an array of + splits + IntervalIndex.from_intervals : Construct an IntervalIndex from an array of + Interval objects + IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of + tuples """ _typ = 'intervalindex' _comparables = ['name'] @@ -319,11 +352,20 @@ def from_breaks(cls, breaks, closed='right', name=None, copy=False): Examples -------- + >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') - >>> IntervalIndex.from_breaks([0, 1, 2, 3]) - IntervalIndex(left=[0, 1, 2], - right=[1, 2, 3], - closed='right') + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex + IntervalIndex.from_arrays : Construct an IntervalIndex from a left and + right array + IntervalIndex.from_intervals : Construct an IntervalIndex from an array + of Interval objects + IntervalIndex.from_tuples : Construct an IntervalIndex from a + list/array of tuples """ breaks = np.asarray(breaks) return cls.from_arrays(breaks[:-1], breaks[1:], closed, @@ -350,11 +392,20 @@ def from_arrays(cls, left, right, closed='right', name=None, copy=False): Examples -------- + >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') - >>> IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) - IntervalIndex(left=[0, 1, 2], - right=[1, 2, 3], - closed='right') + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex + IntervalIndex.from_breaks : Construct an IntervalIndex from an array of + splits + IntervalIndex.from_intervals : Construct an IntervalIndex from an array + of Interval objects + IntervalIndex.from_tuples : Construct an IntervalIndex from a + list/array of tuples """ left = np.asarray(left) right = np.asarray(right) @@ -378,19 +429,27 @@ def from_intervals(cls, data, name=None, copy=False): Examples -------- - - >>> IntervalIndex.from_intervals([Interval(0, 1), Interval(1, 2)]) - IntervalIndex(left=[0, 1], - right=[1, 2], - closed='right') + >>> pd.IntervalIndex.from_intervals([pd.Interval(0, 1), + ... 
pd.Interval(1, 2)]) + IntervalIndex([(0, 1], (1, 2]] + closed='right', dtype='interval[int64]') The generic Index constructor work identically when it infers an array of all intervals: - >>> Index([Interval(0, 1), Interval(1, 2)]) - IntervalIndex(left=[0, 1], - right=[1, 2], - closed='right') + >>> pd.Index([pd.Interval(0, 1), pd.Interval(1, 2)]) + IntervalIndex([(0, 1], (1, 2]] + closed='right', dtype='interval[int64]') + + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex + IntervalIndex.from_arrays : Construct an IntervalIndex from a left and + right array + IntervalIndex.from_breaks : Construct an IntervalIndex from an array of + splits + IntervalIndex.from_tuples : Construct an IntervalIndex from a + list/array of tuples """ data = np.asarray(data) left, right, closed = intervals_to_interval_bounds(data) @@ -415,7 +474,19 @@ def from_tuples(cls, data, closed='right', name=None, copy=False): Examples -------- + >>> pd.IntervalIndex.from_tuples([(0, 1), (1,2)]) + IntervalIndex([(0, 1], (1, 2]], + closed='right', dtype='interval[int64]') + See Also + -------- + interval_range : Function to create a fixed frequency IntervalIndex + IntervalIndex.from_arrays : Construct an IntervalIndex from a left and + right array + IntervalIndex.from_breaks : Construct an IntervalIndex from an array of + splits + IntervalIndex.from_intervals : Construct an IntervalIndex from an array + of Interval objects """ left = [] right = [] @@ -1121,7 +1192,6 @@ def interval_range(start=None, end=None, periods=None, freq=None, Examples -------- - Numeric ``start`` and ``end`` is supported. >>> pd.interval_range(start=0, end=5) @@ -1159,6 +1229,10 @@ def interval_range(start=None, end=None, periods=None, freq=None, >>> pd.interval_range(end=5, periods=4, closed='both') IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]] closed='both', dtype='interval[int64]') + + See Also + -------- + IntervalIndex : an Index of intervals that are all closed on the same side. 
""" if com._count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' From 42adf7da3f42a5ab02ea53fcced36a3ed85a6037 Mon Sep 17 00:00:00 2001 From: louispotok Date: Thu, 28 Sep 2017 16:42:01 -0700 Subject: [PATCH 167/188] Add chunksize param to read_json when lines=True (#17168) closes #17048 --- asv_bench/benchmarks/io_bench.py | 30 ++++ doc/source/io.rst | 10 ++ doc/source/whatsnew/v0.21.0.txt | 1 + pandas/io/json/json.py | 215 ++++++++++++++++++++----- pandas/tests/io/json/test_pandas.py | 47 ------ pandas/tests/io/json/test_readlines.py | 168 +++++++++++++++++++ 6 files changed, 383 insertions(+), 88 deletions(-) create mode 100644 pandas/tests/io/json/test_readlines.py diff --git a/asv_bench/benchmarks/io_bench.py b/asv_bench/benchmarks/io_bench.py index 52064d2cdb8a25..93273955a29b9f 100644 --- a/asv_bench/benchmarks/io_bench.py +++ b/asv_bench/benchmarks/io_bench.py @@ -1,3 +1,4 @@ +import os from .pandas_vb_common import * from pandas import concat, Timestamp, compat try: @@ -192,3 +193,32 @@ def time_read_nrows(self, compression, engine): ext = ".bz2" pd.read_csv(self.big_fname + ext, nrows=10, compression=compression, engine=engine) + + +class read_json_lines(object): + goal_time = 0.2 + fname = "__test__.json" + + def setup(self): + self.N = 100000 + self.C = 5 + self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)])) + self.df.to_json(self.fname,orient="records",lines=True) + + def teardown(self): + try: + os.remove(self.fname) + except: + pass + + def time_read_json_lines(self): + pd.read_json(self.fname, lines=True) + + def time_read_json_lines_chunk(self): + pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4)) + + def peakmem_read_json_lines(self): + pd.read_json(self.fname, lines=True) + + def peakmem_read_json_lines_chunk(self): + pd.concat(pd.read_json(self.fname, lines=True, chunksize=self.N//4)) diff --git a/doc/source/io.rst b/doc/source/io.rst index d6abed6e9d1ad6..4eba9687efc58e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1845,6 +1845,7 @@ is ``None``. To explicitly force ``Series`` parsing, pass ``typ=series`` seconds, milliseconds, microseconds or nanoseconds respectively. - ``lines`` : reads file as one json object per line. - ``encoding`` : The encoding to use to decode py3 bytes. +- ``chunksize`` : when used in combination with ``lines=True``, return a JsonReader which reads in ``chunksize`` lines per iteration. The parser will raise one of ``ValueError/TypeError/AssertionError`` if the JSON is not parseable. @@ -2049,6 +2050,10 @@ Line delimited json pandas is able to read and write line-delimited json files that are common in data processing pipelines using Hadoop or Spark. +.. versionadded:: 0.21.0 + +For line-delimited json files, pandas can also return an iterator which reads in ``chunksize`` lines at a time. This can be useful for large files or to read from a stream. + .. ipython:: python jsonl = ''' @@ -2059,6 +2064,11 @@ using Hadoop or Spark. df df.to_json(orient='records', lines=True) + # reader is an iterator that returns `chunksize` lines each iteration + reader = pd.read_json(StringIO(jsonl), lines=True, chunksize=1) + reader + for chunk in reader: + print(chunk) .. 
_io.table_schema:

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 50f11c38bae236..d5d508d02cb730 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -162,6 +162,7 @@ Other Enhancements
 - :func:`MultiIndex.is_monotonic_decreasing` has been implemented. Previously returned ``False`` in all cases. (:issue:`16554`)
 - :func:`Categorical.rename_categories` now accepts a dict-like argument as `new_categories` and only updates the categories found in that dict. (:issue:`17336`)
 - :func:`read_excel` raises ``ImportError`` with a better message if ``xlrd`` is not installed. (:issue:`17613`)
+- :func:`read_json` now accepts a ``chunksize`` parameter that can be used when ``lines=True``. If ``chunksize`` is passed, read_json now returns an iterator which reads in ``chunksize`` lines with each iteration. (:issue:`17048`)
 - :meth:`DataFrame.assign` will preserve the original order of ``**kwargs`` for Python 3.6+ users instead of sorting the column names
diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py
index 5dae6099446d0f..ab74b265b6a067 100644
--- a/pandas/io/json/json.py
+++ b/pandas/io/json/json.py
@@ -1,4 +1,5 @@
 # pylint: disable-msg=E1101,W0613,W0603
+from itertools import islice
 import os
 
 import numpy as np
@@ -8,8 +9,10 @@
 from pandas import compat, isna
 from pandas import Series, DataFrame, to_datetime, MultiIndex
 from pandas.io.common import (get_filepath_or_buffer, _get_handle,
-                              _stringify_path)
+                              _stringify_path, BaseIterator)
+from pandas.io.parsers import _validate_integer
 from pandas.core.common import AbstractMethodError
+from pandas.core.reshape.concat import concat
 from pandas.io.formats.printing import pprint_thing
 from .normalize import _convert_to_line_delimits
 from .table_schema import build_table_schema
@@ -175,7 +178,7 @@ def write(self):
 def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
               convert_axes=True, convert_dates=True, keep_default_dates=True,
               numpy=False, precise_float=False, date_unit=None, encoding=None,
-              lines=False):
+              lines=False, chunksize=None):
     """
     Convert a JSON string to pandas object
@@ -264,6 +267,16 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True,
 
         .. versionadded:: 0.19.0
 
+    chunksize : integer, default None
+        Return JsonReader object for iteration.
+        See the `line-delimited json docs
+        `_
+        for more information on ``chunksize``.
+        This can only be passed if `lines=True`.
+        If this is None, the file will be read into memory all at once.
+
+        .. versionadded:: 0.21.0
+
     Returns
     -------
     result : Series or DataFrame, depending on the value of `typ`. 
@@ -323,47 +336,167 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, filepath_or_buffer, _, _ = get_filepath_or_buffer(path_or_buf, encoding=encoding) - if isinstance(filepath_or_buffer, compat.string_types): - try: - exists = os.path.exists(filepath_or_buffer) - - # if the filepath is too long will raise here - # 5874 - except (TypeError, ValueError): - exists = False - - if exists: - fh, handles = _get_handle(filepath_or_buffer, 'r', - encoding=encoding) - json = fh.read() - fh.close() + + json_reader = JsonReader( + filepath_or_buffer, orient=orient, typ=typ, dtype=dtype, + convert_axes=convert_axes, convert_dates=convert_dates, + keep_default_dates=keep_default_dates, numpy=numpy, + precise_float=precise_float, date_unit=date_unit, encoding=encoding, + lines=lines, chunksize=chunksize + ) + + if chunksize: + return json_reader + + return json_reader.read() + + +class JsonReader(BaseIterator): + """ + JsonReader provides an interface for reading in a JSON file. + + If initialized with ``lines=True`` and ``chunksize``, can be iterated over + ``chunksize`` lines at a time. Otherwise, calling ``read`` reads in the + whole document. + """ + def __init__(self, filepath_or_buffer, orient, typ, dtype, convert_axes, + convert_dates, keep_default_dates, numpy, precise_float, + date_unit, encoding, lines, chunksize): + + self.path_or_buf = filepath_or_buffer + self.orient = orient + self.typ = typ + self.dtype = dtype + self.convert_axes = convert_axes + self.convert_dates = convert_dates + self.keep_default_dates = keep_default_dates + self.numpy = numpy + self.precise_float = precise_float + self.date_unit = date_unit + self.encoding = encoding + self.lines = lines + self.chunksize = chunksize + self.nrows_seen = 0 + self.should_close = False + + if self.chunksize is not None: + self.chunksize = _validate_integer("chunksize", self.chunksize, 1) + if not self.lines: + raise ValueError("chunksize can only be passed if lines=True") + + data = self._get_data_from_filepath(filepath_or_buffer) + self.data = self._preprocess_data(data) + + def _preprocess_data(self, data): + """ + At this point, the data either has a `read` attribute (e.g. a file + object or a StringIO) or is a string that is a JSON document. + + If self.chunksize, we prepare the data for the `__next__` method. + Otherwise, we read it into memory for the `read` method. + """ + if hasattr(data, 'read') and not self.chunksize: + data = data.read() + if not hasattr(data, 'read') and self.chunksize: + data = StringIO(data) + + return data + + def _get_data_from_filepath(self, filepath_or_buffer): + """ + read_json accepts three input types: + 1. filepath (string-like) + 2. file-like object (e.g. open file object, StringIO) + 3. JSON string + + This method turns (1) into (2) to simplify the rest of the processing. + It returns input types (2) and (3) unchanged. 
+ """ + + data = filepath_or_buffer + + if isinstance(data, compat.string_types): + try: + exists = os.path.exists(filepath_or_buffer) + + # gh-5874: if the filepath is too long will raise here + except (TypeError, ValueError): + pass + + else: + if exists: + data, _ = _get_handle(filepath_or_buffer, 'r', + encoding=self.encoding) + self.should_close = True + self.open_stream = data + + return data + + def _combine_lines(self, lines): + """Combines a list of JSON objects into one JSON object""" + lines = filter(None, map(lambda x: x.strip(), lines)) + return '[' + ','.join(lines) + ']' + + def read(self): + """Read the whole JSON input into a pandas object""" + if self.lines and self.chunksize: + obj = concat(self) + elif self.lines: + obj = self._get_object_parser( + self._combine_lines(self.data.split('\n')) + ) else: - json = filepath_or_buffer - elif hasattr(filepath_or_buffer, 'read'): - json = filepath_or_buffer.read() - else: - json = filepath_or_buffer + obj = self._get_object_parser(self.data) + self.close() + return obj + + def _get_object_parser(self, json): + """parses a json document into a pandas object""" + typ = self.typ + dtype = self.dtype + kwargs = { + "orient": self.orient, "dtype": self.dtype, + "convert_axes": self.convert_axes, + "convert_dates": self.convert_dates, + "keep_default_dates": self.keep_default_dates, "numpy": self.numpy, + "precise_float": self.precise_float, "date_unit": self.date_unit + } + obj = None + if typ == 'frame': + obj = FrameParser(json, **kwargs).parse() + + if typ == 'series' or obj is None: + if not isinstance(dtype, bool): + dtype = dict(data=dtype) + obj = SeriesParser(json, **kwargs).parse() + + return obj + + def close(self): + """ + If we opened a stream earlier, in _get_data_from_filepath, we should + close it. If an open stream or file was passed, we leave it open. + """ + if self.should_close: + try: + self.open_stream.close() + except (IOError, AttributeError): + pass - if lines: - # If given a json lines file, we break the string into lines, add - # commas and put it in a json list to make a valid json object. - lines = list(StringIO(json.strip())) - json = '[' + ','.join(lines) + ']' - - obj = None - if typ == 'frame': - obj = FrameParser(json, orient, dtype, convert_axes, convert_dates, - keep_default_dates, numpy, precise_float, - date_unit).parse() - - if typ == 'series' or obj is None: - if not isinstance(dtype, bool): - dtype = dict(data=dtype) - obj = SeriesParser(json, orient, dtype, convert_axes, convert_dates, - keep_default_dates, numpy, precise_float, - date_unit).parse() - - return obj + def __next__(self): + lines = list(islice(self.data, self.chunksize)) + if lines: + lines_json = self._combine_lines(lines) + obj = self._get_object_parser(lines_json) + + # Make sure that the returned objects have the right index. 
+ obj.index = range(self.nrows_seen, self.nrows_seen + len(obj)) + self.nrows_seen += len(obj) + + return obj + + self.close() + raise StopIteration class Parser(object): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 671d4248818e40..de4afec883efdb 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -985,53 +985,6 @@ def test_tz_range_is_utc(self): df = DataFrame({'DT': dti}) assert dumps(df, iso_dates=True) == dfexp - def test_read_jsonl(self): - # GH9180 - result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) - expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) - assert_frame_equal(result, expected) - - def test_read_jsonl_unicode_chars(self): - # GH15132: non-ascii unicode characters - # \u201d == RIGHT DOUBLE QUOTATION MARK - - # simulate file handle - json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - json = StringIO(json) - result = read_json(json, lines=True) - expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]], - columns=['a', 'b']) - assert_frame_equal(result, expected) - - # simulate string - json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' - result = read_json(json, lines=True) - expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]], - columns=['a', 'b']) - assert_frame_equal(result, expected) - - def test_to_jsonl(self): - # GH9180 - df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) - result = df.to_json(orient="records", lines=True) - expected = '{"a":1,"b":2}\n{"a":1,"b":2}' - assert result == expected - - df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b']) - result = df.to_json(orient="records", lines=True) - expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' - assert result == expected - assert_frame_equal(pd.read_json(result, lines=True), df) - - # GH15096: escaped characters in columns and data - df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], - columns=["a\\", 'b']) - result = df.to_json(orient="records", lines=True) - expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n' - '{"a\\\\":"foo\\"","b":"bar"}') - assert result == expected - assert_frame_equal(pd.read_json(result, lines=True), df) - def test_latin_encoding(self): if compat.PY2: tm.assert_raises_regex( diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py new file mode 100644 index 00000000000000..d14355b07cf204 --- /dev/null +++ b/pandas/tests/io/json/test_readlines.py @@ -0,0 +1,168 @@ +# -*- coding: utf-8 -*- +import pytest +import pandas as pd +from pandas import DataFrame, read_json +from pandas.compat import StringIO +from pandas.io.json.json import JsonReader +import pandas.util.testing as tm +from pandas.util.testing import (assert_frame_equal, assert_series_equal, + ensure_clean) + + +@pytest.fixture +def lines_json_df(): + df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + return df.to_json(lines=True, orient="records") + + +def test_read_jsonl(): + # GH9180 + result = read_json('{"a": 1, "b": 2}\n{"b":2, "a" :1}\n', lines=True) + expected = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) + assert_frame_equal(result, expected) + + +def test_read_jsonl_unicode_chars(): + # GH15132: non-ascii unicode characters + # \u201d == RIGHT DOUBLE QUOTATION MARK + + # simulate file handle + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + json = StringIO(json) + result = read_json(json, lines=True) + expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]], + 
columns=['a', 'b']) + assert_frame_equal(result, expected) + + # simulate string + json = '{"a": "foo”", "b": "bar"}\n{"a": "foo", "b": "bar"}\n' + result = read_json(json, lines=True) + expected = DataFrame([[u"foo\u201d", "bar"], ["foo", "bar"]], + columns=['a', 'b']) + assert_frame_equal(result, expected) + + +def test_to_jsonl(): + # GH9180 + df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b']) + result = df.to_json(orient="records", lines=True) + expected = '{"a":1,"b":2}\n{"a":1,"b":2}' + assert result == expected + + df = DataFrame([["foo}", "bar"], ['foo"', "bar"]], columns=['a', 'b']) + result = df.to_json(orient="records", lines=True) + expected = '{"a":"foo}","b":"bar"}\n{"a":"foo\\"","b":"bar"}' + assert result == expected + assert_frame_equal(read_json(result, lines=True), df) + + # GH15096: escaped characters in columns and data + df = DataFrame([["foo\\", "bar"], ['foo"', "bar"]], + columns=["a\\", 'b']) + result = df.to_json(orient="records", lines=True) + expected = ('{"a\\\\":"foo\\\\","b":"bar"}\n' + '{"a\\\\":"foo\\"","b":"bar"}') + assert result == expected + assert_frame_equal(read_json(result, lines=True), df) + + +@pytest.mark.parametrize("chunksize", [1, 1.0]) +def test_readjson_chunks(lines_json_df, chunksize): + # Basic test that read_json(chunks=True) gives the same result as + # read_json(chunks=False) + # GH17048: memory usage when lines=True + + unchunked = read_json(StringIO(lines_json_df), lines=True) + reader = read_json(StringIO(lines_json_df), lines=True, + chunksize=chunksize) + chunked = pd.concat(reader) + + assert_frame_equal(chunked, unchunked) + + +def test_readjson_chunksize_requires_lines(lines_json_df): + msg = "chunksize can only be passed if lines=True" + with tm.assert_raises_regex(ValueError, msg): + pd.read_json(StringIO(lines_json_df), lines=False, chunksize=2) + + +def test_readjson_chunks_series(): + # Test reading line-format JSON to Series with chunksize param + s = pd.Series({'A': 1, 'B': 2}) + + strio = StringIO(s.to_json(lines=True, orient="records")) + unchunked = pd.read_json(strio, lines=True, typ='Series') + + strio = StringIO(s.to_json(lines=True, orient="records")) + chunked = pd.concat(pd.read_json( + strio, lines=True, typ='Series', chunksize=1 + )) + + assert_series_equal(chunked, unchunked) + + +def test_readjson_each_chunk(lines_json_df): + # Other tests check that the final result of read_json(chunksize=True) + # is correct. This checks the intermediate chunks. 
+ chunks = list( + pd.read_json(StringIO(lines_json_df), lines=True, chunksize=2) + ) + assert chunks[0].shape == (2, 2) + assert chunks[1].shape == (1, 2) + + +def test_readjson_chunks_from_file(): + with ensure_clean('test.json') as path: + df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + df.to_json(path, lines=True, orient="records") + chunked = pd.concat(pd.read_json(path, lines=True, chunksize=1)) + unchunked = pd.read_json(path, lines=True) + assert_frame_equal(unchunked, chunked) + + +@pytest.mark.parametrize("chunksize", [None, 1]) +def test_readjson_chunks_closes(chunksize): + with ensure_clean('test.json') as path: + df = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + df.to_json(path, lines=True, orient="records") + reader = JsonReader( + path, orient=None, typ="frame", dtype=True, convert_axes=True, + convert_dates=True, keep_default_dates=True, numpy=False, + precise_float=False, date_unit=None, encoding=None, + lines=True, chunksize=chunksize) + reader.read() + assert reader.open_stream.closed, "didn't close stream with \ + chunksize = %s" % chunksize + + +@pytest.mark.parametrize("chunksize", [0, -1, 2.2, "foo"]) +def test_readjson_invalid_chunksize(lines_json_df, chunksize): + msg = r"'chunksize' must be an integer >=1" + + with tm.assert_raises_regex(ValueError, msg): + pd.read_json(StringIO(lines_json_df), lines=True, + chunksize=chunksize) + + +@pytest.mark.parametrize("chunksize", [None, 1, 2]) +def test_readjson_chunks_multiple_empty_lines(chunksize): + j = """ + + {"A":1,"B":4} + + + + {"A":2,"B":5} + + + + + + + + {"A":3,"B":6} + """ + orig = pd.DataFrame({'A': [1, 2, 3], 'B': [4, 5, 6]}) + test = pd.read_json(j, lines=True, chunksize=chunksize) + if chunksize is not None: + test = pd.concat(test) + tm.assert_frame_equal(orig, test, obj="chunksize: %s" % chunksize) From bbf0ddaf6461a9586c5e459d9c00fe863adc43f8 Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Fri, 29 Sep 2017 08:44:29 +0900 Subject: [PATCH 168/188] BUG: Add SparseArray.all (#17570) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/compat/numpy/function.py | 8 +++ pandas/core/sparse/array.py | 42 +++++++++++++++ pandas/tests/sparse/test_array.py | 88 +++++++++++++++++++++++++++++++ 4 files changed, 139 insertions(+) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d5d508d02cb730..ee781ec4b0361c 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -634,6 +634,7 @@ Sparse - Bug in :func:`SparseDataFrame.fillna` not filling all NaNs when frame was instantiated from SciPy sparse matrix (:issue:`16112`) - Bug in :func:`SparseSeries.unstack` and :func:`SparseDataFrame.stack` (:issue:`16614`, :issue:`15045`) - Bug in :func:`make_sparse` treating two numeric/boolean data, which have same bits, as same when array ``dtype`` is ``object`` (:issue:`17574`) +- :func:`SparseArray.all` and :func:`SparseArray.any` are now implemented to handle ``SparseArray``, these were used but not implemented (:issue:`17570`) Reshaping ^^^^^^^^^ diff --git a/pandas/compat/numpy/function.py b/pandas/compat/numpy/function.py index ccbd3d9704e0c9..d42be569635696 100644 --- a/pandas/compat/numpy/function.py +++ b/pandas/compat/numpy/function.py @@ -184,6 +184,14 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name): return skipna +ALLANY_DEFAULTS = OrderedDict() +ALLANY_DEFAULTS['dtype'] = None +ALLANY_DEFAULTS['out'] = None +validate_all = CompatValidator(ALLANY_DEFAULTS, fname='all', + method='both', max_fname_arg_count=1) +validate_any = 
CompatValidator(ALLANY_DEFAULTS, fname='any',
+                               method='both', max_fname_arg_count=1)
+
 LOGICAL_FUNC_DEFAULTS = dict(out=None)
 validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS,
                                         method='kwargs')
diff --git a/pandas/core/sparse/array.py b/pandas/core/sparse/array.py
index 3b45a013734c91..0424ac8703e255 100644
--- a/pandas/core/sparse/array.py
+++ b/pandas/core/sparse/array.py
@@ -615,6 +615,48 @@ def fillna(self, value, downcast=None):
         return self._simple_new(new_values, self.sp_index,
                                 fill_value=fill_value)

+    def all(self, axis=0, *args, **kwargs):
+        """
+        Tests whether all elements evaluate True
+
+        Returns
+        -------
+        all : bool
+
+        See Also
+        --------
+        numpy.all
+        """
+        nv.validate_all(args, kwargs)
+
+        values = self.sp_values
+
+        if len(values) != len(self) and not np.all(self.fill_value):
+            return False
+
+        return values.all()
+
+    def any(self, axis=0, *args, **kwargs):
+        """
+        Tests whether at least one element evaluates True
+
+        Returns
+        -------
+        any : bool
+
+        See Also
+        --------
+        numpy.any
+        """
+        nv.validate_any(args, kwargs)
+
+        values = self.sp_values
+
+        if len(values) != len(self) and np.any(self.fill_value):
+            return True
+
+        return values.any()
+
     def sum(self, axis=0, *args, **kwargs):
         """
         Sum of non-NA/null values
diff --git a/pandas/tests/sparse/test_array.py b/pandas/tests/sparse/test_array.py
index f653ee50982ad1..8de93ff3209613 100644
--- a/pandas/tests/sparse/test_array.py
+++ b/pandas/tests/sparse/test_array.py
@@ -664,6 +664,94 @@ def test_fillna_overlap(self):

 class TestSparseArrayAnalytics(object):

+    @pytest.mark.parametrize('data,pos,neg', [
+        ([True, True, True], True, False),
+        ([1, 2, 1], 1, 0),
+        ([1.0, 2.0, 1.0], 1.0, 0.0)
+    ])
+    def test_all(self, data, pos, neg):
+        # GH 17570
+        out = SparseArray(data).all()
+        assert out
+
+        out = SparseArray(data, fill_value=pos).all()
+        assert out
+
+        data[1] = neg
+        out = SparseArray(data).all()
+        assert not out
+
+        out = SparseArray(data, fill_value=pos).all()
+        assert not out
+
+    @pytest.mark.parametrize('data,pos,neg', [
+        ([True, True, True], True, False),
+        ([1, 2, 1], 1, 0),
+        ([1.0, 2.0, 1.0], 1.0, 0.0)
+    ])
+    def test_numpy_all(self, data, pos, neg):
+        # GH 17570
+        out = np.all(SparseArray(data))
+        assert out
+
+        out = np.all(SparseArray(data, fill_value=pos))
+        assert out
+
+        data[1] = neg
+        out = np.all(SparseArray(data))
+        assert not out
+
+        out = np.all(SparseArray(data, fill_value=pos))
+        assert not out
+
+        msg = "the 'out' parameter is not supported"
+        tm.assert_raises_regex(ValueError, msg, np.all,
+                               SparseArray(data), out=out)
+
+    @pytest.mark.parametrize('data,pos,neg', [
+        ([False, True, False], True, False),
+        ([0, 2, 0], 2, 0),
+        ([0.0, 2.0, 0.0], 2.0, 0.0)
+    ])
+    def test_any(self, data, pos, neg):
+        # GH 17570
+        out = SparseArray(data).any()
+        assert out
+
+        out = SparseArray(data, fill_value=pos).any()
+        assert out
+
+        data[1] = neg
+        out = SparseArray(data).any()
+        assert not out
+
+        out = SparseArray(data, fill_value=pos).any()
+        assert not out
+
+    @pytest.mark.parametrize('data,pos,neg', [
+        ([False, True, False], True, False),
+        ([0, 2, 0], 2, 0),
+        ([0.0, 2.0, 0.0], 2.0, 0.0)
+    ])
+    def test_numpy_any(self, data, pos, neg):
+        # GH 17570
+        out = np.any(SparseArray(data))
+        assert out
+
+        out = np.any(SparseArray(data, fill_value=pos))
+        assert out
+
+        data[1] = neg
+        out = np.any(SparseArray(data))
+        assert not out
+
+        out = np.any(SparseArray(data, fill_value=pos))
+        assert not out
+
+        msg = "the 'out' parameter is not supported"
+        tm.assert_raises_regex(ValueError, msg,
np.any, + SparseArray(data), out=out) + def test_sum(self): data = np.arange(10).astype(float) out = SparseArray(data).sum() From 00e52abe927150d10a72e397893bee56f4cc6505 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Sep 2017 03:04:41 -0700 Subject: [PATCH 169/188] update imports of DateParseError, remove unused imports from tslib (#17713) See #17652 --- pandas/_libs/tslib.pyx | 36 +++++--------------- pandas/core/tools/datetimes.py | 2 +- pandas/tests/indexes/datetimes/test_tools.py | 4 +-- pandas/tests/scalar/test_period.py | 5 +-- 4 files changed, 14 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4c34d0fcb1e5f6..b0b70bb8102047 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,12 +1,9 @@ # -*- coding: utf-8 -*- # cython: profile=False -import warnings - cimport numpy as np from numpy cimport (int8_t, int32_t, int64_t, import_array, ndarray, - float64_t, - NPY_INT64, NPY_DATETIME, NPY_TIMEDELTA) + float64_t, NPY_DATETIME, NPY_TIMEDELTA) import numpy as np import sys @@ -16,12 +13,10 @@ from cpython cimport ( PyTypeObject, PyFloat_Check, PyComplex_Check, - PyLong_Check, PyObject_RichCompareBool, PyObject_RichCompare, Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE, - PyUnicode_Check, - PyUnicode_AsUTF8String) + PyUnicode_Check) cdef extern from "Python.h": cdef PyTypeObject *Py_TYPE(object) @@ -38,7 +33,6 @@ from datetime cimport ( pandas_datetimestruct, pandas_datetime_to_datetimestruct, pandas_datetimestruct_to_datetime, - cmp_pandas_datetimestruct, days_per_month_table, get_datetime64_value, get_timedelta64_value, @@ -68,23 +62,12 @@ from khash cimport ( kh_resize_int64, kh_get_int64) from .tslibs.parsing import parse_datetime_string -from .tslibs.parsing import DateParseError # noqa cimport cython -import re import time -# dateutil compat -from dateutil.tz import (tzoffset, tzlocal as _dateutil_tzlocal, - tzutc as _dateutil_tzutc, - tzstr as _dateutil_tzstr) - -from dateutil.relativedelta import relativedelta -from dateutil.parser import DEFAULTPARSER - -from pandas.compat import (parse_date, string_types, iteritems, - StringIO, callable) +from pandas.compat import iteritems, callable import operator import collections @@ -97,9 +80,6 @@ import_array() # import datetime C API PyDateTime_IMPORT -# in numpy 1.7, will prob need the following: -# numpy_pydatetime_import - cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT @@ -318,7 +298,7 @@ class Timestamp(_Timestamp): tz : string / timezone object, default None Timezone to localize to """ - if isinstance(tz, string_types): + if util.is_string_object(tz): tz = maybe_get_tz(tz) return cls(datetime.now(tz)) @@ -613,7 +593,7 @@ class Timestamp(_Timestamp): if self.tzinfo is None: # tz naive, localize tz = maybe_get_tz(tz) - if not isinstance(ambiguous, string_types): + if not util.is_string_object(ambiguous): ambiguous = [ambiguous] value = tz_localize_to_utc(np.array([self.value], dtype='i8'), tz, ambiguous=ambiguous, errors=errors)[0] @@ -2426,8 +2406,8 @@ class Timedelta(_Timedelta): raise TypeError( "Invalid type {0}. 
Must be int or float.".format(type(v))) - kwargs = dict([ (k, _to_py_int_float(v)) - for k, v in iteritems(kwargs) ]) + kwargs = dict([(k, _to_py_int_float(v)) + for k, v in iteritems(kwargs)]) try: nano = kwargs.pop('nanoseconds', 0) @@ -3682,7 +3662,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None, result[i] = v - delta return result - if isinstance(ambiguous, string_types): + if util.is_string_object(ambiguous): if ambiguous == 'infer': infer_dst = True elif ambiguous == 'NaT': diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 97ac8445faf4c2..8fe28aa4006131 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -8,6 +8,7 @@ from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import ( # noqa parse_time_string, + DateParseError, _format_is_iso, _guess_datetime_format) @@ -561,7 +562,6 @@ def calc_with_mask(carg, mask): return None -DateParseError = tslib.DateParseError normalize_date = tslib.normalize_date # Fixed time formats for time parsing diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index bdfe6b5b09e45a..b8ce1f0af6ea85 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -1335,13 +1335,13 @@ def test_parsers_monthfreq(self): def test_parsers_quarterly_with_freq(self): msg = ('Incorrect quarterly string is given, quarter ' 'must be between 1 and 4: 2013Q5') - with tm.assert_raises_regex(tslib.DateParseError, msg): + with tm.assert_raises_regex(parsing.DateParseError, msg): tools.parse_time_string('2013Q5') # GH 5418 msg = ('Unable to retrieve month information from given freq: ' 'INVLD-L-DEC-SAT') - with tm.assert_raises_regex(tslib.DateParseError, msg): + with tm.assert_raises_regex(parsing.DateParseError, msg): tools.parse_time_string('2013Q1', freq='INVLD-L-DEC-SAT') cases = {('2013Q2', None): datetime(2013, 4, 1), diff --git a/pandas/tests/scalar/test_period.py b/pandas/tests/scalar/test_period.py index c17a216df44cbd..28d85c52604d94 100644 --- a/pandas/tests/scalar/test_period.py +++ b/pandas/tests/scalar/test_period.py @@ -11,6 +11,7 @@ from pandas.compat.numpy import np_datetime64_compat from pandas._libs import tslib, period as libperiod +from pandas._libs.tslibs.parsing import DateParseError from pandas import Period, Timestamp, offsets from pandas.tseries.frequencies import DAYS, MONTHS @@ -886,8 +887,8 @@ def test_constructor_infer_freq(self): def test_badinput(self): pytest.raises(ValueError, Period, '-2000', 'A') - pytest.raises(tslib.DateParseError, Period, '0', 'A') - pytest.raises(tslib.DateParseError, Period, '1/1/-2000', 'A') + pytest.raises(DateParseError, Period, '0', 'A') + pytest.raises(DateParseError, Period, '1/1/-2000', 'A') def test_multiples(self): result1 = Period('1989', freq='2A') From ffa86c5d154e7013863f94a5a72b574aa2846508 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Sep 2017 03:05:59 -0700 Subject: [PATCH 170/188] Add missing file to _pyxfiles, delete commented-out (#17712) --- setup.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/setup.py b/setup.py index d25ae4a5fb45ce..793aa089e708fa 100755 --- a/setup.py +++ b/setup.py @@ -341,6 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/strptime.pyx', 'pandas/_libs/tslibs/timezones.pyx', 'pandas/_libs/tslibs/frequencies.pyx', 
'pandas/_libs/tslibs/parsing.pyx', @@ -349,14 +350,6 @@ class CheckSDist(sdist_class): def initialize_options(self): sdist_class.initialize_options(self) - ''' - self._pyxfiles = [] - for root, dirs, files in os.walk('pandas'): - for f in files: - if f.endswith('.pyx'): - self._pyxfiles.append(pjoin(root, f)) - ''' - def run(self): if 'cython' in cmdclass: self.run_command('cython') From 54f6648cdebfa376c83f9fc03b53effe82df7492 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 29 Sep 2017 03:16:35 -0700 Subject: [PATCH 171/188] Last of the timezones funcs (#17669) --- pandas/_libs/tslibs/timezones.pxd | 2 -- pandas/_libs/tslibs/timezones.pyx | 19 +++++++++++++++++++ pandas/core/indexes/datetimes.py | 2 +- pandas/core/tools/datetimes.py | 19 ------------------- pandas/tests/tseries/test_timezones.py | 19 +++++++++---------- 5 files changed, 29 insertions(+), 32 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd index e5d1343e1c9843..95e0474b3a174a 100644 --- a/pandas/_libs/tslibs/timezones.pxd +++ b/pandas/_libs/tslibs/timezones.pxd @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False -from numpy cimport ndarray - cdef bint is_utc(object tz) cdef bint is_tzlocal(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 48d82996a0bd0f..7f778dde86e232 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,5 +1,8 @@ # -*- coding: utf-8 -*- # cython: profile=False +# cython: linetrace=False +# distutils: define_macros=CYTHON_TRACE=0 +# distutils: define_macros=CYTHON_TRACE_NOGIL=0 cimport cython from cython cimport Py_ssize_t @@ -275,3 +278,19 @@ cdef object get_dst_info(object tz): dst_cache[cache_key] = (trans, deltas, typ) return dst_cache[cache_key] + + +def infer_tzinfo(start, end): + if start is not None and end is not None: + tz = start.tzinfo + if end.tzinfo: + if not (get_timezone(tz) == get_timezone(end.tzinfo)): + msg = 'Inputs must both have the same timezone, {tz1} != {tz2}' + raise AssertionError(msg.format(tz1=tz, tz2=end.tzinfo)) + elif start is not None: + tz = start.tzinfo + elif end is not None: + tz = end.tzinfo + else: + tz = None + return tz diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 39dc24642235ba..9127864eab8a16 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -443,7 +443,7 @@ def _generate(cls, start, end, periods, name, offset, raise ValueError("Closed has to be either 'left', 'right' or None") try: - inferred_tz = tools._infer_tzinfo(start, end) + inferred_tz = timezones.infer_tzinfo(start, end) except: raise TypeError('Start and end cannot both be tz-aware with ' 'different timezones') diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 8fe28aa4006131..e335dfe3a41421 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -4,7 +4,6 @@ from pandas._libs import tslib from pandas._libs.tslibs.strptime import array_strptime -from pandas._libs.tslibs.timezones import get_timezone from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import ( # noqa parse_time_string, @@ -30,24 +29,6 @@ from pandas.core import algorithms -def _infer_tzinfo(start, end): - def _infer(a, b): - tz = a.tzinfo - if b and b.tzinfo: - if not (get_timezone(tz) == get_timezone(b.tzinfo)): - raise AssertionError('Inputs must both have the same timezone,' - ' {timezone1} != {timezone2}' - 
.format(timezone1=tz, timezone2=b.tzinfo)) - return tz - - tz = None - if start is not None: - tz = _infer(start, end) - elif end is not None: - tz = _infer(end, start) - return tz - - def _guess_datetime_format_for_array(arr, **kwargs): # Try to guess the format based on the first non-NaN element non_nan_elements = notna(arr).nonzero()[0] diff --git a/pandas/tests/tseries/test_timezones.py b/pandas/tests/tseries/test_timezones.py index e7b470e01e2af3..aa8fe90ea65006 100644 --- a/pandas/tests/tseries/test_timezones.py +++ b/pandas/tests/tseries/test_timezones.py @@ -12,7 +12,6 @@ from datetime import datetime, timedelta, tzinfo, date import pandas.util.testing as tm -import pandas.core.tools.datetimes as tools import pandas.tseries.offsets as offsets from pandas.compat import lrange, zip from pandas.core.indexes.datetimes import bdate_range, date_range @@ -646,20 +645,20 @@ def test_infer_tz(self): start = self.localize(eastern, _start) end = self.localize(eastern, _end) - assert (tools._infer_tzinfo(start, end) is self.localize( - eastern, _start).tzinfo) - assert (tools._infer_tzinfo(start, None) is self.localize( - eastern, _start).tzinfo) - assert (tools._infer_tzinfo(None, end) is self.localize(eastern, - _end).tzinfo) + assert (timezones.infer_tzinfo(start, end) is + self.localize(eastern, _start).tzinfo) + assert (timezones.infer_tzinfo(start, None) is + self.localize(eastern, _start).tzinfo) + assert (timezones.infer_tzinfo(None, end) is + self.localize(eastern, _end).tzinfo) start = utc.localize(_start) end = utc.localize(_end) - assert (tools._infer_tzinfo(start, end) is utc) + assert (timezones.infer_tzinfo(start, end) is utc) end = self.localize(eastern, _end) - pytest.raises(Exception, tools._infer_tzinfo, start, end) - pytest.raises(Exception, tools._infer_tzinfo, end, start) + pytest.raises(Exception, timezones.infer_tzinfo, start, end) + pytest.raises(Exception, timezones.infer_tzinfo, end, start) def test_tz_string(self): result = date_range('1/1/2000', periods=10, From ad7d051bdfdbadc4221307d691fd55412d9d7ae8 Mon Sep 17 00:00:00 2001 From: Jean-Mathieu Deschenes Date: Fri, 29 Sep 2017 06:31:22 -0400 Subject: [PATCH 172/188] BUG: DataFrame sort_values and multiple "by" columns fails to order NaT correctly closes #16836 Author: Jean-Mathieu Deschenes This patch had conflicts when merged, resolved by Committer: Jeff Reback Closes #16995 from jdeschenes/datetime_sort_issues and squashes the following commits: 257e10a43 [Jean-Mathieu Deschenes] Changes requested by @jreback c6d55e2ad [Jean-Mathieu Deschenes] Fix for #16836 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 7 +------ pandas/tests/frame/test_sorting.py | 29 ++++++++++++++++++++++++++++- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index ee781ec4b0361c..4a3122a78b2340 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -648,6 +648,7 @@ Reshaping - :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`). 
- Bug in :func:`unique` where checking a tuple of strings raised a ``TypeError`` (:issue:`17108`) - Bug in :func:`concat` where order of result index was unpredictable if it contained non-comparable elements (:issue:`17344`) +- Fixes regression when sorting by multiple columns on a ``datetime64`` dtype ``Series`` with ``NaT`` values (:issue:`16836`) Numeric ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 579d9f10d5875b..a12e611f6618a0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3453,18 +3453,13 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, if len(by) > 1: from pandas.core.sorting import lexsort_indexer - def trans(v): - if needs_i8_conversion(v): - return v.view('i8') - return v - keys = [] for x in by: k = self.xs(x, axis=other_axis).values if k.ndim == 2: raise ValueError('Cannot sort by duplicate column %s' % str(x)) - keys.append(trans(k)) + keys.append(k) indexer = lexsort_indexer(keys, orders=ascending, na_position=na_position) indexer = _ensure_platform_int(indexer) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index 891c94b59074aa..e6f823bf6fac22 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -269,6 +269,11 @@ def test_sort_datetimes(self): df2 = df.sort_values(by=['B']) assert_frame_equal(df1, df2) + df1 = df.sort_values(by='B') + + df2 = df.sort_values(by=['C', 'B']) + assert_frame_equal(df1, df2) + def test_frame_column_inplace_sort_exception(self): s = self.frame['A'] with tm.assert_raises_regex(ValueError, "This Series is a view"): @@ -321,7 +326,29 @@ def test_sort_nat_values_in_int_column(self): assert_frame_equal(df_sorted, df_reversed) df_sorted = df.sort_values(["datetime", "float"], na_position="last") - assert_frame_equal(df_sorted, df_reversed) + assert_frame_equal(df_sorted, df) + + # Ascending should not affect the results. 
+        df_sorted = df.sort_values(["datetime", "float"], ascending=False)
+        assert_frame_equal(df_sorted, df)
+
+    def test_sort_nat(self):
+
+        # GH 16836
+
+        d1 = [Timestamp(x) for x in ['2016-01-01', '2015-01-01',
+                                     np.nan, '2016-01-01']]
+        d2 = [Timestamp(x) for x in ['2017-01-01', '2014-01-01',
+                                     '2016-01-01', '2015-01-01']]
+        df = pd.DataFrame({'a': d1, 'b': d2}, index=[0, 1, 2, 3])
+
+        d3 = [Timestamp(x) for x in ['2015-01-01', '2016-01-01',
+                                     '2016-01-01', np.nan]]
+        d4 = [Timestamp(x) for x in ['2014-01-01', '2015-01-01',
+                                     '2017-01-01', '2016-01-01']]
+        expected = pd.DataFrame({'a': d3, 'b': d4}, index=[1, 3, 0, 2])
+        sorted_df = df.sort_values(by=['a', 'b'])
+        tm.assert_frame_equal(sorted_df, expected)

 class TestDataFrameSortIndexKinds(TestData):

From e2a0251d32a1467e9ab86281a31f57aca582a88f Mon Sep 17 00:00:00 2001
From: topper-123
Date: Fri, 29 Sep 2017 17:00:08 +0200
Subject: [PATCH 173/188] Doc improvements for IntervalIndex and Interval (#17714)

---
 doc/source/advanced.rst         | 23 +++++++++++++++++++++++
 pandas/_libs/interval.pyx       | 26 ++++++++++++++++++++++----
 pandas/core/indexes/interval.py | 20 +++++++++-----------
 3 files changed, 54 insertions(+), 15 deletions(-)

diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst
index 799d04859cc2ac..cfdb53ec7e4b1a 100644
--- a/doc/source/advanced.rst
+++ b/doc/source/advanced.rst
@@ -833,12 +833,21 @@ Of course if you need integer based selection, then use ``iloc``
 IntervalIndex
 ~~~~~~~~~~~~~

+:class:`IntervalIndex`, together with its own dtype ``interval`` and the
+:class:`Interval` scalar type, allows first-class support in pandas for
+interval notation.
+
+The ``IntervalIndex`` allows some unique indexing and is also used as a
+return type for the categories in :func:`cut` and :func:`qcut`.
+
 .. versionadded:: 0.20.0

 .. warning::

    These indexing behaviors are provisional and may change in a future version of pandas.

+An ``IntervalIndex`` can be used in ``Series`` and in ``DataFrame`` as the index.
+
 .. ipython:: python

    df = pd.DataFrame({'A': [1, 2, 3, 4]},
@@ -860,6 +869,20 @@ If you select a label *contained* within an interval, this will also select the

    df.loc[2.5]
    df.loc[[2.5, 3.5]]

+``Interval`` and ``IntervalIndex`` are used by ``cut`` and ``qcut``:
+
+.. ipython:: python
+
+   c = pd.cut(range(4), bins=2)
+   c
+   c.categories
+
+Furthermore, ``IntervalIndex`` allows one to bin *other* data with these same
+bins, with ``NaN`` representing a missing value similar to other dtypes.
+
+.. ipython:: python
+
+   pd.cut([0, 3, 5, 1], bins=c.categories)

 Miscellaneous indexing FAQ
 --------------------------

diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
index 306597031817df..264a983fe4d536 100644
--- a/pandas/_libs/interval.pyx
+++ b/pandas/_libs/interval.pyx
@@ -51,17 +51,35 @@ cdef class Interval(IntervalMixin):

     .. versionadded:: 0.20.0

-    Attributes
+    Parameters
     ----------
-    left, right : values
-        Left and right bounds for each interval.
+    left : value
+        Left bound for interval.
+    right : value
+        Right bound for interval.
     closed : {'left', 'right', 'both', 'neither'}
         Whether the interval is closed on the left-side, right-side, both or
         neither. Defaults to 'right'.

+    Examples
+    --------
+    >>> iv = pd.Interval(left=0, right=5)
+    >>> iv
+    Interval(0, 5, closed='right')
+    >>> 2.5 in iv
+    True
+
+    >>> year_2017 = pd.Interval(pd.Timestamp('2017-01-01'),
+    ...                         pd.Timestamp('2017-12-31'), closed='both')
+    >>> pd.Timestamp('2017-01-01 00:00') in year_2017
+    True
+
     See Also
     --------
-    IntervalIndex : an Index of intervals that are all closed on the same side.
+    IntervalIndex : an Index of ``Interval`` objects that are all closed on
+        the same side.
+    cut, qcut : convert arrays of continuous data into categoricals/series of
+        ``Interval``.
     """

     cdef readonly object left, right

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
index a697ed7888f90b..29699f664bbf34 100644
--- a/pandas/core/indexes/interval.py
+++ b/pandas/core/indexes/interval.py
@@ -105,8 +105,10 @@ class IntervalIndex(IntervalMixin, Index):

     .. versionadded:: 0.20.0

-    Warning: the indexing behaviors are provisional and may change in
-    a future version of pandas.
+    .. warning::
+
+       The indexing behaviors are provisional and may change in
+       a future version of pandas.

     Attributes
     ----------
@@ -147,15 +149,11 @@ class IntervalIndex(IntervalMixin, Index):
     --------
     Index
     Interval : A bounded slice-like interval
-    interval_range : Function to create a fixed frequency IntervalIndex
-    IntervalIndex.from_arrays : Construct an IntervalIndex from a left and
-                                right array
-    IntervalIndex.from_breaks : Construct an IntervalIndex from an array of
-                                splits
-    IntervalIndex.from_intervals : Construct an IntervalIndex from an array of
-                                   Interval objects
-    IntervalIndex.from_tuples : Construct an IntervalIndex from a list/array of
-                                tuples
+    interval_range : Function to create a fixed frequency
+    IntervalIndex, IntervalIndex.from_arrays, IntervalIndex.from_breaks,
+    IntervalIndex.from_intervals, IntervalIndex.from_tuples
+    cut, qcut : convert arrays of continuous data into categoricals/series of
+        ``Interval``.
     """
     _typ = 'intervalindex'
     _comparables = ['name']

From b8467c00f78eec73efd14f159f1ba935a65b4ee7 Mon Sep 17 00:00:00 2001
From: topper-123
Date: Sat, 30 Sep 2017 17:25:57 +0200
Subject: [PATCH 174/188] DOC: Add examples for MultiIndex.get_locs + cleanups (#17675)

---
 pandas/core/categorical.py   |  2 +-
 pandas/core/indexes/multi.py | 65 +++++++++++++++++++++++++-----------
 2 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
index d79937829cf3fb..61e28dde2e34c0 100644
--- a/pandas/core/categorical.py
+++ b/pandas/core/categorical.py
@@ -229,7 +229,7 @@ class Categorical(PandasObject):

     See also
     --------
-    pandas.api.types.CategoricalDtype
+    pandas.api.types.CategoricalDtype : Type for categorical data
     CategoricalIndex : An Index with an underlying ``Categorical``
     """

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 35f738b347a3eb..9ffac0832062d0 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -72,8 +72,8 @@ class MultiIndex(Index):
     Examples
     --------
     A new ``MultiIndex`` is typically constructed using one of the helper
-    methods :meth:`MultiIndex.from_arrays``, :meth:`MultiIndex.from_product``
-    and :meth:`MultiIndex.from_tuples``. For example (using ``.from_arrays``):
+    methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
+    and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):

     >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
     >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
@@ -1982,33 +1982,41 @@ def _partial_tup_index(self, tup, side='left'):

     def get_loc(self, key, method=None):
         """
-        Get integer location, slice or boolean mask for requested label or
-        tuple. If the key is past the lexsort depth, the return may be a
-        boolean mask array, otherwise it is always a slice or int.
+        Get location for a label or a tuple of labels as an integer, slice or
+        boolean mask.

         Parameters
         ----------
-        key : label or tuple
+        key : label or tuple of labels (one for each level)
         method : None

         Returns
         -------
         loc : int, slice object or boolean mask
+            If the key is past the lexsort depth, the return may be a
+            boolean mask array, otherwise it is always a slice or int.

         Examples
         --------
         >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])
+
         >>> mi.get_loc('b')
         slice(1, 3, None)
+
         >>> mi.get_loc(('b', 'e'))
         1

+        Notes
+        -----
+        The key cannot be a slice, list of same-level labels, a boolean mask,
+        or a sequence of such. If you want to use those, use
+        :meth:`MultiIndex.get_locs` instead.
+
         See also
         --------
         Index.get_loc : get_loc method for (single-level) index.
-        get_locs : Given a tuple of slices/lists/labels/boolean indexer to a
-                   level-wise spec, produce an indexer to extract those
-                   locations.
+        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
+                              sequence of such.
         """
         if method is not None:
             raise NotImplementedError('only the default get_loc method is '
@@ -2117,8 +2125,9 @@ def get_loc_level(self, key, level=0, drop_level=True):

         See Also
         --------
-        MultiIndex.get_loc : Get integer location, slice or boolean mask for
-                             requested label or tuple.
+        MultiIndex.get_loc : Get location for a label or a tuple of labels.
+        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
+                              sequence of such
         """
         def maybe_droplevels(indexer, levels, drop_level):
@@ -2328,23 +2337,41 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels):
             j = labels.searchsorted(loc, side='right')
             return slice(i, j)

-    def get_locs(self, tup):
+    def get_locs(self, seq):
         """
-        Given a tuple of slices/lists/labels/boolean indexer to a level-wise
-        spec produce an indexer to extract those locations
+        Get location for a given label/slice/list/mask or a sequence of such,
+        as an array of integers.

         Parameters
         ----------
-        key : tuple of (slices/list/labels)
+        seq : label/slice/list/mask or a sequence of such
+            You should use one of the above for each level.
+            If a level should not be used, set it to ``slice(None)``.

         Returns
         -------
-        locs : integer list of locations or boolean indexer suitable
-               for passing to iloc
+        locs : array of integers suitable for passing to iloc
+
+        Examples
+        --------
+        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])
+
+        >>> mi.get_locs('b')
+        array([1, 2], dtype=int64)
+
+        >>> mi.get_locs([slice(None), ['e', 'f']])
+        array([1, 2], dtype=int64)
+
+        >>> mi.get_locs([[True, False, True], slice('e', 'f')])
+        array([2], dtype=int64)
+
+        See also
+        --------
+        MultiIndex.get_loc : Get location for a label or a tuple of labels.
         """
         # must be lexsorted to at least as many levels
-        true_slices = [i for (i, s) in enumerate(is_true_slices(tup)) if s]
+        true_slices = [i for (i, s) in enumerate(is_true_slices(seq)) if s]
         if true_slices and true_slices[-1] >= self.lexsort_depth:
             raise UnsortedIndexError('MultiIndex slicing requires the index '
                                      'to be lexsorted: slicing on levels {0}, '
@@ -2377,7 +2404,7 @@ def _update_indexer(idxr, indexer=indexer):
                 return indexer
             return indexer & idxr

-        for i, k in enumerate(tup):
+        for i, k in enumerate(seq):

             if is_bool_indexer(k):

                 # a boolean indexer, must be the same length!
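For reference, a small sketch (illustrative only, not part of the patch) exercising the two methods exactly as the docstrings above describe: ``get_loc`` for a single label or tuple of labels, ``get_locs`` for per-level slices/lists/masks.

    import pandas as pd

    mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

    # get_loc resolves one label (or one tuple of per-level labels)
    assert mi.get_loc('b') == slice(1, 3, None)
    assert mi.get_loc(('b', 'e')) == 1

    # get_locs takes one indexer per level and returns integer positions
    assert list(mi.get_locs([slice(None), ['e', 'f']])) == [1, 2]
    assert list(mi.get_locs([[True, False, True], slice('e', 'f')])) == [2]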
From 030e374940a93b7920c0c2ac5c950668564c3703 Mon Sep 17 00:00:00 2001 From: huashuai Date: Sat, 30 Sep 2017 12:33:52 -0700 Subject: [PATCH 175/188] BUG: Fix series rename called with str altering name rather index (GH17407) (#17654) * BUG: Fix series rename called with str altering the name. GH17407 * add whatsnew for the fix for #17407 * Fix typo in whatsnew * remove whitespace * Update code after @jreback's comments * Change `or` to `and` for checking iterable * Only check against Iterable in is_list_like and add test for `str` * Update v0.21.0.txt --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/dtypes/inference.py | 3 ++- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/series/test_indexing.py | 10 ++++++++++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 4a3122a78b2340..e0e0c180525501 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -589,6 +589,7 @@ Indexing - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) +- Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. (:issue:`17407`) I/O ^^^ diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index ff7e215951a1f7..de769c69f44fd0 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -3,6 +3,7 @@ import collections import re import numpy as np +from collections import Iterable from numbers import Number from pandas.compat import (PY2, string_types, text_type, string_and_binary_types) @@ -262,7 +263,7 @@ def is_list_like(obj): False """ - return (hasattr(obj, '__iter__') and + return (isinstance(obj, Iterable) and not isinstance(obj, string_and_binary_types)) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index dbde7ae5081d4a..857f7a283aa951 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -58,7 +58,7 @@ def __getitem__(self): def test_is_list_like(): passes = ([], [1], (1, ), (1, 2), {'a': 1}, set([1, 'a']), Series([1]), Series([]), Series(['a']).str) - fails = (1, '2', object()) + fails = (1, '2', object(), str) for p in passes: assert inference.is_list_like(p) diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 2182e3fbfc2129..83d6a09d38f415 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -2188,6 +2188,16 @@ def test_reindex_fill_value(self): expected = Series([False, True, False], index=[1, 2, 3]) assert_series_equal(result, expected) + def test_rename(self): + + # GH 17407 + s = Series(range(1, 6), index=pd.Index(range(2, 7), name='IntIndex')) + result = s.rename(str) + expected = s.rename(lambda i: str(i)) + assert_series_equal(result, expected) + + assert result.name == expected.name + def test_select(self): n = len(self.ts) result = self.ts.select(lambda x: x >= self.ts.index[n // 2]) From baadad7581c48b0b1c6401b7e3b32fd09e7f0863 Mon Sep 17 00:00:00 2001 From: MarsGuy Date: Sun, 1 Oct 2017 17:55:40 +0530 Subject: [PATCH 176/188] DOC: Fixed typo in 
documentation for 'pandas.DataFrame.replace' (#17731) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2fb0e348c01c0b..6fd4f3eeb6b907 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4260,7 +4260,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, dicts of such objects are also allowed. inplace : boolean, default False If True, in place. Note: this will modify any - other views on this object (e.g. a column form a DataFrame). + other views on this object (e.g. a column from a DataFrame). Returns the caller if this is True. limit : int, default None Maximum size gap to forward or backward fill From fd336fbea59edf6324d5c4ac8b22ed696312f50e Mon Sep 17 00:00:00 2001 From: Licht Takeuchi Date: Sun, 1 Oct 2017 23:53:45 +0900 Subject: [PATCH 177/188] BUG: Fix unexpected sort in groupby (#17621) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/generic.py | 2 +- pandas/core/groupby.py | 23 +++++++++++-- pandas/tests/groupby/test_groupby.py | 47 +++++++++++++++----------- pandas/tests/groupby/test_whitelist.py | 16 ++++++--- pandas/tests/test_multilevel.py | 17 +++++++--- 6 files changed, 76 insertions(+), 30 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index e0e0c180525501..11eba13dd0f1f1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -627,6 +627,7 @@ Groupby/Resample/Rolling - Bug in ``.rolling(...).apply(...)`` with a ``DataFrame`` with a ``DatetimeIndex``, a ``window`` of a timedelta-convertible and ``min_periods >= 1` (:issue:`15305`) - Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`) - Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`) +- Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`) Sparse ^^^^^^ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6fd4f3eeb6b907..4f6fd0828693e1 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6631,7 +6631,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, return rs def _agg_by_level(self, name, axis=0, level=0, skipna=True, **kwargs): - grouped = self.groupby(level=level, axis=axis) + grouped = self.groupby(level=level, axis=axis, sort=False) if hasattr(grouped, name) and skipna: return getattr(grouped, name)(**kwargs) axis = self._get_axis_number(axis) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index a62ae40a85941f..2f2056279558d3 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2586,10 +2586,27 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, """ group_axis = obj._get_axis(axis) - # validate that the passed level is compatible with the passed + # validate that the passed single level is compatible with the passed # axis of the object if level is not None: - if not isinstance(group_axis, MultiIndex): + # TODO: These if-block and else-block are almost same. + # MultiIndex instance check is removable, but it seems that there are + # some processes only for non-MultiIndex in else-block, + # eg. `obj.index.name != level`. We have to consider carefully whether + # these are applicable for MultiIndex. 
Even if these are applicable, + # we need to check if it makes no side effect to subsequent processes + # on the outside of this condition. + # (GH 17621) + if isinstance(group_axis, MultiIndex): + if is_list_like(level) and len(level) == 1: + level = level[0] + + if key is None and is_scalar(level): + # Get the level values from group_axis + key = group_axis.get_level_values(level) + level = None + + else: # allow level to be a length-one list-like object # (e.g., level=[0]) # GH 13901 @@ -2611,6 +2628,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, raise ValueError('level > 0 or level < -1 only valid with ' ' MultiIndex') + # NOTE: `group_axis` and `group_axis.get_level_values(level)` + # are same in this section. level = None key = group_axis diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d91cff436dee2e..47bf837fa62d95 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1791,18 +1791,20 @@ def aggfun(ser): agged2 = df.groupby(keys).aggregate(aggfun) assert len(agged2.columns) + 1 == len(df.columns) - def test_groupby_level(self): + @pytest.mark.parametrize('sort', [True, False]) + def test_groupby_level(self, sort): + # GH 17537 frame = self.mframe deleveled = frame.reset_index() - result0 = frame.groupby(level=0).sum() - result1 = frame.groupby(level=1).sum() + result0 = frame.groupby(level=0, sort=sort).sum() + result1 = frame.groupby(level=1, sort=sort).sum() - expected0 = frame.groupby(deleveled['first'].values).sum() - expected1 = frame.groupby(deleveled['second'].values).sum() + expected0 = frame.groupby(deleveled['first'].values, sort=sort).sum() + expected1 = frame.groupby(deleveled['second'].values, sort=sort).sum() - expected0 = expected0.reindex(frame.index.levels[0]) - expected1 = expected1.reindex(frame.index.levels[1]) + expected0.index.name = 'first' + expected1.index.name = 'second' assert result0.index.name == 'first' assert result1.index.name == 'second' @@ -1813,15 +1815,15 @@ def test_groupby_level(self): assert result1.index.name == frame.index.names[1] # groupby level name - result0 = frame.groupby(level='first').sum() - result1 = frame.groupby(level='second').sum() + result0 = frame.groupby(level='first', sort=sort).sum() + result1 = frame.groupby(level='second', sort=sort).sum() assert_frame_equal(result0, expected0) assert_frame_equal(result1, expected1) # axis=1 - result0 = frame.T.groupby(level=0, axis=1).sum() - result1 = frame.T.groupby(level=1, axis=1).sum() + result0 = frame.T.groupby(level=0, axis=1, sort=sort).sum() + result1 = frame.T.groupby(level=1, axis=1, sort=sort).sum() assert_frame_equal(result0, expected0.T) assert_frame_equal(result1, expected1.T) @@ -1835,15 +1837,17 @@ def test_groupby_level_index_names(self): df.groupby(level='exp') pytest.raises(ValueError, df.groupby, level='foo') - def test_groupby_level_with_nas(self): + @pytest.mark.parametrize('sort', [True, False]) + def test_groupby_level_with_nas(self, sort): + # GH 17537 index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], labels=[[1, 1, 1, 1, 0, 0, 0, 0], [0, 1, 2, 3, 0, 1, 2, 3]]) # factorizing doesn't confuse things s = Series(np.arange(8.), index=index) - result = s.groupby(level=0).sum() - expected = Series([22., 6.], index=[1, 0]) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6., 22.], index=[0, 1]) assert_series_equal(result, expected) index = MultiIndex(levels=[[1, 0], [0, 1, 2, 3]], @@ -1852,8 +1856,8 @@ def test_groupby_level_with_nas(self): 
# factorizing doesn't confuse things s = Series(np.arange(8.), index=index) - result = s.groupby(level=0).sum() - expected = Series([18., 6.], index=[1, 0]) + result = s.groupby(level=0, sort=sort).sum() + expected = Series([6., 18.], index=[0.0, 1.0]) assert_series_equal(result, expected) def test_groupby_level_apply(self): @@ -1936,9 +1940,14 @@ def test_groupby_complex(self): result = a.sum(level=0) assert_series_equal(result, expected) - def test_level_preserve_order(self): - grouped = self.mframe.groupby(level=0) - exp_labels = np.array([0, 0, 0, 1, 1, 2, 2, 3, 3, 3], np.intp) + @pytest.mark.parametrize('sort,labels', [ + [True, [2, 2, 2, 0, 0, 1, 1, 3, 3, 3]], + [False, [0, 0, 0, 1, 1, 2, 2, 3, 3, 3]] + ]) + def test_level_preserve_order(self, sort, labels): + # GH 17537 + grouped = self.mframe.groupby(level=0, sort=sort) + exp_labels = np.array(labels, np.intp) assert_almost_equal(grouped.grouper.labels[0], exp_labels) def test_grouping_labels(self): diff --git a/pandas/tests/groupby/test_whitelist.py b/pandas/tests/groupby/test_whitelist.py index 1c5161d2ffb431..259f466316c414 100644 --- a/pandas/tests/groupby/test_whitelist.py +++ b/pandas/tests/groupby/test_whitelist.py @@ -174,12 +174,16 @@ def raw_frame(): @pytest.mark.parametrize( - "op, level, axis, skipna", + "op, level, axis, skipna, sort", product(AGG_FUNCTIONS, lrange(2), lrange(2), + [True, False], [True, False])) -def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna): +def test_regression_whitelist_methods( + raw_frame, op, level, + axis, skipna, sort): # GH6944 + # GH 17537 # explicity test the whitelest methods if axis == 0: @@ -188,15 +192,19 @@ def test_regression_whitelist_methods(raw_frame, op, level, axis, skipna): frame = raw_frame.T if op in AGG_FUNCTIONS_WITH_SKIPNA: - grouped = frame.groupby(level=level, axis=axis) + grouped = frame.groupby(level=level, axis=axis, sort=sort) result = getattr(grouped, op)(skipna=skipna) expected = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + expected = expected.sort_index(axis=axis, level=level) tm.assert_frame_equal(result, expected) else: - grouped = frame.groupby(level=level, axis=axis) + grouped = frame.groupby(level=level, axis=axis, sort=sort) result = getattr(grouped, op)() expected = getattr(frame, op)(level=level, axis=axis) + if sort: + expected = expected.sort_index(axis=axis, level=level) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 6976fe162c5d5c..050335988ca417 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1392,17 +1392,23 @@ def test_count(self): AGG_FUNCTIONS = ['sum', 'prod', 'min', 'max', 'median', 'mean', 'skew', 'mad', 'std', 'var', 'sem'] - def test_series_group_min_max(self): + @pytest.mark.parametrize('sort', [True, False]) + def test_series_group_min_max(self, sort): + # GH 17537 for op, level, skipna in cart_product(self.AGG_FUNCTIONS, lrange(2), [False, True]): - grouped = self.series.groupby(level=level) + grouped = self.series.groupby(level=level, sort=sort) aggf = lambda x: getattr(x, op)(skipna=skipna) # skipna=True leftside = grouped.agg(aggf) rightside = getattr(self.series, op)(level=level, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level) tm.assert_series_equal(leftside, rightside) - def test_frame_group_ops(self): + @pytest.mark.parametrize('sort', [True, False]) + def test_frame_group_ops(self, sort): + # GH 17537 self.frame.iloc[1, [1, 2]] = np.nan 
self.frame.iloc[7, [0, 1]] = np.nan @@ -1415,7 +1421,7 @@ def test_frame_group_ops(self): else: frame = self.frame.T - grouped = frame.groupby(level=level, axis=axis) + grouped = frame.groupby(level=level, axis=axis, sort=sort) pieces = [] @@ -1426,6 +1432,9 @@ def aggf(x): leftside = grouped.agg(aggf) rightside = getattr(frame, op)(level=level, axis=axis, skipna=skipna) + if sort: + rightside = rightside.sort_index(level=level, axis=axis) + frame = frame.sort_index(level=level, axis=axis) # for good measure, groupby detail level_index = frame._get_axis(axis).levels[level] From 7d4a260cbe6d5c1825541adcd0d5310f32a3ba42 Mon Sep 17 00:00:00 2001 From: Andreas Winkler Date: Sun, 1 Oct 2017 16:55:32 +0200 Subject: [PATCH 178/188] BUG: Fix some PeriodIndex resampling issues (#16153) closes #15944 xref #12884 closes #13083 closes #13224 --- doc/source/whatsnew/v0.21.0.txt | 76 ++++++++ pandas/core/resample.py | 132 ++++++++------ pandas/tests/test_resample.py | 304 ++++++++++++++++++++------------ 3 files changed, 340 insertions(+), 172 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 11eba13dd0f1f1..1094e96bd0d201 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -171,6 +171,82 @@ Other Enhancements Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. _whatsnew_0210.api_breaking.period_index_resampling: + +``PeriodIndex`` resampling +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions of pandas, resampling a ``Series``/``DataFrame`` indexed by a ``PeriodIndex`` returned a ``DatetimeIndex`` in some cases (:issue:`12884`). Resampling to a multiplied frequency now returns a ``PeriodIndex`` (:issue:`15944`). As a minor enhancement, resampling a ``PeriodIndex`` can now handle ``NaT`` values (:issue:`13224`) + +Previous Behavior: + +.. code-block:: ipython + + In [1]: pi = pd.period_range('2017-01', periods=12, freq='M') + + In [2]: s = pd.Series(np.arange(12), index=pi) + + In [3]: resampled = s.resample('2Q').mean() + + In [4]: resampled + Out[4]: + 2017-03-31 1.0 + 2017-09-30 5.5 + 2018-03-31 10.0 + Freq: 2Q-DEC, dtype: float64 + + In [5]: resampled.index + Out[5]: DatetimeIndex(['2017-03-31', '2017-09-30', '2018-03-31'], dtype='datetime64[ns]', freq='2Q-DEC') + +New Behavior: + +.. ipython:: python + + pi = pd.period_range('2017-01', periods=12, freq='M') + + s = pd.Series(np.arange(12), index=pi) + + resampled = s.resample('2Q').mean() + + resampled + + resampled.index + + +Upsampling and calling ``.ohlc()`` previously returned a ``Series``, basically identical to calling ``.asfreq()``. OHLC upsampling now returns a DataFrame with columns ``open``, ``high``, ``low`` and ``close`` (:issue:`13083`). This is consistent with downsampling and ``DatetimeIndex`` behavior. + +Previous Behavior: + +.. code-block:: ipython + + In [1]: pi = pd.PeriodIndex(start='2000-01-01', freq='D', periods=10) + + In [2]: s = pd.Series(np.arange(10), index=pi) + + In [3]: s.resample('H').ohlc() + Out[3]: + 2000-01-01 00:00 0.0 + ... + 2000-01-10 23:00 NaN + Freq: H, Length: 240, dtype: float64 + + In [4]: s.resample('M').ohlc() + Out[4]: + open high low close + 2000-01 0 9 0 9 + +New Behavior: + +.. ipython:: python + + pi = pd.PeriodIndex(start='2000-01-01', freq='D', periods=10) + + s = pd.Series(np.arange(10), index=pi) + + s.resample('H').ohlc() + + s.resample('M').ohlc() + .. 
_whatsnew_0210.api_breaking.deps: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 01c7e875b8eccc..083fbcaaabe460 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -14,7 +14,7 @@ from pandas.core.indexes.datetimes import DatetimeIndex, date_range from pandas.core.indexes.timedeltas import TimedeltaIndex from pandas.tseries.offsets import DateOffset, Tick, Day, _delta_to_nanoseconds -from pandas.core.indexes.period import PeriodIndex, period_range +from pandas.core.indexes.period import PeriodIndex import pandas.core.common as com import pandas.core.algorithms as algos from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -834,53 +834,32 @@ class PeriodIndexResampler(DatetimeIndexResampler): def _resampler_for_grouping(self): return PeriodIndexResamplerGroupby + def _get_binner_for_time(self): + if self.kind == 'timestamp': + return super(PeriodIndexResampler, self)._get_binner_for_time() + return self.groupby._get_period_bins(self.ax) + def _convert_obj(self, obj): obj = super(PeriodIndexResampler, self)._convert_obj(obj) - offset = to_offset(self.freq) - if offset.n > 1: - if self.kind == 'period': # pragma: no cover - print('Warning: multiple of frequency -> timestamps') - - # Cannot have multiple of periods, convert to timestamp + if self._from_selection: + # see GH 14008, GH 12871 + msg = ("Resampling from level= or on= selection" + " with a PeriodIndex is not currently supported," + " use .set_index(...) to explicitly set index") + raise NotImplementedError(msg) + + if self.loffset is not None: + # Cannot apply loffset/timedelta to PeriodIndex -> convert to + # timestamps self.kind = 'timestamp' # convert to timestamp - if not (self.kind is None or self.kind == 'period'): - if self._from_selection: - # see GH 14008, GH 12871 - msg = ("Resampling from level= or on= selection" - " with a PeriodIndex is not currently supported," - " use .set_index(...) to explicitly set index") - raise NotImplementedError(msg) - else: - obj = obj.to_timestamp(how=self.convention) + if self.kind == 'timestamp': + obj = obj.to_timestamp(how=self.convention) return obj - def aggregate(self, arg, *args, **kwargs): - result, how = self._aggregate(arg, *args, **kwargs) - if result is None: - result = self._downsample(arg, *args, **kwargs) - - result = self._apply_loffset(result) - return result - - agg = aggregate - - def _get_new_index(self): - """ return our new index """ - ax = self.ax - - if len(ax) == 0: - values = [] - else: - start = ax[0].asfreq(self.freq, how=self.convention) - end = ax[-1].asfreq(self.freq, how='end') - values = period_range(start, end, freq=self.freq).asi8 - - return ax._shallow_copy(values, freq=self.freq) - def _downsample(self, how, **kwargs): """ Downsample the cython defined function @@ -898,22 +877,17 @@ def _downsample(self, how, **kwargs): how = self._is_cython_func(how) or how ax = self.ax - new_index = self._get_new_index() - - # Start vs. 
end of period - memb = ax.asfreq(self.freq, how=self.convention) - if is_subperiod(ax.freq, self.freq): # Downsampling - if len(new_index) == 0: - bins = [] - else: - i8 = memb.asi8 - rng = np.arange(i8[0], i8[-1] + 1) - bins = memb.searchsorted(rng, side='right') - grouper = BinGrouper(bins, new_index) - return self._groupby_and_aggregate(how, grouper=grouper) + return self._groupby_and_aggregate(how, grouper=self.grouper) elif is_superperiod(ax.freq, self.freq): + if how == 'ohlc': + # GH #13083 + # upsampling to subperiods is handled as an asfreq, which works + # for pure aggregating/reducing methods + # OHLC reduces along the time dimension, but creates multiple + # values for each period -> handle by _groupby_and_aggregate() + return self._groupby_and_aggregate(how, grouper=self.grouper) return self.asfreq() elif ax.freq == self.freq: return self.asfreq() @@ -936,19 +910,16 @@ def _upsample(self, method, limit=None, fill_value=None): .fillna """ - if self._from_selection: - raise ValueError("Upsampling from level= or on= selection" - " is not supported, use .set_index(...)" - " to explicitly set index to" - " datetime-like") + # we may need to actually resample as if we are timestamps if self.kind == 'timestamp': return super(PeriodIndexResampler, self)._upsample( method, limit=limit, fill_value=fill_value) + self._set_binner() ax = self.ax obj = self.obj - new_index = self._get_new_index() + new_index = self.binner # Start vs. end of period memb = ax.asfreq(self.freq, how=self.convention) @@ -1293,6 +1264,51 @@ def _get_time_period_bins(self, ax): return binner, bins, labels + def _get_period_bins(self, ax): + if not isinstance(ax, PeriodIndex): + raise TypeError('axis must be a PeriodIndex, but got ' + 'an instance of %r' % type(ax).__name__) + + memb = ax.asfreq(self.freq, how=self.convention) + + # NaT handling as in pandas._lib.lib.generate_bins_dt64() + nat_count = 0 + if memb.hasnans: + nat_count = np.sum(memb._isnan) + memb = memb[~memb._isnan] + + # if index contains no valid (non-NaT) values, return empty index + if not len(memb): + binner = labels = PeriodIndex( + data=[], freq=self.freq, name=ax.name) + return binner, [], labels + + start = ax.min().asfreq(self.freq, how=self.convention) + end = ax.max().asfreq(self.freq, how='end') + + labels = binner = PeriodIndex(start=start, end=end, + freq=self.freq, name=ax.name) + + i8 = memb.asi8 + freq_mult = self.freq.n + + # when upsampling to subperiods, we need to generate enough bins + expected_bins_count = len(binner) * freq_mult + i8_extend = expected_bins_count - (i8[-1] - i8[0]) + rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult) + rng += freq_mult + bins = memb.searchsorted(rng, side='left') + + if nat_count > 0: + # NaT handling as in pandas._lib.lib.generate_bins_dt64() + # shift bins by the number of NaT + bins += nat_count + bins = np.insert(bins, 0, nat_count) + binner = binner.insert(0, tslib.NaT) + labels = labels.insert(0, tslib.NaT) + + return binner, bins, labels + def _take_new_index(obj, indexer, new_index, axis=0): from pandas.core.api import Series, DataFrame diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 7449beb8f97dfe..cd15203eccd826 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -18,7 +18,7 @@ from pandas.core.dtypes.generic import ABCSeries, ABCDataFrame from pandas.compat import range, lrange, zip, product, OrderedDict -from pandas.core.base import SpecificationError +from pandas.core.base import SpecificationError, 
AbstractMethodError from pandas.errors import UnsupportedFunctionCall from pandas.core.groupby import DataError from pandas.tseries.frequencies import MONTHS, DAYS @@ -698,35 +698,58 @@ def create_index(self, *args, **kwargs): factory = self._index_factory() return factory(*args, **kwargs) - def test_asfreq_downsample(self): - s = self.create_series() - - result = s.resample('2D').asfreq() - expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2))) - expected.index.freq = to_offset('2D') - assert_series_equal(result, expected) - - frame = s.to_frame('value') - result = frame.resample('2D').asfreq() - expected = frame.reindex( - frame.index.take(np.arange(0, len(frame.index), 2))) - expected.index.freq = to_offset('2D') - assert_frame_equal(result, expected) - - def test_asfreq_upsample(self): - s = self.create_series() - - result = s.resample('1H').asfreq() - new_index = self.create_index(s.index[0], s.index[-1], freq='1H') - expected = s.reindex(new_index) - assert_series_equal(result, expected) - - frame = s.to_frame('value') - result = frame.resample('1H').asfreq() - new_index = self.create_index(frame.index[0], - frame.index[-1], freq='1H') - expected = frame.reindex(new_index) - assert_frame_equal(result, expected) + @pytest.fixture + def _index_start(self): + return datetime(2005, 1, 1) + + @pytest.fixture + def _index_end(self): + return datetime(2005, 1, 10) + + @pytest.fixture + def _index_freq(self): + return 'D' + + @pytest.fixture + def index(self, _index_start, _index_end, _index_freq): + return self.create_index(_index_start, _index_end, freq=_index_freq) + + @pytest.fixture + def _series_name(self): + raise AbstractMethodError(self) + + @pytest.fixture + def _static_values(self, index): + return np.arange(len(index)) + + @pytest.fixture + def series(self, index, _series_name, _static_values): + return Series(_static_values, index=index, name=_series_name) + + @pytest.fixture + def frame(self, index, _static_values): + return DataFrame({'value': _static_values}, index=index) + + @pytest.fixture(params=[Series, DataFrame]) + def series_and_frame(self, request, index, _series_name, _static_values): + if request.param == Series: + return Series(_static_values, index=index, name=_series_name) + if request.param == DataFrame: + return DataFrame({'value': _static_values}, index=index) + + @pytest.mark.parametrize('freq', ['2D', '1H']) + def test_asfreq(self, series_and_frame, freq): + obj = series_and_frame + + result = obj.resample(freq).asfreq() + if freq == '2D': + new_index = obj.index.take(np.arange(0, len(obj.index), 2)) + new_index.freq = to_offset('2D') + else: + new_index = self.create_index(obj.index[0], obj.index[-1], + freq=freq) + expected = obj.reindex(new_index) + assert_almost_equal(result, expected) def test_asfreq_fill_value(self): # test for fill value during resampling, issue 3715 @@ -824,7 +847,7 @@ def test_resample_loffset_arg_type(self): periods=len(df.index) / 2, freq='2D') - # loffset coreces PeriodIndex to DateTimeIndex + # loffset coerces PeriodIndex to DateTimeIndex if isinstance(expected_index, PeriodIndex): expected_index = expected_index.to_timestamp() @@ -866,6 +889,10 @@ def test_apply_to_empty_series(self): class TestDatetimeIndex(Base): _index_factory = lambda x: date_range + @pytest.fixture + def _series_name(self): + return 'dti' + def setup_method(self, method): dti = DatetimeIndex(start=datetime(2005, 1, 1), end=datetime(2005, 1, 10), freq='Min') @@ -2214,57 +2241,35 @@ def test_resample_datetime_values(self): class 
TestPeriodIndex(Base): _index_factory = lambda x: period_range + @pytest.fixture + def _series_name(self): + return 'pi' + def create_series(self): + # TODO: replace calls to .create_series() by injecting the series + # fixture i = period_range(datetime(2005, 1, 1), datetime(2005, 1, 10), freq='D') return Series(np.arange(len(i)), index=i, name='pi') - def test_asfreq_downsample(self): - - # series - s = self.create_series() - expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2))) - expected.index = expected.index.to_timestamp() - expected.index.freq = to_offset('2D') - - # this is a bug, this *should* return a PeriodIndex - # directly - # GH 12884 - result = s.resample('2D').asfreq() - assert_series_equal(result, expected) - - # frame - frame = s.to_frame('value') - expected = frame.reindex( - frame.index.take(np.arange(0, len(frame.index), 2))) - expected.index = expected.index.to_timestamp() - expected.index.freq = to_offset('2D') - result = frame.resample('2D').asfreq() - assert_frame_equal(result, expected) - - def test_asfreq_upsample(self): - - # this is a bug, this *should* return a PeriodIndex - # directly - # GH 12884 - s = self.create_series() - new_index = date_range(s.index[0].to_timestamp(how='start'), - (s.index[-1] + 1).to_timestamp(how='start'), - freq='1H', - closed='left') - expected = s.to_timestamp().reindex(new_index).to_period() - result = s.resample('1H').asfreq() - assert_series_equal(result, expected) - - frame = s.to_frame('value') - new_index = date_range(frame.index[0].to_timestamp(how='start'), - (frame.index[-1] + 1).to_timestamp(how='start'), - freq='1H', - closed='left') - expected = frame.to_timestamp().reindex(new_index).to_period() - result = frame.resample('1H').asfreq() - assert_frame_equal(result, expected) + @pytest.mark.parametrize('freq', ['2D', '1H', '2H']) + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + def test_asfreq(self, series_and_frame, freq, kind): + # GH 12884, 15944 + # make sure .asfreq() returns PeriodIndex (except kind='timestamp') + + obj = series_and_frame + if kind == 'timestamp': + expected = obj.to_timestamp().resample(freq).asfreq() + else: + start = obj.index[0].to_timestamp(how='start') + end = (obj.index[-1] + 1).to_timestamp(how='start') + new_index = date_range(start=start, end=end, freq=freq, + closed='left') + expected = obj.to_timestamp().reindex(new_index).to_period(freq) + result = obj.resample(freq, kind=kind).asfreq() + assert_almost_equal(result, expected) def test_asfreq_fill_value(self): # test for fill value during resampling, issue 3715 @@ -2285,8 +2290,9 @@ def test_asfreq_fill_value(self): result = frame.resample('1H', kind='timestamp').asfreq(fill_value=3.0) assert_frame_equal(result, expected) - def test_selection(self): - index = self.create_series().index + @pytest.mark.parametrize('freq', ['H', '12H', '2D', 'W']) + @pytest.mark.parametrize('kind', [None, 'period', 'timestamp']) + def test_selection(self, index, freq, kind): # This is a bug, these should be implemented # GH 14008 df = pd.DataFrame({'date': index, @@ -2294,12 +2300,10 @@ def test_selection(self): index=pd.MultiIndex.from_arrays([ np.arange(len(index), dtype=np.int64), index], names=['v', 'd'])) - with pytest.raises(NotImplementedError): - df.resample('2D', on='date') - + df.resample(freq, on='date', kind=kind) with pytest.raises(NotImplementedError): - df.resample('2D', level='d') + df.resample(freq, level='d', kind=kind) def test_annual_upsample_D_s_f(self): self._check_annual_upsample_cases('D', 'start', 
'ffill') @@ -2366,15 +2370,14 @@ def test_not_subperiod(self): pytest.raises(ValueError, lambda: ts.resample('M').mean()) pytest.raises(ValueError, lambda: ts.resample('w-thu').mean()) - def test_basic_upsample(self): + @pytest.mark.parametrize('freq', ['D', '2D']) + def test_basic_upsample(self, freq): ts = _simple_pts('1/1/1990', '6/30/1995', freq='M') result = ts.resample('a-dec').mean() - resampled = result.resample('D', convention='end').ffill() - - expected = result.to_timestamp('D', how='end') - expected = expected.asfreq('D', 'ffill').to_period() - + resampled = result.resample(freq, convention='end').ffill() + expected = result.to_timestamp(freq, how='end') + expected = expected.asfreq(freq, 'ffill').to_period(freq) assert_series_equal(resampled, expected) def test_upsample_with_limit(self): @@ -2440,16 +2443,15 @@ def test_resample_basic(self): result2 = s.resample('T', kind='period').mean() assert_series_equal(result2, expected) - def test_resample_count(self): - + @pytest.mark.parametrize('freq,expected_vals', [('M', [31, 29, 31, 9]), + ('2M', [31 + 29, 31 + 9])]) + def test_resample_count(self, freq, expected_vals): # GH12774 - series = pd.Series(1, index=pd.period_range(start='2000', - periods=100)) - result = series.resample('M').count() - - expected_index = pd.period_range(start='2000', freq='M', periods=4) - expected = pd.Series([31, 29, 31, 9], index=expected_index) - + series = pd.Series(1, index=pd.period_range(start='2000', periods=100)) + result = series.resample(freq).count() + expected_index = pd.period_range(start='2000', freq=freq, + periods=len(expected_vals)) + expected = pd.Series(expected_vals, index=expected_index) assert_series_equal(result, expected) def test_resample_same_freq(self): @@ -2587,12 +2589,15 @@ def test_cant_fill_missing_dups(self): s = Series(np.random.randn(5), index=rng) pytest.raises(Exception, lambda: s.resample('A').ffill()) - def test_resample_5minute(self): + @pytest.mark.parametrize('freq', ['5min']) + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + def test_resample_5minute(self, freq, kind): rng = period_range('1/1/2000', '1/5/2000', freq='T') ts = Series(np.random.randn(len(rng)), index=rng) - - result = ts.resample('5min').mean() - expected = ts.to_timestamp().resample('5min').mean() + expected = ts.to_timestamp().resample(freq).mean() + if kind != 'timestamp': + expected = expected.to_period(freq) + result = ts.resample(freq, kind=kind).mean() assert_series_equal(result, expected) def test_upsample_daily_business_daily(self): @@ -2812,18 +2817,96 @@ def test_evenly_divisible_with_no_extra_bins(self): result = df.resample('7D').sum() assert_frame_equal(result, expected) - def test_apply_to_empty_series(self): - # GH 14313 - series = self.create_series()[:0] + @pytest.mark.parametrize('kind', ['period', None, 'timestamp']) + @pytest.mark.parametrize('agg_arg', ['mean', {'value': 'mean'}, ['mean']]) + def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg): + # make sure passing loffset returns DatetimeIndex in all cases + # basic method taken from Base.test_resample_loffset_arg_type() + df = frame + expected_means = [df.values[i:i + 2].mean() + for i in range(0, len(df.values), 2)] + expected_index = self.create_index(df.index[0], + periods=len(df.index) / 2, + freq='2D') - for freq in ['M', 'D', 'H']: - with pytest.raises(TypeError): - series.resample(freq).apply(lambda x: 1) + # loffset coerces PeriodIndex to DateTimeIndex + expected_index = expected_index.to_timestamp() + expected_index += 
timedelta(hours=2) + expected = DataFrame({'value': expected_means}, index=expected_index) + + result_agg = df.resample('2D', loffset='2H', kind=kind).agg(agg_arg) + with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): + result_how = df.resample('2D', how=agg_arg, loffset='2H', + kind=kind) + if isinstance(agg_arg, list): + expected.columns = pd.MultiIndex.from_tuples([('value', 'mean')]) + assert_frame_equal(result_agg, expected) + assert_frame_equal(result_how, expected) + + @pytest.mark.parametrize('freq, period_mult', [('H', 24), ('12H', 2)]) + @pytest.mark.parametrize('kind', [None, 'period']) + def test_upsampling_ohlc(self, freq, period_mult, kind): + # GH 13083 + pi = PeriodIndex(start='2000', freq='D', periods=10) + s = Series(range(len(pi)), index=pi) + expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) + + # timestamp-based resampling doesn't include all sub-periods + # of the last original period, so extend accordingly: + new_index = PeriodIndex(start='2000', freq=freq, + periods=period_mult * len(pi)) + expected = expected.reindex(new_index) + result = s.resample(freq, kind=kind).ohlc() + assert_frame_equal(result, expected) + + @pytest.mark.parametrize('periods, values', + [([pd.NaT, '1970-01-01 00:00:00', pd.NaT, + '1970-01-01 00:00:02', '1970-01-01 00:00:03'], + [2, 3, 5, 7, 11]), + ([pd.NaT, pd.NaT, '1970-01-01 00:00:00', pd.NaT, + pd.NaT, pd.NaT, '1970-01-01 00:00:02', + '1970-01-01 00:00:03', pd.NaT, pd.NaT], + [1, 2, 3, 5, 6, 8, 7, 11, 12, 13])]) + @pytest.mark.parametrize('freq, expected_values', + [('1s', [3, np.NaN, 7, 11]), + ('2s', [3, int((7 + 11) / 2)]), + ('3s', [int((3 + 7) / 2), 11])]) + def test_resample_with_nat(self, periods, values, freq, expected_values): + # GH 13224 + index = PeriodIndex(periods, freq='S') + frame = DataFrame(values, index=index) + + expected_index = period_range('1970-01-01 00:00:00', + periods=len(expected_values), freq=freq) + expected = DataFrame(expected_values, index=expected_index) + result = frame.resample(freq).mean() + assert_frame_equal(result, expected) + + def test_resample_with_only_nat(self): + # GH 13224 + pi = PeriodIndex([pd.NaT] * 3, freq='S') + frame = DataFrame([2, 3, 5], index=pi) + expected_index = PeriodIndex(data=[], freq=pi.freq) + expected = DataFrame([], index=expected_index) + result = frame.resample('1s').mean() + assert_frame_equal(result, expected) class TestTimedeltaIndex(Base): _index_factory = lambda x: timedelta_range + @pytest.fixture + def _index_start(self): + return '1 day' + + @pytest.fixture + def _index_end(self): + return '10 day' + + @pytest.fixture + def _series_name(self): + return 'tdi' + def create_series(self): i = timedelta_range('1 day', '10 day', freq='D') @@ -3167,13 +3250,6 @@ def test_fails_on_no_datetime_index(self): "instance of %r" % name): df.groupby(TimeGrouper('D')) - # PeriodIndex gives a specific error message - df = DataFrame({'a': np.random.randn(n)}, index=tm.makePeriodIndex(n)) - with tm.assert_raises_regex(TypeError, - "axis must be a DatetimeIndex, but " - "got an instance of 'PeriodIndex'"): - df.groupby(TimeGrouper('D')) - def test_aaa_group_order(self): # GH 12840 # check TimeGrouper perform stable sorts From cdbbf80ec3dd919414560855a1ea3f1efd6c5332 Mon Sep 17 00:00:00 2001 From: ruiann <534676033@qq.com> Date: Sun, 1 Oct 2017 12:48:56 -0500 Subject: [PATCH 179/188] BUG:Time Grouper bug fix when applied for list groupers (#17587) closes #17530 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/groupby.py | 119 
++++++++++++++++++-----
 pandas/core/resample.py                  |  27 +----
 pandas/tests/groupby/test_timegrouper.py |  19 ++++
 4 files changed, 116 insertions(+), 50 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 1094e96bd0d201..3276310fa3e6e2 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -704,6 +704,7 @@ Groupby/Resample/Rolling
 - Bug in ``DataFrame.groupby`` where index and column keys were not recognized correctly when the number of keys equaled the number of elements on the groupby axis (:issue:`16859`)
 - Bug in ``groupby.nunique()`` with ``TimeGrouper`` which cannot handle ``NaT`` correctly (:issue:`17575`)
 - Bug in ``DataFrame.groupby`` where a single level selection from a ``MultiIndex`` unexpectedly sorts (:issue:`17537`)
+- Bug in ``TimeGrouper`` where results differed when it was passed as a list and as a scalar (:issue:`17530`)

 Sparse
 ^^^^^^

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index 2f2056279558d3..9379ade4be7a69 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -256,11 +256,13 @@ def __init__(self, key=None, level=None, freq=None, axis=0, sort=False):
     def ax(self):
         return self.grouper

-    def _get_grouper(self, obj):
+    def _get_grouper(self, obj, validate=True):
         """
         Parameters
         ----------
         obj : the subject object
+        validate : boolean, default True
+            if True, validate the grouper

         Returns
         -------
@@ -271,7 +273,8 @@ def _get_grouper(self, obj):
         self.grouper, exclusions, self.obj = _get_grouper(self.obj, [self.key],
                                                           axis=self.axis,
                                                           level=self.level,
-                                                          sort=self.sort)
+                                                          sort=self.sort,
+                                                          validate=validate)
         return self.binner, self.grouper, self.obj

     def _set_grouper(self, obj, sort=False):
@@ -326,12 +329,6 @@ def _set_grouper(self, obj, sort=False):
         self.grouper = ax
         return self.grouper

-    def _get_binner_for_grouping(self, obj):
-        """ default to the standard binner here """
-        group_axis = obj._get_axis(self.axis)
-        return Grouping(group_axis, None, obj=obj, name=self.key,
-                        level=self.level, sort=self.sort, in_axis=False)
-
     @property
     def groups(self):
         return self.grouper.groups
@@ -1733,16 +1730,34 @@ class BaseGrouper(object):
     """
    This is an internal Grouper class, which actually holds
    the generated groups
+
+    Parameters
+    ----------
+    axis : int
+        the axis to group
+    groupings : array of grouping
+        all the grouping instances to handle in this grouper
+        for example, for a list of groupers passed to groupby, pass the
+        whole list here
+    sort : boolean, default True
+        whether this grouper will give a sorted result or not
+    group_keys : boolean, default True
+    mutated : boolean, default False
+    indexer : intp array, optional
+        the indexer created by Grouper
+        some groupers (TimeGrouper) sort their axis, so their group_info
+        is also sorted; the indexer is needed to restore the original order
+
    """

    def __init__(self, axis, groupings, sort=True, group_keys=True,
-                 mutated=False):
+                 mutated=False, indexer=None):
        self._filter_empty_groups = self.compressed = len(groupings) != 1
        self.axis = axis
        self.groupings = groupings
        self.sort = sort
        self.group_keys = group_keys
        self.mutated = mutated
+        self.indexer = indexer

    @property
    def shape(self):
@@ -1888,6 +1903,15 @@ def group_info(self):
        comp_ids = _ensure_int64(comp_ids)
        return comp_ids, obs_group_ids, ngroups

+    @cache_readonly
+    def label_info(self):
+        # return the labels of items in the original grouped axis
+        labels, _, _ = self.group_info
+        if self.indexer is not None:
+            sorter = np.lexsort((labels, self.indexer))
+            labels = labels[sorter]
+        return labels
+
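A minimal NumPy-only sketch of the reordering that ``label_info`` performs (the ``labels`` and ``indexer`` values are made up for illustration, not taken from a real ``TimeGrouper``):

.. code-block:: python

   import numpy as np

   # group labels as computed on the *sorted* axis, plus the indexer
   # (sorted position -> original position) produced by _set_grouper
   labels = np.array([0, 0, 1, 1, 2])
   indexer = np.array([4, 2, 0, 3, 1])

   # sorting primarily by the indexer puts each label back at the
   # position its item occupied in the original, unsorted axis
   sorter = np.lexsort((labels, indexer))
   labels[sorter]  # -> array([1, 2, 0, 1, 0])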
    def _get_compressed_labels(self):
        all_labels = [ping.labels for ping in self.groupings]
        if len(all_labels) > 1:
@@ -2288,11 +2312,42 @@ def generate_bins_generic(values, binner, closed):

 class BinGrouper(BaseGrouper):

-    def __init__(self, bins, binlabels, filter_empty=False, mutated=False):
+    """
+    This is an internal Grouper class
+
+    Parameters
+    ----------
+    bins : the split positions used to group the items of the axis; the
+        items up to each position fall under the corresponding binlabel
+    binlabels : the label list
+    filter_empty : boolean, default False
+    mutated : boolean, default False
+    indexer : an intp array, optional
+
+    Examples
+    --------
+    bins: [2, 4, 6, 8, 10]
+    binlabels: DatetimeIndex(['2005-01-01', '2005-01-03',
+                              '2005-01-05', '2005-01-07', '2005-01-09'],
+                              dtype='datetime64[ns]', freq='2D')
+
+    the resulting group_info, a tuple of (the label of each item in the
+    grouped axis, the index of each label in the label list, the number
+    of groups), is
+
+    (array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4]), array([0, 1, 2, 3, 4]), 5)
+
+    meaning the grouped axis has 10 items that fall into 5 labels: the
+    first and second items belong to the first label, the third and
+    fourth items belong to the second label, and so on
+
+    """
+
+    def __init__(self, bins, binlabels, filter_empty=False, mutated=False,
+                 indexer=None):
         self.bins = _ensure_int64(bins)
         self.binlabels = _ensure_index(binlabels)
         self._filter_empty_groups = filter_empty
         self.mutated = mutated
+        self.indexer = indexer

     @cache_readonly
     def groups(self):
@@ -2460,6 +2515,19 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
             self.grouper, self._labels, self._group_index = \
                 index._get_grouper_for_level(self.grouper, level)

+        # a passed Grouper-like: get the grouper directly, in the same
+        # way as a single-grouper groupby, and use the group_info to
+        # get labels
+        elif isinstance(self.grouper, Grouper):
+            # get the new grouper; we already have disambiguated
+            # what key/level refer to exactly, don't need to
+            # check again as we have by this point converted these
+            # to an actual value (rather than a pd.Grouper)
+            _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
+            if self.name is None:
+                self.name = grouper.result_index.name
+            self.obj = self.grouper.obj
+            self.grouper = grouper
+
         else:
             if self.grouper is None and self.name is not None:
                 self.grouper = self.obj[self.name]
@@ -2482,16 +2550,6 @@ def __init__(self, index, grouper=None, obj=None, name=None, level=None,
                         categories=c,
                         ordered=self.grouper.ordered))

-            # a passed Grouper like
-            elif isinstance(self.grouper, Grouper):
-
-                # get the new grouper
-                grouper = self.grouper._get_binner_for_grouping(self.obj)
-                self.obj = self.grouper.obj
-                self.grouper = grouper
-                if self.name is None:
-                    self.name = grouper.name
-
             # we are done
             if isinstance(self.grouper, Grouping):
                 self.grouper = self.grouper.grouper
@@ -2536,6 +2594,10 @@ def ngroups(self):

     @cache_readonly
     def indices(self):
+        # we have a list of groupers
+        if isinstance(self.grouper, BaseGrouper):
+            return self.grouper.indices
+
         values = _ensure_categorical(self.grouper)
         return values._reverse_indexer()

@@ -2553,9 +2615,14 @@ def group_index(self):

     def _make_labels(self):
         if self._labels is None or self._group_index is None:
-            labels, uniques = algorithms.factorize(
-                self.grouper, sort=self.sort)
-            uniques = Index(uniques, name=self.name)
+            # we have a list of groupers
+            if isinstance(self.grouper, BaseGrouper):
+                labels = self.grouper.label_info
+                uniques = self.grouper.result_index
+            else:
+                labels, uniques = algorithms.factorize(
+                    self.grouper, sort=self.sort)
+                uniques = Index(uniques, name=self.name)
             self._labels = 
labels self._group_index = uniques @@ -2566,7 +2633,7 @@ def groups(self): def _get_grouper(obj, key=None, axis=0, level=None, sort=True, - mutated=False): + mutated=False, validate=True): """ create and return a BaseGrouper, which is an internal mapping of how to create the grouper indexers. @@ -2583,6 +2650,8 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, are and then creates a Grouping for each one, combined into a BaseGrouper. + If validate, then check for key/level overlaps + """ group_axis = obj._get_axis(axis) @@ -2707,7 +2776,7 @@ def is_in_obj(gpr): elif is_in_axis(gpr): # df.groupby('name') if gpr in obj: - if gpr in obj.index.names: + if validate and gpr in obj.index.names: warnings.warn( ("'%s' is both a column name and an index level.\n" "Defaulting to column but " diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 083fbcaaabe460..6edbb99641542d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -250,7 +250,7 @@ def _get_binner(self): """ binner, bins, binlabels = self._get_binner_for_time() - bin_grouper = BinGrouper(bins, binlabels) + bin_grouper = BinGrouper(bins, binlabels, indexer=self.groupby.indexer) return binner, bin_grouper def _assure_grouper(self): @@ -1105,35 +1105,12 @@ def _get_resampler(self, obj, kind=None): "TimedeltaIndex or PeriodIndex, " "but got an instance of %r" % type(ax).__name__) - def _get_grouper(self, obj): + def _get_grouper(self, obj, validate=True): # create the resampler and return our binner r = self._get_resampler(obj) r._set_binner() return r.binner, r.grouper, r.obj - def _get_binner_for_grouping(self, obj): - # return an ordering of the transformed group labels, - # suitable for multi-grouping, e.g the labels for - # the resampled intervals - binner, grouper, obj = self._get_grouper(obj) - - l = [] - for key, group in grouper.get_iterator(self.ax): - l.extend([key] * len(group)) - - if isinstance(self.ax, PeriodIndex): - grouper = binner.__class__(l, freq=binner.freq, name=binner.name) - else: - # resampling causes duplicated values, specifying freq is invalid - grouper = binner.__class__(l, name=binner.name) - - # since we may have had to sort - # may need to reorder groups here - if self.indexer is not None: - indexer = self.indexer.argsort(kind='quicksort') - grouper = grouper.take(indexer) - return grouper - def _get_time_bins(self, ax): if not isinstance(ax, DatetimeIndex): raise TypeError('axis must be a DatetimeIndex, but got ' diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index fafcbf947e3df7..c8503b16a0e16a 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -623,3 +623,22 @@ def test_nunique_with_timegrouper_and_nat(self): result = test.groupby(grouper)['data'].nunique() expected = test[test.time.notnull()].groupby(grouper)['data'].nunique() tm.assert_series_equal(result, expected) + + def test_scalar_call_versus_list_call(self): + # Issue: 17530 + data_frame = { + 'location': ['shanghai', 'beijing', 'shanghai'], + 'time': pd.Series(['2017-08-09 13:32:23', '2017-08-11 23:23:15', + '2017-08-11 22:23:15'], + dtype='datetime64[ns]'), + 'value': [1, 2, 3] + } + data_frame = pd.DataFrame(data_frame).set_index('time') + grouper = pd.Grouper(freq='D') + + grouped = data_frame.groupby(grouper) + result = grouped.count() + grouped = data_frame.groupby([grouper]) + expected = grouped.count() + + assert_frame_equal(result, expected) From 458c1dc81b7e6f90180b06179ac91d9ed868cb05 Mon 
Sep 17 00:00:00 2001 From: gfyoung Date: Sun, 1 Oct 2017 13:05:02 -0700 Subject: [PATCH 180/188] DEPR: Deprecate convert parameter in take (#17352) xref gh-16948. The parameter is not respected, nor is it a parameter in many 'take' implementations. --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/frame.py | 12 +-- pandas/core/generic.py | 96 +++++++++++++++---- pandas/core/groupby.py | 10 +- pandas/core/indexing.py | 18 ++-- pandas/core/series.py | 35 +++---- pandas/core/sparse/series.py | 13 ++- .../tests/frame/test_axis_select_reindex.py | 8 +- pandas/tests/indexing/test_loc.py | 4 +- pandas/tests/series/test_indexing.py | 17 ++++ pandas/tests/sparse/test_series.py | 3 + 11 files changed, 144 insertions(+), 73 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 3276310fa3e6e2..c8a0a6bff5cc75 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -566,6 +566,7 @@ Deprecations ~~~~~~~~~~~~ - :func:`read_excel()` has deprecated ``sheetname`` in favor of ``sheet_name`` for consistency with ``.to_excel()`` (:issue:`10559`). +- The ``convert`` parameter has been deprecated in the ``.take()`` method, as it was not being respected (:issue:`16948`) - ``pd.options.html.border`` has been deprecated in favor of ``pd.options.display.html.border`` (:issue:`15793`). - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a12e611f6618a0..5d439f88bca15a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2034,7 +2034,7 @@ def _ixs(self, i, axis=0): return self.loc[:, lab_slice] else: if isinstance(label, Index): - return self.take(i, axis=1, convert=True) + return self._take(i, axis=1, convert=True) index_len = len(self.index) @@ -2116,10 +2116,10 @@ def _getitem_array(self, key): # be reindexed to match DataFrame rows key = check_bool_indexer(self.index, key) indexer = key.nonzero()[0] - return self.take(indexer, axis=0, convert=False) + return self._take(indexer, axis=0, convert=False) else: indexer = self.loc._convert_to_indexer(key, axis=1) - return self.take(indexer, axis=1, convert=True) + return self._take(indexer, axis=1, convert=True) def _getitem_multilevel(self, key): loc = self.columns.get_loc(key) @@ -3355,7 +3355,7 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None, else: raise TypeError('must specify how or thresh') - result = self.take(mask.nonzero()[0], axis=axis, convert=False) + result = self._take(mask.nonzero()[0], axis=axis, convert=False) if inplace: self._update_inplace(result) @@ -3486,7 +3486,7 @@ def sort_values(self, by, axis=0, ascending=True, inplace=False, new_data = self._data.take(indexer, axis=self._get_block_manager_axis(axis), - convert=False, verify=False) + verify=False) if inplace: return self._update_inplace(new_data) @@ -3547,7 +3547,7 @@ def sort_index(self, axis=0, level=None, ascending=True, inplace=False, baxis = self._get_block_manager_axis(axis) new_data = self._data.take(indexer, axis=baxis, - convert=False, verify=False) + verify=False) # reconstruct axis if needed new_data.axes[baxis] = new_data.axes[baxis]._sort_levels_monotonic() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4f6fd0828693e1..5dd770b2600a09 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -38,6 +38,7 @@ from 
pandas.core.index import (Index, MultiIndex, _ensure_index, InvalidIndexError) import pandas.core.indexing as indexing +from pandas.core.indexing import maybe_convert_indices from pandas.core.indexes.datetimes import DatetimeIndex from pandas.core.indexes.period import PeriodIndex, Period from pandas.core.internals import BlockManager @@ -1822,7 +1823,8 @@ def _iget_item_cache(self, item): if ax.is_unique: lower = self._get_item_cache(ax[item]) else: - lower = self.take(item, axis=self._info_axis_number, convert=True) + lower = self._take(item, axis=self._info_axis_number, + convert=True) return lower def _box_item_values(self, key, values): @@ -2057,8 +2059,63 @@ def __delitem__(self, key): except KeyError: pass - def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): + _shared_docs['_take'] = """ + Return the elements in the given *positional* indices along an axis. + + This means that we are not indexing according to actual values in + the index attribute of the object. We are indexing according to the + actual position of the element in the object. + + This is the internal version of ``.take()`` and will contain a wider + selection of parameters useful for internal use but not as suitable + for public usage. + + Parameters + ---------- + indices : array-like + An array of ints indicating which positions to take. + axis : int, default 0 + The axis on which to select elements. "0" means that we are + selecting rows, "1" means that we are selecting columns, etc. + convert : bool, default True + Whether to convert negative indices into positive ones. + For example, ``-1`` would map to the ``len(axis) - 1``. + The conversions are similar to the behavior of indexing a + regular Python list. + is_copy : bool, default True + Whether to return a copy of the original object or not. + + Returns + ------- + taken : type of caller + An array-like containing the elements taken from the object. + + See Also + -------- + numpy.ndarray.take + numpy.take """ + + @Appender(_shared_docs['_take']) + def _take(self, indices, axis=0, convert=True, is_copy=True): + self._consolidate_inplace() + + if convert: + indices = maybe_convert_indices(indices, len(self._get_axis(axis))) + + new_data = self._data.take(indices, + axis=self._get_block_manager_axis(axis), + verify=True) + result = self._constructor(new_data).__finalize__(self) + + # Maybe set copy if we didn't actually change the index. + if is_copy: + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + + return result + + _shared_docs['take'] = """ Return the elements in the given *positional* indices along an axis. This means that we are not indexing according to actual values in @@ -2073,9 +2130,12 @@ def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): The axis on which to select elements. "0" means that we are selecting rows, "1" means that we are selecting columns, etc. convert : bool, default True - Whether to convert negative indices to positive ones, just as with - indexing into Python lists. For example, if `-1` was passed in, - this index would be converted ``n - 1``. + .. deprecated:: 0.21.0 + + Whether to convert negative indices into positive ones. + For example, ``-1`` would map to the ``len(axis) - 1``. + The conversions are similar to the behavior of indexing a + regular Python list. is_copy : bool, default True Whether to return a copy of the original object or not. 
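A rough sketch of what the deprecation looks like to callers (a hypothetical session; per the change below, only an explicit ``convert=False`` reaches the warning path, since the default cannot be distinguished from an explicit ``True``):

.. code-block:: python

   import warnings
   import pandas as pd

   df = pd.DataFrame({'a': [10, 20, 30]})

   with warnings.catch_warnings(record=True) as caught:
       warnings.simplefilter('always')
       df.take([0, 2])                 # default path: no warning
       df.take([0, 2], convert=False)  # deprecated keyword: FutureWarning

   assert any(issubclass(w.category, FutureWarning) for w in caught)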
@@ -2131,19 +2191,17 @@ class max_speed numpy.ndarray.take numpy.take """ + + @Appender(_shared_docs['take']) + def take(self, indices, axis=0, convert=True, is_copy=True, **kwargs): nv.validate_take(tuple(), kwargs) - self._consolidate_inplace() - new_data = self._data.take(indices, - axis=self._get_block_manager_axis(axis), - convert=True, verify=True) - result = self._constructor(new_data).__finalize__(self) - # maybe set copy if we didn't actually change the index - if is_copy: - if not result._get_axis(axis).equals(self._get_axis(axis)): - result._set_is_copy(self) + if not convert: + msg = ("The 'convert' parameter is deprecated " + "and will be removed in a future version.") + warnings.warn(msg, FutureWarning, stacklevel=2) - return result + return self._take(indices, axis=axis, convert=convert, is_copy=is_copy) def xs(self, key, axis=0, level=None, drop_level=True): """ @@ -2244,9 +2302,9 @@ def xs(self, key, axis=0, level=None, drop_level=True): if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: inds, = loc.nonzero() - return self.take(inds, axis=axis, convert=False) + return self._take(inds, axis=axis, convert=False) else: - return self.take(loc, axis=axis, convert=True) + return self._take(loc, axis=axis, convert=True) if not is_scalar(loc): new_index = self.index[loc] @@ -5112,7 +5170,7 @@ def at_time(self, time, asof=False): """ try: indexer = self.index.indexer_at_time(time, asof=asof) - return self.take(indexer, convert=False) + return self._take(indexer, convert=False) except AttributeError: raise TypeError('Index must be DatetimeIndex') @@ -5136,7 +5194,7 @@ def between_time(self, start_time, end_time, include_start=True, indexer = self.index.indexer_between_time( start_time, end_time, include_start=include_start, include_end=include_end) - return self.take(indexer, convert=False) + return self._take(indexer, convert=False) except AttributeError: raise TypeError('Index must be DatetimeIndex') diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9379ade4be7a69..9518f17e5f4f17 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -322,8 +322,8 @@ def _set_grouper(self, obj, sort=False): # use stable sort to support first, last, nth indexer = self.indexer = ax.argsort(kind='mergesort') ax = ax.take(indexer) - obj = obj.take(indexer, axis=self.axis, - convert=False, is_copy=False) + obj = obj._take(indexer, axis=self.axis, + convert=False, is_copy=False) self.obj = obj self.grouper = ax @@ -640,7 +640,7 @@ def get_group(self, name, obj=None): if not len(inds): raise KeyError(name) - return obj.take(inds, axis=self.axis, convert=False) + return obj._take(inds, axis=self.axis, convert=False) def __iter__(self): """ @@ -2226,7 +2226,7 @@ def _aggregate_series_fast(self, obj, func): # avoids object / Series creation overhead dummy = obj._get_values(slice(None, 0)).to_dense() indexer = get_group_index_sorter(group_index, ngroups) - obj = obj.take(indexer, convert=False).to_dense() + obj = obj._take(indexer, convert=False).to_dense() group_index = algorithms.take_nd( group_index, indexer, allow_fill=False) grouper = lib.SeriesGrouper(obj, func, group_index, ngroups, @@ -4523,7 +4523,7 @@ def __iter__(self): yield i, self._chop(sdata, slice(start, end)) def _get_sorted_data(self): - return self.data.take(self.sort_idx, axis=self.axis, convert=False) + return self.data._take(self.sort_idx, axis=self.axis, convert=False) def _chop(self, sdata, slice_obj): return sdata.iloc[slice_obj] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 
b7a51afcedabfe..2ea1b8a2389134 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1093,7 +1093,7 @@ def _getitem_iterable(self, key, axis=0): if is_bool_indexer(key): key = check_bool_indexer(labels, key) inds, = key.nonzero() - return self.obj.take(inds, axis=axis, convert=False) + return self.obj._take(inds, axis=axis, convert=False) else: # Have the index compute an indexer or return None # if it cannot handle; we only act on all found values @@ -1126,15 +1126,15 @@ def _getitem_iterable(self, key, axis=0): keyarr) if new_indexer is not None: - result = self.obj.take(indexer[indexer != -1], axis=axis, - convert=False) + result = self.obj._take(indexer[indexer != -1], axis=axis, + convert=False) result = result._reindex_with_indexers( {axis: [new_target, new_indexer]}, copy=True, allow_dups=True) else: - result = self.obj.take(indexer, axis=axis, convert=False) + result = self.obj._take(indexer, axis=axis) return result @@ -1265,7 +1265,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') else: - return self.obj.take(indexer, axis=axis, convert=False) + return self.obj._take(indexer, axis=axis, convert=False) class _IXIndexer(_NDFrameIndexer): @@ -1350,7 +1350,7 @@ def _getbool_axis(self, key, axis=0): key = check_bool_indexer(labels, key) inds, = key.nonzero() try: - return self.obj.take(inds, axis=axis, convert=False) + return self.obj._take(inds, axis=axis, convert=False) except Exception as detail: raise self._exception(detail) @@ -1367,7 +1367,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(indexer, slice): return self._slice(indexer, axis=axis, kind='iloc') else: - return self.obj.take(indexer, axis=axis, convert=False) + return self.obj._take(indexer, axis=axis, convert=False) class _LocIndexer(_LocationIndexer): @@ -1707,7 +1707,7 @@ def _get_slice_axis(self, slice_obj, axis=0): if isinstance(slice_obj, slice): return self._slice(slice_obj, axis=axis, kind='iloc') else: - return self.obj.take(slice_obj, axis=axis, convert=False) + return self.obj._take(slice_obj, axis=axis, convert=False) def _get_list_axis(self, key, axis=0): """ @@ -1723,7 +1723,7 @@ def _get_list_axis(self, key, axis=0): Series object """ try: - return self.obj.take(key, axis=axis, convert=False) + return self.obj._take(key, axis=axis, convert=False) except IndexError: # re-raise with different error message raise IndexError("positional indexers are out-of-bounds") diff --git a/pandas/core/series.py b/pandas/core/series.py index a05324142b223a..97f39a680c8c97 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2563,35 +2563,24 @@ def memory_usage(self, index=True, deep=False): v += self.index.memory_usage(deep=deep) return v - def take(self, indices, axis=0, convert=True, is_copy=False, **kwargs): - """ - return Series corresponding to requested indices - - Parameters - ---------- - indices : list / array of ints - convert : translate negative to positive indices (default) - - Returns - ------- - taken : Series - - See also - -------- - numpy.ndarray.take - """ - if kwargs: - nv.validate_take(tuple(), kwargs) - - # check/convert indicies here + @Appender(generic._shared_docs['_take']) + def _take(self, indices, axis=0, convert=True, is_copy=False): if convert: indices = maybe_convert_indices(indices, len(self._get_axis(axis))) indices = _ensure_platform_int(indices) new_index = self.index.take(indices) new_values = self._values.take(indices) - return 
(self._constructor(new_values, index=new_index, fastpath=True) - .__finalize__(self)) + + result = (self._constructor(new_values, index=new_index, + fastpath=True).__finalize__(self)) + + # Maybe set copy if we didn't actually change the index. + if is_copy: + if not result._get_axis(axis).equals(self._get_axis(axis)): + result._set_is_copy(self) + + return result def isin(self, values): """ diff --git a/pandas/core/sparse/series.py b/pandas/core/sparse/series.py index 2aecb9d7c4ffbd..5166dc927989e5 100644 --- a/pandas/core/sparse/series.py +++ b/pandas/core/sparse/series.py @@ -602,16 +602,15 @@ def sparse_reindex(self, new_index): sparse_index=new_index, fill_value=self.fill_value).__finalize__(self) + @Appender(generic._shared_docs['take']) def take(self, indices, axis=0, convert=True, *args, **kwargs): - """ - Sparse-compatible version of ndarray.take + convert = nv.validate_take_with_convert(convert, args, kwargs) - Returns - ------- - taken : ndarray - """ + if not convert: + msg = ("The 'convert' parameter is deprecated " + "and will be removed in a future version.") + warnings.warn(msg, FutureWarning, stacklevel=2) - convert = nv.validate_take_with_convert(convert, args, kwargs) new_values = SparseArray.take(self.values, indices) new_index = self.index.take(indices) return self._constructor(new_values, diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index fb9b8c2ed7affe..219c1df301c4b6 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -822,7 +822,7 @@ def test_take(self): expected = df.loc[:, ['D', 'B', 'C', 'A']] assert_frame_equal(result, expected, check_names=False) - # neg indicies + # negative indices order = [2, 1, -1] for df in [self.frame]: @@ -830,6 +830,10 @@ def test_take(self): expected = df.reindex(df.index.take(order)) assert_frame_equal(result, expected) + with tm.assert_produces_warning(FutureWarning): + result = df.take(order, convert=False, axis=0) + assert_frame_equal(result, expected) + # axis = 1 result = df.take(order, axis=1) expected = df.loc[:, ['C', 'B', 'D']] @@ -854,7 +858,7 @@ def test_take(self): expected = df.loc[:, ['foo', 'B', 'C', 'A', 'D']] assert_frame_equal(result, expected) - # neg indicies + # negative indices order = [4, 1, -2] for df in [self.mixed_frame]: diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3e863a59df67e6..17316a714e2609 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -581,11 +581,11 @@ def gen_test(l, l2): def gen_expected(df, mask): l = len(mask) - return pd.concat([df.take([0], convert=False), + return pd.concat([df.take([0]), DataFrame(np.ones((l, len(columns))), index=[0] * l, columns=columns), - df.take(mask[1:], convert=False)]) + df.take(mask[1:])]) df = gen_test(900, 100) assert not df.index.is_unique diff --git a/pandas/tests/series/test_indexing.py b/pandas/tests/series/test_indexing.py index 83d6a09d38f415..272e8c7de5e498 100644 --- a/pandas/tests/series/test_indexing.py +++ b/pandas/tests/series/test_indexing.py @@ -1066,6 +1066,23 @@ def test_setitem_with_tz_dst(self): s.iloc[[1, 2]] = vals tm.assert_series_equal(s, exp) + def test_take(self): + s = Series([-1, 5, 6, 2, 4]) + + actual = s.take([1, 3, 4]) + expected = Series([5, 2, 4], index=[1, 3, 4]) + tm.assert_series_equal(actual, expected) + + actual = s.take([-1, 3, 4]) + expected = Series([4, 2, 4], index=[4, 3, 4]) + tm.assert_series_equal(actual, 
expected) + + pytest.raises(IndexError, s.take, [1, 10]) + pytest.raises(IndexError, s.take, [2, 5]) + + with tm.assert_produces_warning(FutureWarning): + s.take([-1, 3, 4], convert=False) + def test_where(self): s = Series(np.random.randn(5)) cond = s > 0 diff --git a/pandas/tests/sparse/test_series.py b/pandas/tests/sparse/test_series.py index 451f3695933470..8c0ed322028e8c 100644 --- a/pandas/tests/sparse/test_series.py +++ b/pandas/tests/sparse/test_series.py @@ -520,6 +520,9 @@ def _compare(idx): exp = pd.Series(np.repeat(nan, 5)) tm.assert_series_equal(sp.take([0, 1, 2, 3, 4]), exp) + with tm.assert_produces_warning(FutureWarning): + sp.take([1, 5], convert=False) + def test_numpy_take(self): sp = SparseSeries([1.0, 2.0, 3.0]) indices = [1, 2] From dead59ab80a971d7cd0cdbebd5dc421def3df9b7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Sun, 1 Oct 2017 13:47:43 -0700 Subject: [PATCH 181/188] remove unused time conversion funcs (#17711) --- pandas/_libs/index.pyx | 31 ++------------------- pandas/_libs/lib.pyx | 63 ------------------------------------------ pandas/_libs/tslib.pxd | 2 ++ pandas/_libs/tslib.pyx | 13 ++++++++- pandas/io/pytables.py | 4 +-- 5 files changed, 18 insertions(+), 95 deletions(-) diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 884117799ec5be..c96251a0293d66 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -13,29 +13,19 @@ cimport util import numpy as np -cimport tslib +from tslib cimport _to_i8 from hashtable cimport HashTable -from tslibs.timezones cimport is_utc, get_utcoffset -from pandas._libs import tslib, algos, hashtable as _hash +from pandas._libs import algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta -from datetime cimport (get_datetime64_value, _pydatetime_to_dts, - pandas_datetimestruct) - from cpython cimport PyTuple_Check, PyList_Check -cdef extern from "datetime.h": - bint PyDateTime_Check(object o) - void PyDateTime_IMPORT() - cdef int64_t iNaT = util.get_nat() -PyDateTime_IMPORT - cdef extern from "Python.h": int PySlice_Check(object) @@ -540,23 +530,6 @@ cpdef convert_scalar(ndarray arr, object value): return value -cdef inline _to_i8(object val): - cdef pandas_datetimestruct dts - try: - return val.value - except AttributeError: - if util.is_datetime64_object(val): - return get_datetime64_value(val) - elif PyDateTime_Check(val): - tzinfo = getattr(val, 'tzinfo', None) - # Save the original date value so we can get the utcoffset from it. 
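The branch being removed here computed the UTC nanosecond value by hand from the wall clock and the utcoffset; ``Timestamp`` already performs that normalization, which is what the replacement ``_to_i8`` added to ``tslib`` further below relies on. A minimal sketch of the equivalence (illustrative values; assumes Python 3 for ``datetime.timezone``):

.. code-block:: python

   from datetime import datetime, timezone, timedelta

   import pandas as pd

   tz = timezone(timedelta(hours=-5))
   val = datetime(2017, 10, 1, 12, 0, tzinfo=tz)

   # Timestamp.value is already the UTC epoch-nanosecond offset, so
   # "wall-clock nanoseconds minus utcoffset" gives the same result
   wall_ns = pd.Timestamp(val.replace(tzinfo=None)).value
   offset_ns = int(val.utcoffset().total_seconds()) * 10 ** 9
   assert pd.Timestamp(val).value == wall_ns - offset_ns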
- ival = _pydatetime_to_dts(val, &dts) - if tzinfo is not None and not is_utc(tzinfo): - offset = get_utcoffset(tzinfo, val) - ival -= tslib._delta_to_nanoseconds(offset) - return ival - return val - cdef class MultiIndexObjectEngine(ObjectEngine): """ diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 01548e17d39abf..503badd0ca8bc3 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -45,19 +45,11 @@ cdef double NaN = np.NaN cdef double nan = NaN cdef double NAN = nan -from datetime import datetime as pydatetime - # this is our tseries.pxd from datetime cimport ( get_timedelta64_value, get_datetime64_value, npy_timedelta, npy_datetime, PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check, - PyDateTime_GET_YEAR, - PyDateTime_GET_MONTH, - PyDateTime_GET_DAY, - PyDateTime_DATE_GET_HOUR, - PyDateTime_DATE_GET_MINUTE, - PyDateTime_DATE_GET_SECOND, PyDateTime_IMPORT) @@ -132,61 +124,6 @@ def memory_usage_of_objects(ndarray[object, ndim=1] arr): s += arr[i].__sizeof__() return s -#---------------------------------------------------------------------- -# datetime / io related - -cdef int _EPOCH_ORD = 719163 - -from datetime import date as pydate - -cdef inline int64_t gmtime(object date): - cdef int y, m, d, h, mn, s, days - - y = PyDateTime_GET_YEAR(date) - m = PyDateTime_GET_MONTH(date) - d = PyDateTime_GET_DAY(date) - h = PyDateTime_DATE_GET_HOUR(date) - mn = PyDateTime_DATE_GET_MINUTE(date) - s = PyDateTime_DATE_GET_SECOND(date) - - days = pydate(y, m, 1).toordinal() - _EPOCH_ORD + d - 1 - return (( (((days * 24 + h) * 60 + mn))) * 60 + s) * 1000 - - -cpdef object to_datetime(int64_t timestamp): - return pydatetime.utcfromtimestamp(timestamp / 1000.0) - - -cpdef object to_timestamp(object dt): - return gmtime(dt) - - -def array_to_timestamp(ndarray[object, ndim=1] arr): - cdef int i, n - cdef ndarray[int64_t, ndim=1] result - - n = len(arr) - result = np.empty(n, dtype=np.int64) - - for i from 0 <= i < n: - result[i] = gmtime(arr[i]) - - return result - - -def time64_to_datetime(ndarray[int64_t, ndim=1] arr): - cdef int i, n - cdef ndarray[object, ndim=1] result - - n = len(arr) - result = np.empty(n, dtype=object) - - for i from 0 <= i < n: - result[i] = to_datetime(arr[i]) - - return result - - #---------------------------------------------------------------------- # isnull / notnull related diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index ee8adfe67bb5ed..147320b108cc8c 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -4,3 +4,5 @@ cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) cdef bint _nat_scalar_rules[6] cdef bint _check_all_nulls(obj) + +cdef _to_i8(object val) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b0b70bb8102047..096ebe9a5627b0 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -3416,7 +3416,18 @@ def cast_to_nanoseconds(ndarray arr): return result -def pydt_to_i8(object pydt): +cdef inline _to_i8(object val): + cdef pandas_datetimestruct dts + try: + return val.value + except AttributeError: + if is_datetime64_object(val): + return get_datetime64_value(val) + elif PyDateTime_Check(val): + return Timestamp(val).value + return val + +cpdef pydt_to_i8(object pydt): """ Convert to int64 representation compatible with numpy datetime64; converts to UTC diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index ea69116ec363da..ca1b4d031d3ced 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -25,7 
+25,7 @@ import numpy as np
 
 from pandas import (Series, DataFrame, Panel, Panel4D, Index,
-                    MultiIndex, Int64Index, isna, concat,
+                    MultiIndex, Int64Index, isna, concat, to_datetime,
                     SparseSeries, SparseDataFrame, PeriodIndex,
                     DatetimeIndex, TimedeltaIndex)
 from pandas.core import config
@@ -4529,7 +4529,7 @@ def _unconvert_index(data, kind, encoding=None):
 def _unconvert_index_legacy(data, kind, legacy=False, encoding=None):
     kind = _ensure_decoded(kind)
     if kind == u('datetime'):
-        index = lib.time64_to_datetime(data)
+        index = to_datetime(data)
     elif kind in (u('integer')):
         index = np.asarray(data, dtype=object)
     elif kind in (u('string')):

From 408ecd21759979f94952a48a0e4a2b2a608ee84d Mon Sep 17 00:00:00 2001
From: Jeff Reback
Date: Sun, 1 Oct 2017 18:58:01 -0400
Subject: [PATCH 182/188] TST: add backward compat for offset testing for
 pickles (#17733)

closes #17721
---
 .../0.17.0/0.17.0_x86_64_darwin_3.5.3.pickle  | Bin 0 -> 129175 bytes
 .../0.18.1/0.18.1_x86_64_darwin_3.5.2.pickle  | Bin 125826 -> 127853 bytes
 .../tests/io/generate_legacy_storage_files.py |  69 ++++++++++++++++--
 pandas/tests/io/test_pickle.py                |  22 ++----
 4 files changed, 70 insertions(+), 21 deletions(-)
 create mode 100644 pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.5.3.pickle

diff --git a/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.5.3.pickle b/pandas/tests/io/data/legacy_pickle/0.17.0/0.17.0_x86_64_darwin_3.5.3.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..537864af7028b2c282150d54bf58aa3f3d553c01
GIT binary patch
literal 129175
[129175 bytes of base85-encoded pickle fixture data omitted]

delta 11870
[base85-encoded binary delta for the updated 0.18.1 pickle fixture omitted]

diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py
index 996965999724ec..1cb20814093126 100755
--- a/pandas/tests/io/generate_legacy_storage_files.py
+++ b/pandas/tests/io/generate_legacy_storage_files.py
@@ -1,6 +1,39 @@
 #!/usr/env/bin python
 
-""" self-contained to write legacy storage (pickle/msgpack) files """
+"""
+self-contained to write legacy storage (pickle/msgpack) files
+
+To use this script, create an environment where you want to
+generate pickles, say one for 0.18.1, with your pandas clone
+in ~/pandas:
+
+. activate pandas_0.18.1
+cd ~/
+
+$ python pandas/pandas/tests/io/generate_legacy_storage_files.py \
+  pandas/pandas/tests/io/data/legacy_pickle/0.18.1/ pickle
+
+This script generates a storage file for the current arch, system,
+and python version:
+  pandas version: 0.18.1
+  output dir    : pandas/pandas/tests/io/data/legacy_pickle/0.18.1/
+  storage format: pickle
+created pickle file: 0.18.1_x86_64_darwin_3.5.2.pickle
+
+The idea is to use the *current* version of
+generate_legacy_storage_files.py with an *older* version of pandas
+to generate a pickle file. We then check this file into a current
+branch and test it with test_pickle.py, which loads the *older*
+pickles and compares them against the current data generated
+with master.
+
+If we have changed a signature (e.g. we renamed offset -> freq in
+Timestamp), then we have to branch on the pandas version in
+generate_legacy_storage_files.py so that it runs under both the
+older and the newer version.
+ +""" + from __future__ import print_function from warnings import catch_warnings from distutils.version import LooseVersion @@ -9,6 +42,11 @@ Index, MultiIndex, bdate_range, to_msgpack, date_range, period_range, Timestamp, NaT, Categorical, Period) +from pandas.tseries.offsets import ( + DateOffset, Hour, Minute, Day, + MonthBegin, MonthEnd, YearBegin, + YearEnd, Week, + QuarterBegin, QuarterEnd) from pandas.compat import u import os import sys @@ -151,10 +189,28 @@ def create_data(): timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, - tz=Timestamp('2011-01-01', tz='US/Eastern'), - freq=Timestamp('2011-01-01', freq='D'), - both=Timestamp('2011-01-01', tz='Asia/Tokyo', - freq='M')) + tz=Timestamp('2011-01-01', tz='US/Eastern')) + + if _loose_version < '0.19.2': + timestamp['freq'] = Timestamp('2011-01-01', offset='D') + timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', + offset='M') + else: + timestamp['freq'] = Timestamp('2011-01-01', freq='D') + timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', + freq='M') + + off = {'DateOffset': DateOffset(years=1), + 'MonthBegin': MonthBegin(1), + 'MonthEnd': MonthEnd(1), + 'QuarterBegin': QuarterBegin(1), + 'QuarterEnd': QuarterEnd(1), + 'Day': Day(1), + 'YearBegin': YearBegin(1), + 'YearEnd': YearEnd(1), + 'Week': Week(1), + 'Hour': Hour(1), + 'Minute': Minute(1)} return dict(series=series, frame=frame, @@ -166,7 +222,8 @@ def create_data(): ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, - timestamp=timestamp) + timestamp=timestamp, + offsets=off) def create_pickle_data(): diff --git a/pandas/tests/io/test_pickle.py b/pandas/tests/io/test_pickle.py index d56b36779efe78..91c1f19f5caab9 100644 --- a/pandas/tests/io/test_pickle.py +++ b/pandas/tests/io/test_pickle.py @@ -193,26 +193,18 @@ def legacy_pickle_versions(): for v in os.listdir(path): p = os.path.join(path, v) if os.path.isdir(p): - yield v + for f in os.listdir(p): + yield (v, f) -@pytest.mark.parametrize('version', legacy_pickle_versions()) -def test_pickles(current_pickle_data, version): +@pytest.mark.parametrize('version, f', legacy_pickle_versions()) +def test_pickles(current_pickle_data, version, f): if not is_platform_little_endian(): pytest.skip("known failure on non-little endian") - pth = tm.get_data_path('legacy_pickle/{0}'.format(version)) - n = 0 - for f in os.listdir(pth): - vf = os.path.join(pth, f) - with catch_warnings(record=True): - data = compare(current_pickle_data, vf, version) - - if data is None: - continue - n += 1 - assert n > 0, ('Pickle files are not ' - 'tested: {version}'.format(version=version)) + vf = tm.get_data_path('legacy_pickle/{}/{}'.format(version, f)) + with catch_warnings(record=True): + compare(current_pickle_data, vf, version) def test_round_trip_current(current_pickle_data): From 361ef9ee6ad9313a44259c4443b16d295596df4e Mon Sep 17 00:00:00 2001 From: Berkay Date: Mon, 2 Oct 2017 13:18:21 +0300 Subject: [PATCH 183/188] Fixed the memory usage explanation of categorical in gotchas from O(nm) to O(n+m) (#17736) --- doc/source/categorical.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index cadbc895354b71..c5bbc3c004675d 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -979,7 +979,7 @@ Memory Usage .. _categorical.memory: -The memory usage of a ``Categorical`` is proportional to the number of categories times the length of the data. 
In contrast, +The memory usage of a ``Categorical`` is proportional to the number of categories plus the length of the data. In contrast, an ``object`` dtype is a constant times the length of the data. .. ipython:: python From bf5b08980526e36e03bc8f4637b8028e0e2d6145 Mon Sep 17 00:00:00 2001 From: jschendel Date: Mon, 2 Oct 2017 04:19:54 -0600 Subject: [PATCH 184/188] CLN: replace %s syntax with .format in pandas.core: categorical, common, config, config_init (#17735) Replaced %s syntax with .format in pandas.core: categorical.py, common.py, config.py, config_init.py. Additionally, made some of the existing positional .format code more explicit. --- pandas/core/categorical.py | 36 +++++++++++++----------- pandas/core/common.py | 14 +++++----- pandas/core/config.py | 57 +++++++++++++++++++++----------------- pandas/core/config_init.py | 8 +++--- 4 files changed, 62 insertions(+), 53 deletions(-) diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py index 61e28dde2e34c0..5619f15ac85d99 100644 --- a/pandas/core/categorical.py +++ b/pandas/core/categorical.py @@ -263,7 +263,8 @@ def __init__(self, values, categories=None, ordered=None, dtype=None, if dtype == 'category': dtype = CategoricalDtype(categories, ordered) else: - raise ValueError("Unknown `dtype` {}".format(dtype)) + msg = "Unknown `dtype` {dtype}" + raise ValueError(msg.format(dtype=dtype)) elif categories is not None or ordered is not None: raise ValueError("Cannot specify both `dtype` and `categories`" " or `ordered`.") @@ -931,9 +932,9 @@ def add_categories(self, new_categories, inplace=False): new_categories = [new_categories] already_included = set(new_categories) & set(self.dtype.categories) if len(already_included) != 0: - msg = ("new categories must not include old categories: %s" % - str(already_included)) - raise ValueError(msg) + msg = ("new categories must not include old categories: " + "{already_included!s}") + raise ValueError(msg.format(already_included=already_included)) new_categories = list(self.dtype.categories) + list(new_categories) new_dtype = CategoricalDtype(new_categories, self.ordered) @@ -989,8 +990,8 @@ def remove_categories(self, removals, inplace=False): new_categories = [x for x in new_categories if notna(x)] if len(not_included) != 0: - raise ValueError("removals must all be in old categories: %s" % - str(not_included)) + msg = "removals must all be in old categories: {not_included!s}" + raise ValueError(msg.format(not_included=not_included)) return self.set_categories(new_categories, ordered=self.ordered, rename=False, inplace=inplace) @@ -1443,7 +1444,8 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'): """ inplace = validate_bool_kwarg(inplace, 'inplace') if na_position not in ['last', 'first']: - raise ValueError('invalid na_position: {!r}'.format(na_position)) + msg = 'invalid na_position: {na_position!r}' + raise ValueError(msg.format(na_position=na_position)) codes = np.sort(self._codes) if not ascending: @@ -1653,9 +1655,10 @@ def _tidy_repr(self, max_vals=10, footer=True): head = self[:num]._get_repr(length=False, footer=False) tail = self[-(max_vals - num):]._get_repr(length=False, footer=False) - result = '%s, ..., %s' % (head[:-1], tail[1:]) + result = u('{head}, ..., {tail}').format(head=head[:-1], tail=tail[1:]) if footer: - result = '%s\n%s' % (result, self._repr_footer()) + result = u('{result}\n{footer}').format(result=result, + footer=self._repr_footer()) return compat.text_type(result) @@ -1683,7 +1686,8 @@ def _repr_categories_info(self): 
dtype = getattr(self.categories, 'dtype_str', str(self.categories.dtype)) - levheader = "Categories (%d, %s): " % (len(self.categories), dtype) + levheader = "Categories ({length}, {dtype}): ".format( + length=len(self.categories), dtype=dtype) width, height = get_terminal_size() max_width = get_option("display.width") or width if com.in_ipython_frontend(): @@ -1708,7 +1712,8 @@ def _repr_categories_info(self): def _repr_footer(self): - return u('Length: %d\n%s') % (len(self), self._repr_categories_info()) + return u('Length: {length}\n{info}').format( + length=len(self), info=self._repr_categories_info()) def _get_repr(self, length=True, na_rep='NaN', footer=True): from pandas.io.formats import format as fmt @@ -1725,9 +1730,8 @@ def __unicode__(self): elif len(self._codes) > 0: result = self._get_repr(length=len(self) > _maxlen) else: - result = ('[], %s' % - self._get_repr(length=False, - footer=True, ).replace("\n", ", ")) + msg = self._get_repr(length=False, footer=True).replace("\n", ", ") + result = ('[], {repr_msg}'.format(repr_msg=msg)) return result @@ -1869,8 +1873,8 @@ def _reduce(self, op, name, axis=0, skipna=True, numeric_only=None, """ perform the reduction type operation """ func = getattr(self, name, None) if func is None: - raise TypeError("Categorical cannot perform the operation " - "{op}".format(op=name)) + msg = 'Categorical cannot perform the operation {op}' + raise TypeError(msg.format(op=name)) return func(numeric_only=numeric_only, **kwds) def min(self, numeric_only=None, **kwargs): diff --git a/pandas/core/common.py b/pandas/core/common.py index 515a4010961205..0f7b86f5e74a09 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -96,8 +96,8 @@ def __init__(self, class_instance): self.class_instance = class_instance def __str__(self): - return ("This method must be defined in the concrete class of %s" % - self.class_instance.__class__.__name__) + msg = "This method must be defined in the concrete class of {name}" + return (msg.format(name=self.class_instance.__class__.__name__)) def flatten(l): @@ -150,8 +150,8 @@ def _maybe_match_name(a, b): def _get_info_slice(obj, indexer): """Slice the info axis of `obj` with `indexer`.""" if not hasattr(obj, '_info_axis_number'): - raise TypeError('object of type %r has no info axis' % - type(obj).__name__) + msg = 'object of type {typ!r} has no info axis' + raise TypeError(msg.format(typ=type(obj).__name__)) slices = [slice(None)] * obj.ndim slices[obj._info_axis_number] = indexer return tuple(slices) @@ -214,8 +214,8 @@ def _mut_exclusive(**kwargs): label1, val1 = item1 label2, val2 = item2 if val1 is not None and val2 is not None: - raise TypeError('mutually exclusive arguments: %r and %r' % - (label1, label2)) + msg = 'mutually exclusive arguments: {label1!r} and {label2!r}' + raise TypeError(msg.format(label1=label1, label2=label2)) elif val1 is not None: return val1 else: @@ -517,7 +517,7 @@ def standardize_mapping(into): collections.defaultdict, into.default_factory) into = type(into) if not issubclass(into, collections.Mapping): - raise TypeError('unsupported type: {}'.format(into)) + raise TypeError('unsupported type: {into}'.format(into=into)) elif into == collections.defaultdict: raise TypeError( 'to_dict() only accepts initialized defaultdicts') diff --git a/pandas/core/config.py b/pandas/core/config.py index b406f6724aa6d4..2354b7ca04e7ff 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -80,7 +80,7 @@ def _get_single_key(pat, silent): if len(keys) == 0: if not silent: 
            _warn_if_deprecated(pat)
-        raise OptionError('No such keys(s): %r' % pat)
+        raise OptionError('No such keys(s): {pat!r}'.format(pat=pat))
     if len(keys) > 1:
         raise OptionError('Pattern matched multiple keys')
     key = keys[0]
@@ -112,8 +112,8 @@ def _set_option(*args, **kwargs):
     silent = kwargs.pop('silent', False)
 
     if kwargs:
-        raise TypeError('_set_option() got an unexpected keyword '
-                        'argument "{0}"'.format(list(kwargs.keys())[0]))
+        msg = '_set_option() got an unexpected keyword argument "{kwarg}"'
+        raise TypeError(msg.format(kwarg=list(kwargs.keys())[0]))
 
     for k, v in zip(args[::2], args[1::2]):
         key = _get_single_key(k, silent)
@@ -436,9 +436,11 @@ def register_option(key, defval, doc='', validator=None, cb=None):
     key = key.lower()
 
     if key in _registered_options:
-        raise OptionError("Option '%s' has already been registered" % key)
+        msg = "Option '{key}' has already been registered"
+        raise OptionError(msg.format(key=key))
     if key in _reserved_keys:
-        raise OptionError("Option '%s' is a reserved key" % key)
+        msg = "Option '{key}' is a reserved key"
+        raise OptionError(msg.format(key=key))
 
     # the default value should be legal
     if validator:
@@ -449,22 +451,21 @@ def register_option(key, defval, doc='', validator=None, cb=None):
 
     for k in path:
         if not bool(re.match('^' + tokenize.Name + '$', k)):
-            raise ValueError("%s is not a valid identifier" % k)
+            raise ValueError("{k} is not a valid identifier".format(k=k))
        if keyword.iskeyword(k):
-            raise ValueError("%s is a python keyword" % k)
+            raise ValueError("{k} is a python keyword".format(k=k))
 
     cursor = _global_config
+    msg = "Path prefix to option '{option}' is already an option"
     for i, p in enumerate(path[:-1]):
         if not isinstance(cursor, dict):
-            raise OptionError("Path prefix to option '%s' is already an option"
-                              % '.'.join(path[:i]))
+            raise OptionError(msg.format(option='.'.join(path[:i])))
         if p not in cursor:
             cursor[p] = {}
         cursor = cursor[p]
 
     if not isinstance(cursor, dict):
-        raise OptionError("Path prefix to option '%s' is already an option" %
-                          '.'.join(path[:-1]))
+        raise OptionError(msg.format(option='.'.join(path[:-1])))
 
     cursor[path[-1]] = defval
 
     # initialize
@@ -516,8 +517,8 @@ def deprecate_option(key, msg=None, rkey=None, removal_ver=None):
     key = key.lower()
 
     if key in _deprecated_options:
-        raise OptionError("Option '%s' has already been defined as deprecated."
-                          % key)
+        msg = "Option '{key}' has already been defined as deprecated."
+        raise OptionError(msg.format(key=key))
 
     _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver)
 
@@ -614,11 +615,12 @@ def _warn_if_deprecated(key):
             print(d.msg)
             warnings.warn(d.msg, DeprecationWarning)
         else:
-            msg = "'%s' is deprecated" % key
+            msg = "'{key}' is deprecated".format(key=key)
             if d.removal_ver:
-                msg += ' and will be removed in %s' % d.removal_ver
+                msg += (' and will be removed in {version}'
+                        .format(version=d.removal_ver))
             if d.rkey:
-                msg += ", please use '%s' instead." % d.rkey
+                msg += ", please use '{rkey}' instead.".format(rkey=d.rkey)
             else:
                 msg += ', please refrain from using it.'
 
@@ -633,7 +635,7 @@ def _build_option_description(k):
     o = _get_registered_option(k)
     d = _get_deprecated_option(k)
 
-    s = u('%s ') % k
+    s = u('{k} ').format(k=k)
 
     if o.doc:
         s += '\n'.join(o.doc.strip().split('\n'))
@@ -641,12 +643,13 @@ def _build_option_description(k):
         s += 'No description available.'

     if o:
-        s += u('\n    [default: %s] [currently: %s]') % (o.defval,
-                                                         _get_option(k, True))
+        s += (u('\n    [default: {default}] [currently: {current}]')
+              .format(default=o.defval, current=_get_option(k, True)))
 
     if d:
         s += u('\n    (Deprecated')
-        s += (u(', use `%s` instead.') % d.rkey if d.rkey else '')
+        s += (u(', use `{rkey}` instead.').format(rkey=d.rkey)
+              if d.rkey else '')
         s += u(')')
 
     s += '\n\n'
@@ -718,7 +721,7 @@ def config_prefix(prefix):
 
     def wrap(func):
         def inner(key, *args, **kwds):
-            pkey = '%s.%s' % (prefix, key)
+            pkey = '{prefix}.{key}'.format(prefix=prefix, key=key)
             return func(pkey, *args, **kwds)
 
         return inner
@@ -754,7 +757,8 @@ def is_type_factory(_type):
 
     def inner(x):
         if type(x) != _type:
-            raise ValueError("Value must have type '%s'" % str(_type))
+            msg = "Value must have type '{typ!s}'"
+            raise ValueError(msg.format(typ=_type))
 
     return inner
 
@@ -777,11 +781,12 @@ def is_instance_factory(_type):
         from pandas.io.formats.printing import pprint_thing
         type_repr = "|".join(map(pprint_thing, _type))
     else:
-        type_repr = "'%s'" % _type
+        type_repr = "'{typ}'".format(typ=_type)
 
     def inner(x):
         if not isinstance(x, _type):
-            raise ValueError("Value must be an instance of %s" % type_repr)
+            msg = "Value must be an instance of {type_repr}"
+            raise ValueError(msg.format(type_repr=type_repr))
 
     return inner
 
@@ -797,10 +802,10 @@ def inner(x):
 
         if not any([c(x) for c in callables]):
             pp_values = pp("|".join(lmap(pp, legal_values)))
-            msg = "Value must be one of {0}".format(pp_values)
+            msg = "Value must be one of {pp_values}"
             if len(callables):
                 msg += " or a callable"
-            raise ValueError(msg)
+            raise ValueError(msg.format(pp_values=pp_values))
 
     return inner
 
diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
index ea5c213dbe0577..5652424a8f75b7 100644
--- a/pandas/core/config_init.py
+++ b/pandas/core/config_init.py
@@ -453,10 +453,10 @@ def use_inf_as_na_cb(key):
         cf.register_option(ext + '.writer', default, doc, validator=str)
 
     def _register_xlsx(engine, other):
-        cf.register_option('xlsx.writer', engine,
-                           writer_engine_doc.format(ext='xlsx', default=engine,
-                                                    others=", '%s'" % other),
-                           validator=str)
+        others = ", '{other}'".format(other=other)
+        doc = writer_engine_doc.format(ext='xlsx', default=engine,
+                                       others=others)
+        cf.register_option('xlsx.writer', engine, doc, validator=str)
 
     try:
         # better memory footprint

From 2781b18008a7dca575a4f3496c8f11c1ea05cced Mon Sep 17 00:00:00 2001
From: jschendel
Date: Mon, 2 Oct 2017 05:28:30 -0600
Subject: [PATCH 185/188] DEPR: Deprecate cdate_range and merge into
 bdate_range (#17691)

---
 doc/source/api.rst                            |   1 -
 doc/source/timeseries.rst                     | 241 ++++++++++--------
 doc/source/whatsnew/v0.21.0.txt               |   9 +-
 pandas/core/api.py                            |   3 +-
 pandas/core/indexes/datetimes.py              |  38 ++-
 pandas/tests/api/test_api.py                  |  12 +-
 .../indexes/datetimes/test_date_range.py      | 189 ++++++++------
 pandas/tests/indexes/datetimes/test_ops.py    |   8 +-
 pandas/tests/indexes/datetimes/test_setops.py |   7 +-
 pandas/tseries/offsets.py                     |   3 +-
 10 files changed, 299 insertions(+), 212 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 4ffeb5035912f5..28d4567027572f 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -218,7 +218,6 @@ Top-level dealing with datetimelike
    to_timedelta
    date_range
    bdate_range
-   cdate_range
    period_range
    timedelta_range
    infer_freq
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index 7399deb1319d88..d2d5ee344591ae 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -76,21 +76,21 @@ Overview
Following table shows the type of time-related classes pandas can handle and how to create them. -================= =============================== ================================================== +================= =============================== =================================================================== Class Remarks How to create -================= =============================== ================================================== -``Timestamp`` Represents a single time stamp ``to_datetime``, ``Timestamp`` -``DatetimeIndex`` Index of ``Timestamp`` ``to_datetime``, ``date_range``, ``DatetimeIndex`` +================= =============================== =================================================================== +``Timestamp`` Represents a single timestamp ``to_datetime``, ``Timestamp`` +``DatetimeIndex`` Index of ``Timestamp`` ``to_datetime``, ``date_range``, ``bdate_range``, ``DatetimeIndex`` ``Period`` Represents a single time span ``Period`` ``PeriodIndex`` Index of ``Period`` ``period_range``, ``PeriodIndex`` -================= =============================== ================================================== +================= =============================== =================================================================== .. _timeseries.representation: -Time Stamps vs. Time Spans --------------------------- +Timestamps vs. Time Spans +------------------------- -Time-stamped data is the most basic type of timeseries data that associates +Timestamped data is the most basic type of time series data that associates values with points in time. For pandas objects it means using the points in time. @@ -149,10 +149,10 @@ future releases. Converting to Timestamps ------------------------ -To convert a Series or list-like object of date-like objects e.g. strings, +To convert a ``Series`` or list-like object of date-like objects e.g. strings, epochs, or a mixture, you can use the ``to_datetime`` function. When passed -a Series, this returns a Series (with the same index), while a list-like -is converted to a DatetimeIndex: +a ``Series``, this returns a ``Series`` (with the same index), while a list-like +is converted to a ``DatetimeIndex``: .. ipython:: python @@ -175,11 +175,9 @@ you can pass the ``dayfirst`` flag: can't be parsed with the day being first it will be parsed as if ``dayfirst`` were False. -If you pass a single string to ``to_datetime``, it returns single ``Timestamp``. - -Also, ``Timestamp`` can accept the string input. -Note that ``Timestamp`` doesn't accept string parsing option like ``dayfirst`` -or ``format``, use ``to_datetime`` if these are required. +If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``. +``Timestamp`` can also accept string input, but it doesn't accept string parsing +options like ``dayfirst`` or ``format``, so use ``to_datetime`` if these are required. .. ipython:: python @@ -191,9 +189,7 @@ Providing a Format Argument ~~~~~~~~~~~~~~~~~~~~~~~~~~~ In addition to the required datetime string, a ``format`` argument can be passed to ensure specific parsing. -It will potentially speed up the conversion considerably. - -For example: +This could also potentially speed up the conversion considerably. .. ipython:: python @@ -203,7 +199,7 @@ For example: For more information on how to specify the ``format`` options, see https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. 
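As a brief sketch of the parsing behavior described above (the sample strings here are illustrative, not taken from the patch), an explicit ``format`` removes the ambiguity that ``dayfirst`` only expresses as a preference:

.. code-block:: python

    import pandas as pd

    # Without a format, '01-02-2011' is ambiguous; dayfirst states a preference.
    pd.to_datetime('01-02-2011', dayfirst=True)      # Timestamp('2011-02-01 00:00:00')

    # An explicit format is unambiguous and can also speed up parsing
    # considerably for long inputs.
    pd.to_datetime('01-02-2011', format='%d-%m-%Y')  # Timestamp('2011-02-01 00:00:00')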
-Assembling datetime from multiple DataFrame columns +Assembling Datetime from Multiple DataFrame Columns ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 0.18.1 @@ -238,28 +234,24 @@ Invalid Data In version 0.17.0, the default for ``to_datetime`` is now ``errors='raise'``, rather than ``errors='ignore'``. This means that invalid parsing will raise rather that return the original input as in previous versions. -Pass ``errors='coerce'`` to convert invalid data to ``NaT`` (not a time): - -Raise when unparseable, this is the default +The default behavior, ``errors='raise'``, is to raise when unparseable: .. code-block:: ipython In [2]: pd.to_datetime(['2009/07/31', 'asd'], errors='raise') ValueError: Unknown string format -Return the original input when unparseable +Pass ``errors='ignore'`` to return the original input when unparseable: -.. code-block:: ipython +.. ipython:: python - In [4]: pd.to_datetime(['2009/07/31', 'asd'], errors='ignore') - Out[4]: array(['2009/07/31', 'asd'], dtype=object) + pd.to_datetime(['2009/07/31', 'asd'], errors='ignore') -Return NaT for input when unparseable +Pass ``errors='coerce'`` to convert unparseable data to ``NaT`` (not a time): -.. code-block:: ipython +.. ipython:: python - In [6]: pd.to_datetime(['2009/07/31', 'asd'], errors='coerce') - Out[6]: DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None) + pd.to_datetime(['2009/07/31', 'asd'], errors='coerce') .. _timeseries.converting.epoch: @@ -267,12 +259,11 @@ Return NaT for input when unparseable Epoch Timestamps ~~~~~~~~~~~~~~~~ -It's also possible to convert integer or float epoch times. The default unit -for these is nanoseconds (since these are how ``Timestamp`` s are stored). However, -often epochs are stored in another ``unit`` which can be specified. These are computed -from the starting point specified by the :ref:`Origin Parameter `. - -Typical epoch stored units +pandas supports converting integer or float epoch times to ``Timestamp`` and +``DatetimeIndex``. The default unit is nanoseconds, since that is how ``Timestamp`` +objects are stored internally. However, epochs are often stored in another ``unit`` +which can be specified. These are computed from the starting point specified by the +``origin`` parameter. .. ipython:: python @@ -299,6 +290,10 @@ Typical epoch stored units pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s') pd.to_datetime(1490195805433502912, unit='ns') +.. seealso:: + + :ref:`timeseries.origin` + .. _timeseries.converting.epoch_inverse: From Timestamps to Epoch @@ -319,15 +314,13 @@ We convert the ``DatetimeIndex`` to an ``int64`` array, then divide by the conve .. _timeseries.origin: -Using the Origin Parameter -~~~~~~~~~~~~~~~~~~~~~~~~~~ +Using the ``origin`` Parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 0.20.0 Using the ``origin`` parameter, one can specify an alternative starting point for creation -of a ``DatetimeIndex``. - -Start with 1960-01-01 as the starting date +of a ``DatetimeIndex``. For example, to use 1960-01-01 as the starting date: .. ipython:: python @@ -345,8 +338,8 @@ Commonly called 'unix epoch' or POSIX time. Generating Ranges of Timestamps ------------------------------- -To generate an index with time stamps, you can use either the DatetimeIndex or -Index constructor and pass in a list of datetime objects: +To generate an index with timestamps, you can use either the ``DatetimeIndex`` or +``Index`` constructor and pass in a list of datetime objects: .. 
ipython:: python @@ -360,37 +353,36 @@ Index constructor and pass in a list of datetime objects: index = pd.Index(dates) index -Practically, this becomes very cumbersome because we often need a very long +In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular -frequency, we can use the pandas functions ``date_range`` and ``bdate_range`` -to create timestamp indexes. +frequency, we can use the :func:`date_range` and :func:`bdate_range` functions +to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a +**calendar day** while the default for ``bdate_range`` is a **business day**: .. ipython:: python - index = pd.date_range('2000-1-1', periods=1000, freq='M') + start = datetime(2011, 1, 1) + end = datetime(2012, 1, 1) + + index = pd.date_range(start, end) index - index = pd.bdate_range('2012-1-1', periods=250) + index = pd.bdate_range(start, end) index -Convenience functions like ``date_range`` and ``bdate_range`` utilize a -variety of frequency aliases. The default frequency for ``date_range`` is a -**calendar day** while the default for ``bdate_range`` is a **business day** +Convenience functions like ``date_range`` and ``bdate_range`` can utilize a +variety of :ref:`frequency aliases `: .. ipython:: python - start = datetime(2011, 1, 1) - end = datetime(2012, 1, 1) - - rng = pd.date_range(start, end) - rng + pd.date_range(start, periods=1000, freq='M') - rng = pd.bdate_range(start, end) - rng + pd.bdate_range(start, periods=250, freq='BQS') ``date_range`` and ``bdate_range`` make it easy to generate a range of dates -using various combinations of parameters like ``start``, ``end``, -``periods``, and ``freq``: +using various combinations of parameters like ``start``, ``end``, ``periods``, +and ``freq``. The start and end dates are strictly inclusive, so dates outside +of those specified will not be generated: .. ipython:: python @@ -402,15 +394,45 @@ using various combinations of parameters like ``start``, ``end``, pd.bdate_range(start=start, periods=20) -The start and end dates are strictly inclusive. So it will not generate any -dates outside of those dates if specified. +.. _timeseries.custom-freq-ranges: + +Custom Frequency Ranges +~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + This functionality was originally exclusive to ``cdate_range``, which is + deprecated as of version 0.21.0 in favor of ``bdate_range``. Note that + ``cdate_range`` only utilizes the ``weekmask`` and ``holidays`` parameters + when custom business day, 'C', is passed as the frequency string. Support has + been expanded with ``bdate_range`` to work with any custom frequency string. + +.. versionadded:: 0.21.0 + +``bdate_range`` can also generate a range of custom frequency dates by using +the ``weekmask`` and ``holidays`` parameters. These parameters will only be +used if a custom frequency string is passed. + +.. ipython:: python + + weekmask = 'Mon Wed Fri' + + holidays = [datetime(2011, 1, 5), datetime(2011, 3, 14)] + + pd.bdate_range(start, end, freq='C', weekmask=weekmask, holidays=holidays) + + pd.bdate_range(start, end, freq='CBMS', weekmask=weekmask) + +.. seealso:: + + :ref:`timeseries.custombusinessdays` .. 
_timeseries.timestamp-limits: -Timestamp limitations +Timestamp Limitations --------------------- -Since pandas represents timestamps in nanosecond resolution, the timespan that +Since pandas represents timestamps in nanosecond resolution, the time span that can be represented using a 64-bit integer is limited to approximately 584 years: .. ipython:: python @@ -418,7 +440,9 @@ can be represented using a 64-bit integer is limited to approximately 584 years: pd.Timestamp.min pd.Timestamp.max -See :ref:`here ` for ways to represent data outside these bound. +.. seealso:: + + :ref:`timeseries.oob` .. _timeseries.datetimeindex: @@ -426,20 +450,20 @@ Indexing -------- One of the main uses for ``DatetimeIndex`` is as an index for pandas objects. -The ``DatetimeIndex`` class contains many timeseries related optimizations: +The ``DatetimeIndex`` class contains many time series related optimizations: - A large range of dates for various offsets are pre-computed and cached under the hood in order to make generating subsequent date ranges very fast (just have to grab a slice) - Fast shifting using the ``shift`` and ``tshift`` method on pandas objects - - Unioning of overlapping DatetimeIndex objects with the same frequency is + - Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is very fast (important for fast data alignment) - Quick access to date fields via properties such as ``year``, ``month``, etc. - Regularization functions like ``snap`` and very fast ``asof`` logic -DatetimeIndex objects has all the basic functionality of regular Index objects -and a smorgasbord of advanced timeseries-specific methods for easy frequency -processing. +``DatetimeIndex`` objects have all the basic functionality of regular ``Index`` +objects, and a smorgasbord of advanced time series specific methods for easy +frequency processing. .. seealso:: :ref:`Reindexing methods ` @@ -447,8 +471,7 @@ processing. .. note:: While pandas does not force you to have a sorted date index, some of these - methods may have unexpected or incorrect behavior if the dates are - unsorted. So please be careful. + methods may have unexpected or incorrect behavior if the dates are unsorted. ``DatetimeIndex`` can be used like a regular index and offers all of its intelligent functionality like selection, slicing, etc. @@ -466,7 +489,7 @@ intelligent functionality like selection, slicing, etc. Partial String Indexing ~~~~~~~~~~~~~~~~~~~~~~~ -You can pass in dates and strings that parse to dates as indexing parameters: +Dates and strings that parse to timestamps can be passed as indexing parameters: .. ipython:: python @@ -485,9 +508,9 @@ the year or year and month as strings: ts['2011-6'] -This type of slicing will work on a DataFrame with a ``DateTimeIndex`` as well. Since the +This type of slicing will work on a ``DataFrame`` with a ``DatetimeIndex`` as well. Since the partial string selection is a form of label slicing, the endpoints **will be** included. This -would include matching times on an included date. Here's an example: +would include matching times on an included date: .. ipython:: python @@ -523,7 +546,7 @@ We are stopping on the included end-point as it is part of the index .. versionadded:: 0.18.0 -DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiIndex``. For example: +``DatetimeIndex`` partial string indexing also works on a ``DataFrame`` with a ``MultiIndex``: .. 
ipython:: python @@ -541,14 +564,14 @@ DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiInd .. _timeseries.slice_vs_exact_match: -Slice vs. exact match +Slice vs. Exact Match ~~~~~~~~~~~~~~~~~~~~~ .. versionchanged:: 0.20.0 -The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of an index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. +The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. -For example, let us consider ``Series`` object which index has minute resolution. +Consider a ``Series`` object with a minute resolution index: .. ipython:: python @@ -593,7 +616,7 @@ If the timestamp string is treated as a slice, it can be used to index ``DataFra .. warning:: - However if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics `. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2012-12-31 23:59'`` has the same resolution as index and there is no column with such name: + However, if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics `. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2012-12-31 23:59'`` has the same resolution as the index and there is no column with such name: To *always* have unambiguous selection, whether the row is treated as a slice or a single selection, use ``.loc``. @@ -616,7 +639,7 @@ Note also that ``DatetimeIndex`` resolution cannot be less precise than day. Exact Indexing ~~~~~~~~~~~~~~ -As discussed in previous section, indexing a ``DateTimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. +As discussed in previous section, indexing a ``DatetimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. These ``Timestamp`` and ``datetime`` objects have exact ``hours, minutes,`` and ``seconds``, even though they were not explicitly specified (they are ``0``). @@ -640,8 +663,8 @@ A ``truncate`` convenience function is provided that is equivalent to slicing: ts.truncate(before='10/31/2011', after='12/31/2011') -Even complicated fancy indexing that breaks the DatetimeIndex's frequency -regularity will result in a ``DatetimeIndex`` (but frequency is lost): +Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency +regularity will result in a ``DatetimeIndex``, although frequency is lost: .. 
ipython:: python @@ -652,7 +675,7 @@ regularity will result in a ``DatetimeIndex`` (but frequency is lost): Time/Date Components -------------------- -There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DateTimeIndex``. +There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DatetimeIndex``. .. csv-table:: :header: "Property", "Description" @@ -688,10 +711,10 @@ Furthermore, if you have a ``Series`` with datetimelike values, then you can acc .. _timeseries.offsets: -DateOffset objects +DateOffset Objects ------------------ -In the preceding examples, we created DatetimeIndex objects at various +In the preceding examples, we created ``DatetimeIndex`` objects at various frequencies by passing in :ref:`frequency strings ` like 'M', 'W', and 'BM to the ``freq`` keyword. Under the hood, these frequency strings are being translated into an instance of pandas ``DateOffset``, @@ -704,7 +727,7 @@ which represents a regular frequency increment. Specific offset logic like DateOffset, "Generic offset class, defaults to 1 calendar day" BDay, "business day (weekday)" - CDay, "custom business day (experimental)" + CDay, "custom business day" Week, "one week, optionally anchored on a day of the week" WeekOfMonth, "the x-th day of the y-th week of each month" LastWeekOfMonth, "the x-th day of the last week of each month" @@ -805,7 +828,7 @@ These operations (``apply``, ``rollforward`` and ``rollback``) preserves time (h hour.apply(pd.Timestamp('2014-01-01 23:00')) -Parametric offsets +Parametric Offsets ~~~~~~~~~~~~~~~~~~ Some of the offsets can be "parameterized" when created to result in different @@ -840,7 +863,7 @@ Another example is parameterizing ``YearEnd`` with the specific ending month: .. _timeseries.offsetseries: -Using offsets with ``Series`` / ``DatetimeIndex`` +Using Offsets with ``Series`` / ``DatetimeIndex`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Offsets can be used with either a ``Series`` or ``DatetimeIndex`` to @@ -1091,7 +1114,7 @@ frequencies. We will refer to these aliases as *offset aliases*. :widths: 15, 100 "B", "business day frequency" - "C", "custom business day frequency (experimental)" + "C", "custom business day frequency" "D", "calendar day frequency" "W", "weekly frequency" "M", "month end frequency" @@ -1326,10 +1349,10 @@ or calendars with additional rules. .. _timeseries.advanced_datetime: -Time series-related instance methods +Time Series-Related Instance Methods ------------------------------------ -Shifting / lagging +Shifting / Lagging ~~~~~~~~~~~~~~~~~~ One may want to *shift* or *lag* the values in a time series back and forward in @@ -1360,7 +1383,7 @@ all the dates in the index by a specified number of offsets: Note that with ``tshift``, the leading entry is no longer NaN because the data is not being realigned. -Frequency conversion +Frequency Conversion ~~~~~~~~~~~~~~~~~~~~ The primary function for changing frequencies is the ``asfreq`` function. @@ -1381,13 +1404,13 @@ method for any gaps that may appear after the frequency conversion ts.asfreq(BDay(), method='pad') -Filling forward / backward +Filling Forward / Backward ~~~~~~~~~~~~~~~~~~~~~~~~~~ Related to ``asfreq`` and ``reindex`` is the ``fillna`` function documented in the :ref:`missing data section `. 
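To make the relationship between ``asfreq`` and filling concrete, here is a minimal sketch (the dates are made up for illustration): upsampling an irregular series to business-day frequency introduces missing values, which a fill ``method`` then propagates:

.. code-block:: python

    import pandas as pd
    from pandas.tseries.offsets import BDay

    # Two observations separated by a gap of two business days
    # (2011-01-07 is a Friday, 2011-01-12 the following Wednesday).
    ts = pd.Series([1.0, 2.0],
                   index=pd.to_datetime(['2011-01-07', '2011-01-12']))

    ts.asfreq(BDay())                # Mon 10th and Tue 11th become NaN
    ts.asfreq(BDay(), method='pad')  # NaNs filled by carrying the last value forward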
-Converting to Python datetimes +Converting to Python Datetimes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``DatetimeIndex`` can be converted to an array of Python native datetime.datetime objects using the @@ -1471,10 +1494,10 @@ labels. ts.resample('5Min', label='left', loffset='1s').mean() The ``axis`` parameter can be set to 0 or 1 and allows you to resample the -specified axis for a DataFrame. +specified axis for a ``DataFrame``. ``kind`` can be set to 'timestamp' or 'period' to convert the resulting index -to/from time-stamp and time-span representations. By default ``resample`` +to/from timestamp and time span representations. By default ``resample`` retains the input representation. ``convention`` can be set to 'start' or 'end' when resampling period data @@ -1482,8 +1505,8 @@ retains the input representation. frequency periods. -Up Sampling -~~~~~~~~~~~ +Upsampling +~~~~~~~~~~ For upsampling, you can specify a way to upsample and the ``limit`` parameter to interpolate over the gaps that are created: @@ -1559,13 +1582,13 @@ We can select a specific column or columns using standard getitem. r[['A','B']].mean() -You can pass a list or dict of functions to do aggregation with, outputting a DataFrame: +You can pass a list or dict of functions to do aggregation with, outputting a ``DataFrame``: .. ipython:: python r['A'].agg([np.sum, np.mean, np.std]) -On a resampled DataFrame, you can pass a list of functions to apply to each +On a resampled ``DataFrame``, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: .. ipython:: python @@ -1573,7 +1596,7 @@ column, which produces an aggregated result with a hierarchical index: r.agg([np.sum, np.mean]) By passing a dict to ``aggregate`` you can apply a different aggregation to the -columns of a DataFrame: +columns of a ``DataFrame``: .. ipython:: python :okexcept: @@ -1890,7 +1913,7 @@ frequencies ``Q-JAN`` through ``Q-DEC``. .. _timeseries.interchange: -Converting between Representations +Converting Between Representations ---------------------------------- Timestamped data can be converted to PeriodIndex-ed data using ``to_period`` @@ -1934,7 +1957,7 @@ the quarter end: .. _timeseries.oob: -Representing out-of-bounds spans +Representing Out-of-Bounds Spans -------------------------------- If you have data that is outside of the ``Timestamp`` bounds, see :ref:`Timestamp limitations `, @@ -2031,7 +2054,7 @@ which gives you more control over which time zone is used: rng_dateutil.tz == tz_dateutil Timestamps, like Python's ``datetime.datetime`` object can be either time zone -naive or time zone aware. Naive time series and DatetimeIndex objects can be +naive or time zone aware. Naive time series and ``DatetimeIndex`` objects can be *localized* using ``tz_localize``: .. ipython:: python @@ -2099,8 +2122,8 @@ Localization of ``Timestamp`` functions just like ``DatetimeIndex`` and ``Series rng[5].tz_localize('Asia/Shanghai') -Operations between Series in different time zones will yield UTC -Series, aligning the data on the UTC timestamps: +Operations between ``Series`` in different time zones will yield UTC +``Series``, aligning the data on the UTC timestamps: .. ipython:: python @@ -2180,7 +2203,7 @@ constructor as well as ``tz_localize``. .. _timeseries.timezone_series: -TZ aware Dtypes +TZ Aware Dtypes ~~~~~~~~~~~~~~~ .. 
versionadded:: 0.17.0 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c8a0a6bff5cc75..d69a5c22acc035 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -488,7 +488,7 @@ Additionally, DataFrames with datetime columns that were parsed by :func:`read_s Consistency of Range Functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`cdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`. (:issue:`17471`). +In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`. (:issue:`17471`). One of the inconsistent behaviors occurred when the ``start``, ``end`` and ``period`` parameters were all specified, potentially leading to ambiguous ranges. When all three parameters were passed, ``interval_range`` ignored the ``period`` parameter, ``period_range`` ignored the ``end`` parameter, and the other range functions raised. To promote consistency among the range functions, and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range`` will now raise when all three parameters are passed. @@ -571,8 +571,9 @@ Deprecations - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) +- ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation ` for more details (:issue:`17596`) -.. _whatsnew_0210.deprecations.argmin_min +.. 
_whatsnew_0210.deprecations.argmin_min: Series.argmax and Series.argmin ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -738,9 +739,9 @@ Numeric Categorical ^^^^^^^^^^^ -- Bug in :func:`Series.isin` when called with a categorical (:issue`16639`) +- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) - Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) -- Bug in categorical operations with :ref:`Series.cat ' not preserving the original Series' name (:issue:`17509`) +- Bug in categorical operations with :ref:`Series.cat ` not preserving the original Series' name (:issue:`17509`) PyPy ^^^^ diff --git a/pandas/core/api.py b/pandas/core/api.py index a012ccce839653..2f818a400162b3 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -16,8 +16,7 @@ PeriodIndex, NaT) from pandas.core.indexes.period import Period, period_range, pnow from pandas.core.indexes.timedeltas import Timedelta, timedelta_range -from pandas.core.indexes.datetimes import (Timestamp, date_range, bdate_range, - cdate_range) +from pandas.core.indexes.datetimes import Timestamp, date_range, bdate_range from pandas.core.indexes.interval import Interval, interval_range from pandas.core.series import Series diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9127864eab8a16..1419da3fa8861b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -17,6 +17,7 @@ is_period_dtype, is_bool_dtype, is_string_dtype, + is_string_like, is_list_like, is_scalar, pandas_dtype, @@ -37,7 +38,8 @@ Resolution) from pandas.core.indexes.datetimelike import ( DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) -from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay +from pandas.tseries.offsets import ( + DateOffset, generate_range, Tick, CDay, prefix_mapping) from pandas.core.tools.datetimes import ( parse_time_string, normalize_date, to_time) from pandas.core.tools.timedeltas import to_timedelta @@ -2049,7 +2051,8 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, - normalize=True, name=None, closed=None, **kwargs): + normalize=True, name=None, weekmask=None, holidays=None, + closed=None, **kwargs): """ Return a fixed frequency DatetimeIndex, with business day as the default frequency @@ -2071,6 +2074,20 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Normalize start/end dates to midnight before generating date range name : string, default None Name of the resulting DatetimeIndex + weekmask : string or None, default None + Weekmask of valid business days, passed to ``numpy.busdaycalendar``, + only used when custom frequency strings are passed. The default + value None is equivalent to 'Mon Tue Wed Thu Fri' + + .. versionadded:: 0.21.0 + + holidays : list-like or None, default None + Dates to exclude from the set of valid business days, passed to + ``numpy.busdaycalendar``, only used when custom frequency strings + are passed + + .. 
versionadded:: 0.21.0 + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) @@ -2088,6 +2105,18 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, rng : DatetimeIndex """ + if is_string_like(freq) and freq.startswith('C'): + try: + weekmask = weekmask or 'Mon Tue Wed Thu Fri' + freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask) + except (KeyError, TypeError): + msg = 'invalid custom frequency string: {freq}'.format(freq=freq) + raise ValueError(msg) + elif holidays or weekmask: + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency {freq}').format(freq=freq) + raise ValueError(msg) + return DatetimeIndex(start=start, end=end, periods=periods, freq=freq, tz=tz, normalize=normalize, name=name, closed=closed, **kwargs) @@ -2099,6 +2128,8 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Return a fixed frequency DatetimeIndex, with CustomBusinessDay as the default frequency + .. deprecated:: 0.21.0 + Parameters ---------- start : string or datetime-like, default None @@ -2137,6 +2168,9 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, ------- rng : DatetimeIndex """ + warnings.warn("cdate_range is deprecated and will be removed in a future " + "version, instead use pd.bdate_range(..., freq='{freq}')" + .format(freq=freq), FutureWarning, stacklevel=2) if freq == 'C': holidays = kwargs.pop('holidays', []) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index c593290410b961..fad455d6391c33 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -63,7 +63,7 @@ class TestPDApi(Base): # top-level functions funcs = ['bdate_range', 'concat', 'crosstab', 'cut', 'date_range', 'interval_range', 'eval', - 'factorize', 'get_dummies', 'cdate_range', + 'factorize', 'get_dummies', 'infer_freq', 'isna', 'isnull', 'lreshape', 'melt', 'notna', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', @@ -240,3 +240,13 @@ def test_deprecation_access_func(self): [c1, c2], sort_categories=True, ignore_order=True) + + +class TestCDateRange(object): + + def test_deprecation_cdaterange(self): + # GH17596 + from pandas.core.indexes.datetimes import cdate_range + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + cdate_range('2017-01-01', '2017-12-31') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index c373942cb4c63c..3b40ef092f3643 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1,6 +1,5 @@ """ -test date_range, bdate_range, cdate_range -construction from the convenience range functions +test date_range, bdate_range construction from the convenience range functions """ import pytest @@ -12,10 +11,9 @@ import pandas as pd import pandas.util.testing as tm from pandas import compat -from pandas.core.indexes.datetimes import bdate_range, cdate_range -from pandas import date_range, offsets, DatetimeIndex, Timestamp -from pandas.tseries.offsets import (generate_range, CDay, BDay, - DateOffset, MonthEnd) +from pandas import date_range, bdate_range, offsets, DatetimeIndex, Timestamp +from pandas.tseries.offsets import (generate_range, CDay, BDay, DateOffset, + MonthEnd, prefix_mapping) from pandas.tests.series.common import TestData @@ -241,9 +239,6 @@ def 
test_precision_finer_than_offset(self): class TestBusinessDateRange(object): - def setup_method(self, method): - self.rng = bdate_range(START, END) - def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) @@ -258,33 +253,31 @@ def test_constructor(self): def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), - tz="Asia/Hong_Kong") - tm.assert_raises_regex(TypeError, "tz-naive.*tz-aware", - naive.join, aware) - tm.assert_raises_regex(TypeError, "tz-naive.*tz-aware", - aware.join, naive) + aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") + + msg = 'tz-naive.*tz-aware' + with tm.assert_raises_regex(TypeError, msg): + naive.join(aware) + + with tm.assert_raises_regex(TypeError, msg): + aware.join(naive) def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=BDay()) DatetimeIndex._cached_range(START, periods=20, offset=BDay()) DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - tm.assert_raises_regex(TypeError, "offset", - DatetimeIndex._cached_range, - START, END) + with tm.assert_raises_regex(TypeError, "offset"): + DatetimeIndex._cached_range(START, END) - tm.assert_raises_regex(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(START, offset=BDay()) - tm.assert_raises_regex(TypeError, "specify period", - DatetimeIndex._cached_range, end=END, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(end=END, offset=BDay()) - tm.assert_raises_regex(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "start or end"): + DatetimeIndex._cached_range(periods=20, offset=BDay()) def test_cached_range_bug(self): rng = date_range('2010-09-01 05:00:00', periods=50, @@ -300,8 +293,9 @@ def test_timezone_comparaison_bug(self): def test_timezone_comparaison_assert(self): start = Timestamp('20130220 10:00', tz='US/Eastern') - pytest.raises(AssertionError, date_range, start, periods=2, - tz='Europe/Berlin') + msg = 'Inferred time zone not equal to passed time zone' + with tm.assert_raises_regex(AssertionError, msg): + date_range(start, periods=2, tz='Europe/Berlin') def test_misc(self): end = datetime(2009, 5, 13) @@ -315,14 +309,17 @@ def test_misc(self): def test_date_parse_failure(self): badly_formed_date = '2007/100/1' - pytest.raises(ValueError, Timestamp, badly_formed_date) + with pytest.raises(ValueError): + Timestamp(badly_formed_date) + + with pytest.raises(ValueError): + bdate_range(start=badly_formed_date, periods=10) - pytest.raises(ValueError, bdate_range, start=badly_formed_date, - periods=10) - pytest.raises(ValueError, bdate_range, end=badly_formed_date, - periods=10) - pytest.raises(ValueError, bdate_range, badly_formed_date, - badly_formed_date) + with pytest.raises(ValueError): + bdate_range(end=badly_formed_date, periods=10) + + with pytest.raises(ValueError): + bdate_range(badly_formed_date, badly_formed_date) def test_daterange_bug_456(self): # GH #456 @@ -334,8 +331,9 @@ def test_daterange_bug_456(self): assert isinstance(result, DatetimeIndex) def test_error_with_zero_monthends(self): - pytest.raises(ValueError, date_range, '1/1/2000', '1/1/2001', - freq=MonthEnd(0)) + msg = 'Offset <0 \* MonthEnds> did not increment date' + with 
tm.assert_raises_regex(ValueError, msg): + date_range('1/1/2000', '1/1/2001', freq=MonthEnd(0)) def test_range_bug(self): # GH #770 @@ -343,8 +341,8 @@ def test_range_bug(self): result = date_range("2011-1-1", "2012-1-31", freq=offset) start = datetime(2011, 1, 1) - exp_values = [start + i * offset for i in range(5)] - tm.assert_index_equal(result, DatetimeIndex(exp_values)) + expected = DatetimeIndex([start + i * offset for i in range(5)]) + tm.assert_index_equal(result, expected) def test_range_tz_pytz(self): # see gh-2906 @@ -525,20 +523,18 @@ def test_freq_divides_end_in_nanos(self): class TestCustomDateRange(object): - def setup_method(self, method): - self.rng = cdate_range(START, END) def test_constructor(self): - cdate_range(START, END, freq=CDay()) - cdate_range(START, periods=20, freq=CDay()) - cdate_range(end=START, periods=20, freq=CDay()) + bdate_range(START, END, freq=CDay()) + bdate_range(START, periods=20, freq=CDay()) + bdate_range(end=START, periods=20, freq=CDay()) msg = 'periods must be a number, got C' with tm.assert_raises_regex(TypeError, msg): date_range('2011-1-1', '2012-1-1', 'C') with tm.assert_raises_regex(TypeError, msg): - cdate_range('2011-1-1', '2012-1-1', 'C') + bdate_range('2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) @@ -547,66 +543,93 @@ def test_cached_range(self): DatetimeIndex._cached_range(end=START, periods=20, offset=CDay()) - pytest.raises(Exception, DatetimeIndex._cached_range, START, END) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "offset"): + DatetimeIndex._cached_range(START, END) - pytest.raises(Exception, DatetimeIndex._cached_range, START, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(START, offset=CDay()) - pytest.raises(Exception, DatetimeIndex._cached_range, end=END, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(end=END, offset=CDay()) - pytest.raises(Exception, DatetimeIndex._cached_range, periods=20, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "start or end"): + DatetimeIndex._cached_range(periods=20, offset=CDay()) def test_misc(self): end = datetime(2009, 5, 13) - dr = cdate_range(end=end, periods=20) + dr = bdate_range(end=end, periods=20, freq='C') firstDate = end - 19 * CDay() assert len(dr) == 20 assert dr[0] == firstDate assert dr[-1] == end - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - pytest.raises(ValueError, Timestamp, badly_formed_date) - - pytest.raises(ValueError, cdate_range, start=badly_formed_date, - periods=10) - pytest.raises(ValueError, cdate_range, end=badly_formed_date, - periods=10) - pytest.raises(ValueError, cdate_range, badly_formed_date, - badly_formed_date) - def test_daterange_bug_456(self): # GH #456 - rng1 = cdate_range('12/5/2011', '12/5/2011') - rng2 = cdate_range('12/2/2011', '12/5/2011') + rng1 = bdate_range('12/5/2011', '12/5/2011', freq='C') + rng2 = bdate_range('12/2/2011', '12/5/2011', freq='C') rng2.offset = CDay() result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) def test_cdaterange(self): - rng = cdate_range('2013-05-01', periods=3) - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C') + expected = 
DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) + tm.assert_index_equal(result, expected) def test_cdaterange_weekmask(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu') - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + weekmask='Sun Mon Tue Wed Thu') + expected = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu') def test_cdaterange_holidays(self): - rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + holidays=['2013-05-01']) + expected = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) def test_cdaterange_weekmask_and_holidays(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu', - holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + expected = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + + @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping + if freq.startswith('C')]) + def test_all_custom_freq(self, freq): + # should not raise + bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri', + holidays=['2009-03-14']) + + bad_freq = freq + 'FOO' + msg = 'invalid custom frequency string: {freq}' + with tm.assert_raises_regex(ValueError, msg.format(freq=bad_freq)): + bdate_range(START, END, freq=bad_freq) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 86e65feec04f36..7cb051d351444c 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -10,7 +10,6 @@ import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.errors import PerformanceWarning -from pandas.core.indexes.datetimes import cdate_range from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, datetime, Float64Index, offsets, bdate_range) @@ -1208,7 +1207,7 @@ def test_identical(self): class TestCustomDatetimeIndex(object): def setup_method(self, method): - self.rng = cdate_range(START, END) + self.rng = bdate_range(START, END, freq='C') def test_comparison(self): d = self.rng[10] @@ -1277,10 +1276,11 @@ def test_summary(self): 
self.rng[2:2].summary() def test_summary_pytz(self): - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + bdate_range('1/1/2005', '1/1/2009', freq='C', tz=pytz.utc).summary() def test_summary_dateutil(self): - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + bdate_range('1/1/2005', '1/1/2009', freq='C', + tz=dateutil.tz.tzutc()).summary() def test_equals(self): assert not self.rng.equals(list(self.rng)) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 4ffd2e1cd1e615..ff436e0501849f 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -4,7 +4,6 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.indexes.datetimes import cdate_range from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, Int64Index, Index, to_datetime) from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd @@ -345,7 +344,7 @@ def test_month_range_union_tz_dateutil(self): class TestCustomDatetimeIndex(object): def setup_method(self, method): - self.rng = cdate_range(START, END) + self.rng = bdate_range(START, END, freq='C') def test_union(self): # overlapping @@ -412,7 +411,7 @@ def test_outer_join(self): def test_intersection_bug(self): # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') + a = bdate_range('11/30/2011', '12/31/2011', freq='C') + b = bdate_range('12/10/2011', '12/20/2011', freq='C') result = a.intersection(b) tm.assert_index_equal(result, b) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ea37434e3a8d98..3a2a613986dcae 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2987,6 +2987,7 @@ def generate_range(start=None, end=None, periods=None, CustomBusinessHour, # 'CBH' MonthEnd, # 'M' MonthBegin, # 'MS' + Nano, # 'N' SemiMonthEnd, # 'SM' SemiMonthBegin, # 'SMS' Week, # 'W' @@ -3002,5 +3003,3 @@ def generate_range(start=None, end=None, periods=None, FY5253, FY5253Quarter, ]) - -prefix_mapping['N'] = Nano From a6078728ecf95db2b6e3b31830e30673dee3200e Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 2 Oct 2017 07:59:15 -0400 Subject: [PATCH 186/188] BUG: Regression in .loc accepting a boolean Index as an indexer (#17738) closes #17131 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/common.py | 4 ++-- pandas/tests/indexing/test_loc.py | 17 +++++++++++++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d69a5c22acc035..f17e5b5e8fa488 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -668,6 +668,7 @@ Indexing - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) - Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. 
(:issue:`17407`) +- Regression in ``.loc`` accepting a boolean ``Index`` as an indexer (:issue:`17131`) I/O ^^^ diff --git a/pandas/core/common.py b/pandas/core/common.py index 0f7b86f5e74a09..2686ad370e1ed2 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -15,7 +15,7 @@ from pandas import compat from pandas.compat import long, zip, iteritems from pandas.core.config import get_option -from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.generic import ABCSeries, ABCIndex from pandas.core.dtypes.common import _NS_DTYPE from pandas.core.dtypes.inference import _iterable_not_string from pandas.core.dtypes.missing import isna, isnull, notnull # noqa @@ -182,7 +182,7 @@ def _maybe_box_datetimelike(value): def is_bool_indexer(key): - if isinstance(key, (ABCSeries, np.ndarray)): + if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)): if key.dtype == np.object_: key = np.asarray(_values_from_object(key)) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 17316a714e2609..95d6a24e68425c 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -317,6 +317,23 @@ def test_loc_getitem_label_slice(self): self.check_result('mixed slice', 'loc', slice(2, 4, 2), 'ix', slice( 2, 4, 2), typs=['mixed'], axes=0, fails=TypeError) + def test_loc_index(self): + # gh-17131 + # a boolean index should index like a boolean numpy array + + df = DataFrame( + np.random.random(size=(5, 10)), + index=["alpha_0", "alpha_1", "alpha_2", "beta_0", "beta_1"]) + + mask = df.index.map(lambda x: "alpha" in x) + expected = df.loc[np.array(mask)] + + result = df.loc[mask] + tm.assert_frame_equal(result, expected) + + result = df.loc[mask.values] + tm.assert_frame_equal(result, expected) + def test_loc_general(self): df = DataFrame( From 1a6b7ab8ecb0270227066ec7cca8a6bbcd9ddbc3 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 2 Oct 2017 08:32:44 -0400 Subject: [PATCH 187/188] DOC: remove whatsnew note for xref #17131 --- doc/source/whatsnew/v0.21.0.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f17e5b5e8fa488..d69a5c22acc035 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -668,7 +668,6 @@ Indexing - Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) - Bug in :meth:`DataFrame.first_valid_index` and :meth:`DataFrame.last_valid_index` when no valid entry (:issue:`17400`) - Bug in :func:`Series.rename` when called with a `callable`, incorrectly alters the name of the `Series`, rather than the name of the `Index`. 
(:issue:`17407`) -- Regression in ``.loc`` accepting a boolean ``Index`` as an indexer (:issue:`17131`) I/O ^^^ From a3d538ab72380471f5de7b8e4a3f811aa4de84af Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 2 Oct 2017 05:43:39 -0700 Subject: [PATCH 188/188] Separate out _convert_datetime_to_tsobject (#17715) --- pandas/_libs/tslib.pyx | 145 +++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 50 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 096ebe9a5627b0..ff20ea287bd9d1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -708,7 +708,7 @@ class Timestamp(_Timestamp): # reconstruct & check bounds ts_input = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, tzinfo=_tzinfo) - ts = convert_to_tsobject(ts_input, _tzinfo, None, 0, 0) + ts = convert_datetime_to_tsobject(ts_input, _tzinfo) value = ts.value + (dts.ps // 1000) if value != NPY_NAT: _check_dts_bounds(&dts) @@ -1455,52 +1455,11 @@ cdef convert_to_tsobject(object ts, object tz, object unit, obj.value = ts pandas_datetime_to_datetimestruct(ts, PANDAS_FR_ns, &obj.dts) elif PyDateTime_Check(ts): - if tz is not None: - # sort of a temporary hack - if ts.tzinfo is not None: - if (hasattr(tz, 'normalize') and - hasattr(ts.tzinfo, '_utcoffset')): - ts = tz.normalize(ts) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: #tzoffset - try: - tz = ts.astimezone(tz).tzinfo - except: - pass - obj.value = _pydatetime_to_dts(ts, &obj.dts) - ts_offset = get_utcoffset(ts.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(ts_offset) - tz_offset = get_utcoffset(tz, ts) - obj.value += _delta_to_nanoseconds(tz_offset) - pandas_datetime_to_datetimestruct(obj.value, - PANDAS_FR_ns, &obj.dts) - obj.tzinfo = tz - elif not is_utc(tz): - ts = _localize_pydatetime(ts, tz) - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # UTC - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = pytz.utc - else: - obj.value = _pydatetime_to_dts(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - - if obj.tzinfo is not None and not is_utc(obj.tzinfo): - offset = get_utcoffset(obj.tzinfo, ts) - obj.value -= _delta_to_nanoseconds(offset) - - if is_timestamp(ts): - obj.value += ts.nanosecond - obj.dts.ps = ts.nanosecond * 1000 - _check_dts_bounds(&obj.dts) - return obj + return convert_datetime_to_tsobject(ts, tz) elif PyDate_Check(ts): # Keep the converter same as PyDateTime's ts = datetime.combine(ts, datetime_time()) - return convert_to_tsobject(ts, tz, None, 0, 0) + return convert_datetime_to_tsobject(ts, tz) elif getattr(ts, '_typ', None) == 'period': raise ValueError( "Cannot convert Period to Timestamp " @@ -1518,6 +1477,83 @@ cdef convert_to_tsobject(object ts, object tz, object unit, return obj +cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, + int32_t nanos=0): + """ + Convert a datetime (or Timestamp) input `ts`, along with optional timezone + object `tz` to a _TSObject. + + The optional argument `nanos` allows for cases where datetime input + needs to be supplemented with higher-precision information. 
+ + Parameters + ---------- + ts : datetime or Timestamp + Value to be converted to _TSObject + tz : tzinfo or None + timezone for the timezone-aware output + nanos : int32_t, default is 0 + nanoseconds supplement the precision of the datetime input ts + + Returns + ------- + obj : _TSObject + """ + cdef: + _TSObject obj = _TSObject() + + if tz is not None: + tz = maybe_get_tz(tz) + + # sort of a temporary hack + if ts.tzinfo is not None: + if (hasattr(tz, 'normalize') and + hasattr(ts.tzinfo, '_utcoffset')): + ts = tz.normalize(ts) + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # tzoffset + try: + tz = ts.astimezone(tz).tzinfo + except: + pass + obj.value = _pydatetime_to_dts(ts, &obj.dts) + ts_offset = get_utcoffset(ts.tzinfo, ts) + obj.value -= int(ts_offset.total_seconds() * 1e9) + tz_offset = get_utcoffset(tz, ts) + obj.value += int(tz_offset.total_seconds() * 1e9) + pandas_datetime_to_datetimestruct(obj.value, + PANDAS_FR_ns, &obj.dts) + obj.tzinfo = tz + elif not is_utc(tz): + ts = _localize_pydatetime(ts, tz) + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + else: + # UTC + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = pytz.utc + else: + obj.value = _pydatetime_to_dts(ts, &obj.dts) + obj.tzinfo = ts.tzinfo + + if obj.tzinfo is not None and not is_utc(obj.tzinfo): + offset = get_utcoffset(obj.tzinfo, ts) + obj.value -= int(offset.total_seconds() * 1e9) + + if is_timestamp(ts): + obj.value += ts.nanosecond + obj.dts.ps = ts.nanosecond * 1000 + + if nanos: + obj.value += nanos + obj.dts.ps = nanos * 1000 + + _check_dts_bounds(&obj.dts) + return obj + + cpdef convert_str_to_tsobject(object ts, object tz, object unit, dayfirst=False, yearfirst=False): """ ts must be a string """ @@ -1538,11 +1574,12 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, elif ts == 'now': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns utc - ts = Timestamp.now(tz) + ts = datetime.now(tz) elif ts == 'today': # Issue 9000, we short-circuit rather than going # into np_datetime_strings which returns a normalized datetime - ts = Timestamp.today(tz) + ts = datetime.now(tz) + # equiv: datetime.today().replace(tzinfo=tz) else: try: _string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset) @@ -1557,7 +1594,15 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit, return obj else: # Keep the converter same as PyDateTime's - ts = Timestamp(obj.value, tz=obj.tzinfo) + obj = convert_to_tsobject(obj.value, obj.tzinfo, + None, 0, 0) + dtime = datetime(obj.dts.year, obj.dts.month, obj.dts.day, + obj.dts.hour, obj.dts.min, obj.dts.sec, + obj.dts.us, obj.tzinfo) + obj = convert_datetime_to_tsobject(dtime, tz, + nanos=obj.dts.ps / 1000) + return obj + else: ts = obj.value if tz is not None: @@ -1706,7 +1751,7 @@ def datetime_to_datetime64(ndarray[object] values): else: inferred_tz = get_timezone(val.tzinfo) - _ts = convert_to_tsobject(val, None, None, 0, 0) + _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value _check_dts_bounds(&_ts.dts) else: @@ -2026,7 +2071,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', seen_datetime=1 if val.tzinfo is not None: if utc_convert: - _ts = convert_to_tsobject(val, None, 'ns', 0, 0) + _ts = convert_datetime_to_tsobject(val, None) iresult[i] = _ts.value try: _check_dts_bounds(&_ts.dts) @@ -2135,7 +2180,7 @@ cpdef array_to_datetime(ndarray[object] values, errors='raise', raise TypeError("invalid string coercion to 
datetime") try: - _ts = convert_to_tsobject(py_dt, None, None, 0, 0) + _ts = convert_datetime_to_tsobject(py_dt, None) iresult[i] = _ts.value except ValueError: if is_coerce: