Skip to content

Commit

Permalink
Merge pull request pandas-dev#7 from jbrockmendel/disown3
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger authored Dec 27, 2018
2 parents 4522dfe + eb594e7 commit 0b570b1
Show file tree
Hide file tree
Showing 77 changed files with 3,600 additions and 2,831 deletions.
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def setup(self, axis):
self.empty_right = [df, DataFrame()]

def time_concat_series(self, axis):
concat(self.series, axis=axis)
concat(self.series, axis=axis, sort=False)

def time_concat_small_frames(self, axis):
concat(self.small_frames, axis=axis)
Expand Down
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/panel_ctor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import warnings
from datetime import datetime, timedelta

from pandas import DataFrame, Panel, DatetimeIndex, date_range
from pandas import DataFrame, Panel, date_range


class DifferentIndexes(object):
Expand All @@ -23,9 +23,9 @@ def time_from_dict(self):
class SameIndexes(object):

def setup(self):
idx = DatetimeIndex(start=datetime(1990, 1, 1),
end=datetime(2012, 1, 1),
freq='D')
idx = date_range(start=datetime(1990, 1, 1),
end=datetime(2012, 1, 1),
freq='D')
df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx)
self.data_frames = dict(enumerate([df] * 100))

Expand All @@ -40,10 +40,10 @@ def setup(self):
start = datetime(1990, 1, 1)
end = datetime(2012, 1, 1)
df1 = DataFrame({'a': 0, 'b': 1, 'c': 2},
index=DatetimeIndex(start=start, end=end, freq='D'))
index=date_range(start=start, end=end, freq='D'))
end += timedelta(days=1)
df2 = DataFrame({'a': 0, 'b': 1, 'c': 2},
index=DatetimeIndex(start=start, end=end, freq='D'))
index=date_range(start=start, end=end, freq='D'))
dfs = [df1] * 50 + [df2] * 50
self.data_frames = dict(enumerate(dfs))

Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/reindex.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import numpy as np
import pandas.util.testing as tm
from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index,
from pandas import (DataFrame, Series, MultiIndex, Index,
date_range)
from .pandas_vb_common import lib


class Reindex(object):

def setup(self):
rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min')
rng = date_range(start='1/1/1970', periods=10000, freq='1min')
self.df = DataFrame(np.random.rand(10000, 10), index=rng,
columns=range(10))
self.df['foo'] = 'bar'
Expand Down
9 changes: 5 additions & 4 deletions asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime

import numpy as np
from pandas import Series, timedelta_range, to_timedelta, Timestamp, \
Timedelta, TimedeltaIndex, DataFrame

from pandas import (
DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta)


class TimedeltaConstructor(object):
Expand Down Expand Up @@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series):
class TimedeltaIndexing(object):

def setup(self):
self.index = TimedeltaIndex(start='1985', periods=1000, freq='D')
self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D')
self.index = timedelta_range(start='1985', periods=1000, freq='D')
self.index2 = timedelta_range(start='1986', periods=1000, freq='D')
self.series = Series(range(1000), index=self.index)
self.timedelta = self.index[500]

Expand Down
7 changes: 4 additions & 3 deletions asv_bench/benchmarks/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime

from pandas import Timestamp
import pytz
import dateutil
import pytz

from pandas import Timestamp


class TimestampConstruction(object):
Expand Down Expand Up @@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq):
self.ts.dayofweek

def time_weekday_name(self, tz, freq):
self.ts.weekday_name
self.ts.day_name

def time_dayofyear(self, tz, freq):
self.ts.dayofyear
Expand Down
2 changes: 1 addition & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_util.py assert_raises_regex pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

# Check that we use pytest.raises only as a context manager
Expand Down
31 changes: 12 additions & 19 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ Other Enhancements
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
- The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`)
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)

.. _whatsnew_0240.api_breaking:
Expand Down Expand Up @@ -673,7 +674,7 @@ changes were made:
* The ``out`` and ``mode`` parameters are no longer accepted (previously, this raised if they were specified).
* Passing a scalar for ``indices`` is no longer allowed.

- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).
Expand Down Expand Up @@ -1124,12 +1125,14 @@ Other API Changes
has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
- :class:`DateOffset` attribute `_cacheable` and method `_should_cache` have been removed (:issue:`23118`)
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched for key is not found in its categories (:issue:`23466`).
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`).
- :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` and ``Index.tolist``, respectively (:issue:`8826`)
- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`).

.. _whatsnew_0240.deprecations:

Expand Down Expand Up @@ -1177,25 +1180,19 @@ Deprecations

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Integer Addition/Subtraction with Datetime-like Classes Is Deprecated
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the past, users could add or subtract integers or integer-dtypes arrays
from :class:`Period`, :class:`PeriodIndex`, and in some cases
:class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`.
Integer Addition/Subtraction with Datetimes and Timedeltas is Deprecated
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In the past, users could—in some cases—add or subtract integers or integer-dtype
arrays from :class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`.

This usage is now deprecated. Instead add or subtract integer multiples of
the object's ``freq`` attribute. The result of subtraction of :class:`Period`
objects will be agnostic of the multiplier of the objects' ``freq`` attribute
(:issue:`21939`, :issue:`23878`).
the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`).

*Previous Behavior*:

.. code-block:: ipython
In [3]: per = pd.Period('2016Q1')
In [4]: per + 3
Out[4]: Period('2016Q4', 'Q-DEC')
In [5]: ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour())
In [6]: ts + 2
Out[6]: Timestamp('1994-05-06 14:15:16', freq='H')
Expand All @@ -1213,12 +1210,6 @@ objects will be agnostic of the multiplier of the objects' ``freq`` attribute
.. ipython:: python
:okwarning:
per = pd.Period('2016Q1')
per + 3
per = pd.Period('2016Q1')
per + 3 * per.freq
ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour())
ts + 2 * ts.freq
Expand Down Expand Up @@ -1428,6 +1419,7 @@ Numeric
- Added ``log10`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`)
- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`)
- Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`)
- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`).

Conversion
^^^^^^^^^^
Expand Down Expand Up @@ -1643,6 +1635,7 @@ Sparse
- Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`)
- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`)
- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`)
- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`)

Style
^^^^^
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ def is_scalar(val: object) -> bool:
"""

return (cnp.PyArray_IsAnyScalar(val)
# As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3.
or isinstance(val, (bytes, Fraction, Number))
# We differ from numpy (as of 1.10), which claims that None is
# not scalar in np.isscalar().
# PyArray_IsAnyScalar is always False for bytearrays on Py3
or isinstance(val, (Fraction, Number))
# We differ from numpy, which claims that None is not scalar;
# see np.isscalar
or val is None
or PyDate_Check(val)
or PyDelta_Check(val)
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import cython
from cython import Py_ssize_t

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyDateTime_CheckExact,
Expand Down
4 changes: 0 additions & 4 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ Cython implementations of functions resembling the stdlib calendar module
"""

import cython
from cython import Py_ssize_t

from numpy cimport int64_t, int32_t

Expand Down Expand Up @@ -151,12 +150,9 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
Assumes the inputs describe a valid date.
"""
cdef:
bint isleap
int32_t doy, dow
int woy

isleap = is_leapyear(year)

doy = get_day_of_year(year, month, day)
dow = dayofweek(year, month, day)

Expand Down
14 changes: 2 additions & 12 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import cython
from cython import Py_ssize_t

import numpy as np
cimport numpy as cnp
Expand Down Expand Up @@ -1133,7 +1132,7 @@ def normalize_date(dt: object) -> datetime:

@cython.wraparound(False)
@cython.boundscheck(False)
def normalize_i8_timestamps(int64_t[:] stamps, object tz=None):
def normalize_i8_timestamps(int64_t[:] stamps, object tz):
"""
Normalize each of the (nanosecond) timezone aware timestamps in the given
array by rounding down to the beginning of the day (i.e. midnight).
Expand All @@ -1152,7 +1151,6 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz=None):
Py_ssize_t n = len(stamps)
int64_t[:] result = np.empty(n, dtype=np.int64)

tz = maybe_get_tz(tz)
result = _normalize_local(stamps, tz)

return result.base # .base to access underlying np.ndarray
Expand Down Expand Up @@ -1185,15 +1183,7 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, tzinfo tz):
npy_datetimestruct dts
int64_t delta, local_val

if is_utc(tz):
with nogil:
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
dt64_to_dtstruct(stamps[i], &dts)
result[i] = _normalized_stamp(&dts)
elif is_tzlocal(tz):
if is_tzlocal(tz):
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
Expand Down
18 changes: 6 additions & 12 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_time_micros(ndarray[int64_t] dtindex):
ndarray[int64_t] micros

micros = np.mod(dtindex, DAY_SECONDS * 1000000000, dtype=np.int64)
micros //= 1000LL
micros //= 1000
return micros


Expand All @@ -48,12 +48,10 @@ def build_field_sarray(int64_t[:] dtindex):
Datetime as int64 representation to a structured array of fields
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(dtindex)
npy_datetimestruct dts
ndarray[int32_t] years, months, days, hours, minutes, seconds, mus

count = len(dtindex)

sa_dtype = [('Y', 'i4'), # year
('M', 'i4'), # month
('D', 'i4'), # day
Expand Down Expand Up @@ -93,12 +91,11 @@ def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
name based on requested field (e.g. weekday_name)
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(dtindex)
ndarray[object] out, names
npy_datetimestruct dts
int dow

count = len(dtindex)
out = np.empty(count, dtype=object)

if field == 'day_name' or field == 'weekday_name':
Expand Down Expand Up @@ -147,7 +144,7 @@ def get_start_end_field(int64_t[:] dtindex, object field,
"""
cdef:
Py_ssize_t i
int count = 0
int count = len(dtindex)
bint is_business = 0
int end_month = 12
int start_month = 1
Expand All @@ -162,7 +159,6 @@ def get_start_end_field(int64_t[:] dtindex, object field,
[0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]],
dtype=np.int32)

count = len(dtindex)
out = np.zeros(count, dtype='int8')

if freqstr:
Expand Down Expand Up @@ -388,11 +384,10 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
field and return an array of these values.
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(dtindex)
ndarray[int32_t] out
npy_datetimestruct dts

count = len(dtindex)
out = np.empty(count, dtype='i4')

if field == 'Y':
Expand Down Expand Up @@ -551,11 +546,10 @@ def get_timedelta_field(int64_t[:] tdindex, object field):
field and return an array of these values.
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(tdindex)
ndarray[int32_t] out
pandas_timedeltastruct tds

count = len(tdindex)
out = np.empty(count, dtype='i4')

if field == 'days':
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-

import cython
from cython import Py_ssize_t

import time
from cpython.datetime cimport (PyDateTime_IMPORT,
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import sys
import re
import time

from cython import Py_ssize_t

from cpython.datetime cimport datetime


Expand Down
Loading

0 comments on commit 0b570b1

Please sign in to comment.