Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/pandas-dev/pandas into di…
Browse files Browse the repository at this point in the history
…sown3
  • Loading branch information
jbrockmendel committed Dec 24, 2018
2 parents 4522dfe + fc7bc3f commit eb594e7
Show file tree
Hide file tree
Showing 77 changed files with 3,600 additions and 2,831 deletions.
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def setup(self, axis):
self.empty_right = [df, DataFrame()]

def time_concat_series(self, axis):
concat(self.series, axis=axis)
concat(self.series, axis=axis, sort=False)

def time_concat_small_frames(self, axis):
concat(self.small_frames, axis=axis)
Expand Down
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/panel_ctor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import warnings
from datetime import datetime, timedelta

from pandas import DataFrame, Panel, DatetimeIndex, date_range
from pandas import DataFrame, Panel, date_range


class DifferentIndexes(object):
Expand All @@ -23,9 +23,9 @@ def time_from_dict(self):
class SameIndexes(object):

def setup(self):
idx = DatetimeIndex(start=datetime(1990, 1, 1),
end=datetime(2012, 1, 1),
freq='D')
idx = date_range(start=datetime(1990, 1, 1),
end=datetime(2012, 1, 1),
freq='D')
df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx)
self.data_frames = dict(enumerate([df] * 100))

Expand All @@ -40,10 +40,10 @@ def setup(self):
start = datetime(1990, 1, 1)
end = datetime(2012, 1, 1)
df1 = DataFrame({'a': 0, 'b': 1, 'c': 2},
index=DatetimeIndex(start=start, end=end, freq='D'))
index=date_range(start=start, end=end, freq='D'))
end += timedelta(days=1)
df2 = DataFrame({'a': 0, 'b': 1, 'c': 2},
index=DatetimeIndex(start=start, end=end, freq='D'))
index=date_range(start=start, end=end, freq='D'))
dfs = [df1] * 50 + [df2] * 50
self.data_frames = dict(enumerate(dfs))

Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/reindex.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import numpy as np
import pandas.util.testing as tm
from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index,
from pandas import (DataFrame, Series, MultiIndex, Index,
date_range)
from .pandas_vb_common import lib


class Reindex(object):

def setup(self):
rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min')
rng = date_range(start='1/1/1970', periods=10000, freq='1min')
self.df = DataFrame(np.random.rand(10000, 10), index=rng,
columns=range(10))
self.df['foo'] = 'bar'
Expand Down
9 changes: 5 additions & 4 deletions asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime

import numpy as np
from pandas import Series, timedelta_range, to_timedelta, Timestamp, \
Timedelta, TimedeltaIndex, DataFrame

from pandas import (
DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta)


class TimedeltaConstructor(object):
Expand Down Expand Up @@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series):
class TimedeltaIndexing(object):

def setup(self):
self.index = TimedeltaIndex(start='1985', periods=1000, freq='D')
self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D')
self.index = timedelta_range(start='1985', periods=1000, freq='D')
self.index2 = timedelta_range(start='1986', periods=1000, freq='D')
self.series = Series(range(1000), index=self.index)
self.timedelta = self.index[500]

Expand Down
7 changes: 4 additions & 3 deletions asv_bench/benchmarks/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime

from pandas import Timestamp
import pytz
import dateutil
import pytz

from pandas import Timestamp


class TimestampConstruction(object):
Expand Down Expand Up @@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq):
self.ts.dayofweek

def time_weekday_name(self, tz, freq):
self.ts.weekday_name
self.ts.day_name

def time_dayofyear(self, tz, freq):
self.ts.dayofyear
Expand Down
2 changes: 1 addition & 1 deletion ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
RET=$(($RET + $?)) ; echo $MSG "DONE"

MSG='Check that the deprecated `assert_raises_regex` is not used (`pytest.raises(match=pattern)` should be used instead)' ; echo $MSG
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_testing.py assert_raises_regex pandas
invgrep -R --exclude=*.pyc --exclude=testing.py --exclude=test_util.py assert_raises_regex pandas
RET=$(($RET + $?)) ; echo $MSG "DONE"

# Check that we use pytest.raises only as a context manager
Expand Down
31 changes: 12 additions & 19 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ Other Enhancements
- :meth:`MultiIndex.to_flat_index` has been added to flatten multiple levels into a single-level :class:`Index` object.
- :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` can write mixed string columns to Stata strl format (:issue:`23633`)
- :meth:`DataFrame.between_time` and :meth:`DataFrame.at_time` have gained an ``axis`` parameter (:issue:`8839`)
- The ``scatter_matrix``, ``andrews_curves``, ``parallel_coordinates``, ``lag_plot``, ``autocorrelation_plot``, ``bootstrap_plot``, and ``radviz`` plots from the ``pandas.plotting`` module are now accessible from calling :meth:`DataFrame.plot` (:issue:`11978`)
- :class:`IntervalIndex` has gained the :attr:`~IntervalIndex.is_overlapping` attribute to indicate if the ``IntervalIndex`` contains any overlapping intervals (:issue:`23309`)

.. _whatsnew_0240.api_breaking:
Expand Down Expand Up @@ -673,7 +674,7 @@ changes were made:
* The ``out`` and ``mode`` parameters are no longer accepted (previously, this raised if they were specified).
* Passing a scalar for ``indices`` is no longer allowed.

- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer support combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).
Expand Down Expand Up @@ -1124,12 +1125,14 @@ Other API Changes
has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True``. (:issue:`22484`)
- Slicing a single row of a DataFrame with multiple ExtensionArrays of the same type now preserves the dtype, rather than coercing to object (:issue:`22784`)
- :class:`DateOffset` attribute ``_cacheable`` and method ``_should_cache`` have been removed (:issue:`23118`)
- :meth:`Series.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23801`).
- :meth:`Categorical.searchsorted`, when supplied a scalar value to search for, now returns a scalar instead of an array (:issue:`23466`).
- :meth:`Categorical.searchsorted` now raises a ``KeyError`` rather than a ``ValueError``, if a searched-for key is not found in its categories (:issue:`23466`).
- :meth:`Index.hasnans` and :meth:`Series.hasnans` now always return a python boolean. Previously, a python or a numpy boolean could be returned, depending on circumstances (:issue:`23294`).
- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`).
- :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` and ``Index.tolist``, respectively (:issue:`8826`)
- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`).

.. _whatsnew_0240.deprecations:

Expand Down Expand Up @@ -1177,25 +1180,19 @@ Deprecations

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Integer Addition/Subtraction with Datetime-like Classes Is Deprecated
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In the past, users could add or subtract integers or integer-dtypes arrays
from :class:`Period`, :class:`PeriodIndex`, and in some cases
:class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`.
Integer Addition/Subtraction with Datetimes and Timedeltas is Deprecated
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In the past, users could—in some cases—add or subtract integers or integer-dtype
arrays from :class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`.

This usage is now deprecated. Instead add or subtract integer multiples of
the object's ``freq`` attribute. The result of subtraction of :class:`Period`
objects will be agnostic of the multiplier of the objects' ``freq`` attribute
(:issue:`21939`, :issue:`23878`).
the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`).

*Previous Behavior*:

.. code-block:: ipython
In [3]: per = pd.Period('2016Q1')
In [4]: per + 3
Out[4]: Period('2016Q4', 'Q-DEC')
In [5]: ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour())
In [6]: ts + 2
Out[6]: Timestamp('1994-05-06 14:15:16', freq='H')
Expand All @@ -1213,12 +1210,6 @@ objects will be agnostic of the multiplier of the objects' ``freq`` attribute
.. ipython:: python
:okwarning:
per = pd.Period('2016Q1')
per + 3
per = pd.Period('2016Q1')
per + 3 * per.freq
ts = pd.Timestamp('1994-05-06 12:15:16', freq=pd.offsets.Hour())
ts + 2 * ts.freq
Expand Down Expand Up @@ -1428,6 +1419,7 @@ Numeric
- Added ``log10`` to the list of supported functions in :meth:`DataFrame.eval` (:issue:`24139`)
- Logical operations ``&, |, ^`` between :class:`Series` and :class:`Index` will no longer raise ``ValueError`` (:issue:`22092`)
- Checking PEP 3141 numbers in :func:`~pandas.api.types.is_scalar` function returns ``True`` (:issue:`22903`)
- Reduction methods like :meth:`Series.sum` now accept the default value of ``keepdims=False`` when called from a NumPy ufunc, rather than raising a ``TypeError``. Full support for ``keepdims`` has not been implemented (:issue:`24356`).

Conversion
^^^^^^^^^^
Expand Down Expand Up @@ -1643,6 +1635,7 @@ Sparse
- Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`)
- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`)
- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`)
- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`)

Style
^^^^^
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -153,10 +153,10 @@ def is_scalar(val: object) -> bool:
"""

return (cnp.PyArray_IsAnyScalar(val)
# As of numpy-1.9, PyArray_IsAnyScalar misses bytearrays on Py3.
or isinstance(val, (bytes, Fraction, Number))
# We differ from numpy (as of 1.10), which claims that None is
# not scalar in np.isscalar().
# PyArray_IsAnyScalar is always False for bytearrays on Py3
or isinstance(val, (Fraction, Number))
# We differ from numpy, which claims that None is not scalar;
# see np.isscalar
or val is None
or PyDate_Check(val)
or PyDelta_Check(val)
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import cython
from cython import Py_ssize_t

from cpython.datetime cimport (PyDateTime_Check, PyDate_Check,
PyDateTime_CheckExact,
Expand Down
4 changes: 0 additions & 4 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ Cython implementations of functions resembling the stdlib calendar module
"""

import cython
from cython import Py_ssize_t

from numpy cimport int64_t, int32_t

Expand Down Expand Up @@ -151,12 +150,9 @@ cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
Assumes the inputs describe a valid date.
"""
cdef:
bint isleap
int32_t doy, dow
int woy

isleap = is_leapyear(year)

doy = get_day_of_year(year, month, day)
dow = dayofweek(year, month, day)

Expand Down
14 changes: 2 additions & 12 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
import cython
from cython import Py_ssize_t

import numpy as np
cimport numpy as cnp
Expand Down Expand Up @@ -1133,7 +1132,7 @@ def normalize_date(dt: object) -> datetime:

@cython.wraparound(False)
@cython.boundscheck(False)
def normalize_i8_timestamps(int64_t[:] stamps, object tz=None):
def normalize_i8_timestamps(int64_t[:] stamps, object tz):
"""
Normalize each of the (nanosecond) timezone aware timestamps in the given
array by rounding down to the beginning of the day (i.e. midnight).
Expand All @@ -1152,7 +1151,6 @@ def normalize_i8_timestamps(int64_t[:] stamps, object tz=None):
Py_ssize_t n = len(stamps)
int64_t[:] result = np.empty(n, dtype=np.int64)

tz = maybe_get_tz(tz)
result = _normalize_local(stamps, tz)

return result.base # .base to access underlying np.ndarray
Expand Down Expand Up @@ -1185,15 +1183,7 @@ cdef int64_t[:] _normalize_local(int64_t[:] stamps, tzinfo tz):
npy_datetimestruct dts
int64_t delta, local_val

if is_utc(tz):
with nogil:
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
continue
dt64_to_dtstruct(stamps[i], &dts)
result[i] = _normalized_stamp(&dts)
elif is_tzlocal(tz):
if is_tzlocal(tz):
for i in range(n):
if stamps[i] == NPY_NAT:
result[i] = NPY_NAT
Expand Down
18 changes: 6 additions & 12 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def get_time_micros(ndarray[int64_t] dtindex):
ndarray[int64_t] micros

micros = np.mod(dtindex, DAY_SECONDS * 1000000000, dtype=np.int64)
micros //= 1000LL
micros //= 1000
return micros


Expand All @@ -48,12 +48,10 @@ def build_field_sarray(int64_t[:] dtindex):
Datetime as int64 representation to a structured array of fields
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(dtindex)
npy_datetimestruct dts
ndarray[int32_t] years, months, days, hours, minutes, seconds, mus

count = len(dtindex)

sa_dtype = [('Y', 'i4'), # year
('M', 'i4'), # month
('D', 'i4'), # day
Expand Down Expand Up @@ -93,12 +91,11 @@ def get_date_name_field(int64_t[:] dtindex, object field, object locale=None):
name based on requested field (e.g. weekday_name)
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(dtindex)
ndarray[object] out, names
npy_datetimestruct dts
int dow

count = len(dtindex)
out = np.empty(count, dtype=object)

if field == 'day_name' or field == 'weekday_name':
Expand Down Expand Up @@ -147,7 +144,7 @@ def get_start_end_field(int64_t[:] dtindex, object field,
"""
cdef:
Py_ssize_t i
int count = 0
int count = len(dtindex)
bint is_business = 0
int end_month = 12
int start_month = 1
Expand All @@ -162,7 +159,6 @@ def get_start_end_field(int64_t[:] dtindex, object field,
[0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]],
dtype=np.int32)

count = len(dtindex)
out = np.zeros(count, dtype='int8')

if freqstr:
Expand Down Expand Up @@ -388,11 +384,10 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
field and return an array of these values.
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(dtindex)
ndarray[int32_t] out
npy_datetimestruct dts

count = len(dtindex)
out = np.empty(count, dtype='i4')

if field == 'Y':
Expand Down Expand Up @@ -551,11 +546,10 @@ def get_timedelta_field(int64_t[:] tdindex, object field):
field and return an array of these values.
"""
cdef:
Py_ssize_t i, count = 0
Py_ssize_t i, count = len(tdindex)
ndarray[int32_t] out
pandas_timedeltastruct tds

count = len(tdindex)
out = np.empty(count, dtype='i4')

if field == 'days':
Expand Down
1 change: 0 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# -*- coding: utf-8 -*-

import cython
from cython import Py_ssize_t

import time
from cpython.datetime cimport (PyDateTime_IMPORT,
Expand Down
2 changes: 0 additions & 2 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ import sys
import re
import time

from cython import Py_ssize_t

from cpython.datetime cimport datetime


Expand Down
Loading

0 comments on commit eb594e7

Please sign in to comment.