Skip to content

Commit

Permalink
Merge branch 'master' into period_immutable
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored Aug 19, 2017
2 parents b86a7b9 + 4e9c0d1 commit 27df31a
Show file tree
Hide file tree
Showing 74 changed files with 2,310 additions and 1,935 deletions.
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ install:
- cmd: conda info -a

# create our env
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython pytest pytest-xdist
# quote the pytest spec: unquoted, cmd.exe treats ">" as output redirection
- cmd: conda create -n pandas python=%PYTHON_VERSION% cython "pytest>=3.1.0" pytest-xdist
- cmd: activate pandas
- SET REQ=ci\requirements-%PYTHON_VERSION%_WIN.run
- cmd: echo "installing requirements from %REQ%"
Expand Down
14 changes: 14 additions & 0 deletions asv_bench/benchmarks/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,3 +510,17 @@ def time_begin_incr_rng(self):

def time_begin_decr_rng(self):
self.rng - self.semi_month_begin


class DatetimeAccessor(object):
    """Benchmarks for the ``Series.dt`` accessor on a datetime Series."""

    def setup(self):
        """Build a Series of ``N`` timestamps spaced one minute apart."""
        self.N = 100000
        # 'min' is the long-standing equivalent of the 'T' alias; newer
        # pandas releases deprecate the single-letter form.
        self.series = pd.Series(
            pd.date_range(start='1/1/2000', periods=self.N, freq='min')
        )

    def time_dt_accessor(self):
        """Time accessing the ``.dt`` attribute on its own."""
        self.series.dt

    def time_dt_accessor_normalize(self):
        """Time ``.dt.normalize()`` on the Series."""
        self.series.dt.normalize()
2 changes: 1 addition & 1 deletion ci/install_circle.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ fi
# create envbuild deps
echo "[create env: ${REQ_BUILD}]"
time conda create -n pandas -q --file=${REQ_BUILD} || exit 1
time conda install -n pandas pytest || exit 1
# Quote the spec: unquoted, the shell parses ">=3.1.0" as a redirect of
# stdout to a file named "=3.1.0" and installs an unpinned pytest.
time conda install -n pandas "pytest>=3.1.0" || exit 1

source activate pandas

Expand Down
2 changes: 1 addition & 1 deletion ci/install_travis.sh
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ if [ -e ${REQ} ]; then
time bash $REQ || exit 1
fi

time conda install -n pandas pytest
# Quote the spec: unquoted, the shell parses ">=3.1.0" as a redirect of
# stdout to a file named "=3.1.0" and installs an unpinned pytest.
time conda install -n pandas "pytest>=3.1.0"
time pip install pytest-xdist

if [ "$LINT" ]; then
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements_all.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pytest
pytest>=3.1.0
pytest-cov
pytest-xdist
flake8
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ python-dateutil
pytz
numpy
cython
pytest
pytest>=3.1.0
pytest-cov
flake8
24 changes: 20 additions & 4 deletions doc/source/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -598,6 +598,10 @@ Like many packages, *pandas* uses `pytest
extensions in `numpy.testing
<http://docs.scipy.org/doc/numpy/reference/routines.testing.html>`_.

.. note::

The earliest supported pytest version is 3.1.0.

Writing tests
~~~~~~~~~~~~~

Expand Down Expand Up @@ -654,7 +658,9 @@ Using ``pytest``
Here is an example of a self-contained set of tests that illustrate multiple features that we like to use.

- functional style: tests are like ``test_*`` and *only* take arguments that are either fixtures or parameters
- ``pytest.mark`` can be used to set metadata on test functions, e.g. ``skip`` or ``xfail``.
- using ``parametrize``: allow testing of multiple cases
- to set a mark on a parameter, ``pytest.param(..., marks=...)`` syntax should be used
- ``fixture``, code for object construction, on a per-test basis
- using bare ``assert`` for scalars and truth-testing
- ``tm.assert_series_equal`` (and its counterpart ``tm.assert_frame_equal``), for pandas object comparisons.
Expand All @@ -673,6 +679,13 @@ We would name this file ``test_cool_feature.py`` and put in an appropriate place
def test_dtypes(dtype):
assert str(np.dtype(dtype)) == dtype
@pytest.mark.parametrize('dtype', ['float32',
pytest.param('int16', marks=pytest.mark.skip),
pytest.param('int32',
marks=pytest.mark.xfail(reason='to show how it works'))])
def test_mark(dtype):
assert str(np.dtype(dtype)) == 'float32'
@pytest.fixture
def series():
return pd.Series([1, 2, 3])
Expand All @@ -695,13 +708,16 @@ A test run of this yields
((pandas) bash-3.2$ pytest test_cool_feature.py -v
=========================== test session starts ===========================
platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0
collected 8 items
platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0
collected 11 items
test_cool_feature.py::test_dtypes[int8] PASSED
test_cool_feature.py::test_dtypes[int16] PASSED
test_cool_feature.py::test_dtypes[int32] PASSED
test_cool_feature.py::test_dtypes[int64] PASSED
test_cool_feature.py::test_mark[float32] PASSED
test_cool_feature.py::test_mark[int16] SKIPPED
test_cool_feature.py::test_mark[int32] xfail
test_cool_feature.py::test_series[int8] PASSED
test_cool_feature.py::test_series[int16] PASSED
test_cool_feature.py::test_series[int32] PASSED
Expand All @@ -714,8 +730,8 @@ Tests that we have ``parametrized`` are now accessible via the test name, for ex
((pandas) bash-3.2$ pytest test_cool_feature.py -v -k int8
=========================== test session starts ===========================
platform darwin -- Python 3.5.2, pytest-3.0.5, py-1.4.31, pluggy-0.4.0
collected 8 items
platform darwin -- Python 3.6.2, pytest-3.2.1, py-1.4.31, pluggy-0.4.0
collected 11 items
test_cool_feature.py::test_dtypes[int8] PASSED
test_cool_feature.py::test_series[int8] PASSED
Expand Down
7 changes: 7 additions & 0 deletions doc/source/io.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2020,6 +2020,13 @@ into a flat table.
.. ipython:: python
from pandas.io.json import json_normalize
data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
{'name': {'given': 'Mose', 'family': 'Regner'}},
{'id': 2, 'name': 'Faye Raker'}]
json_normalize(data)
.. ipython:: python
data = [{'state': 'Florida',
'shortname': 'FL',
'info': {
Expand Down
14 changes: 12 additions & 2 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ Other Enhancements
- :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`)
- Integration with `Apache Parquet <https://parquet.apache.org/>`__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here <io.parquet>`.
- :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`)
- ``read_*`` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`).
- :func:`pd.read_sas()` now recognizes many more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`).

.. _whatsnew_0210.api_breaking:

Expand Down Expand Up @@ -275,7 +277,7 @@ Other API Changes
- Removed the ``@slow`` decorator from ``pandas.util.testing``, which caused issues for some downstream packages' test suites. Use ``@pytest.mark.slow`` instead, which achieves the same thing (:issue:`16850`)
- Moved definition of ``MergeError`` to the ``pandas.errors`` module.
- The signature of :func:`Series.set_axis` and :func:`DataFrame.set_axis` has been changed from ``set_axis(axis, labels)`` to ``set_axis(labels, axis=0)``, for consistency with the rest of the API. The old signature is deprecated and will show a ``FutureWarning`` (:issue:`14636`)

- :func:`Series.argmin` and :func:`Series.argmax` will now raise a ``TypeError`` when used with ``object`` dtypes, instead of a ``ValueError`` (:issue:`13595`)

.. _whatsnew_0210.deprecations:

Expand Down Expand Up @@ -306,6 +308,7 @@ Performance Improvements
~~~~~~~~~~~~~~~~~~~~~~~~

- Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`)
- :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`)


.. _whatsnew_0210.bug_fixes:
Expand All @@ -317,8 +320,11 @@ Bug Fixes
Conversion
^^^^^^^^^^

- Bug in assignment against datetime-like data with ``int`` may incorrectly converte to datetime-like (:issue:`14145`)
- Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`)
- Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`)
- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`)
- Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. (:issue:`17237`)
- Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`)


Indexing
Expand Down Expand Up @@ -379,6 +385,7 @@ Reshaping
- Fixes regression from 0.20, :func:`Series.aggregate` and :func:`DataFrame.aggregate` allow dictionaries as return values again (:issue:`16741`)
- Fixes dtype of result with integer dtype input, from :func:`pivot_table` when called with ``margins=True`` (:issue:`17013`)
- Bug in :func:`crosstab` where passing two ``Series`` with the same name raised a ``KeyError`` (:issue:`13279`)
- :func:`Series.argmin`, :func:`Series.argmax`, and their counterparts on ``DataFrame`` and groupby objects work correctly with floating point data that contains infinite values (:issue:`13595`).

Numeric
^^^^^^^
Expand All @@ -388,6 +395,9 @@ Numeric
Categorical
^^^^^^^^^^^
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
- Bug in the categorical constructor with empty values and categories causing
the ``.categories`` to be an empty ``Float64Index`` rather than an empty
``Index`` with object dtype (:issue:`17248`)


Other
Expand Down
22 changes: 21 additions & 1 deletion pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,27 @@

from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check

from khash cimport *
from khash cimport (
khiter_t,

kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
kh_get_str, kh_destroy_str, kh_resize_str,

kh_put_strbox, kh_get_strbox, kh_init_strbox,

kh_int64_t, kh_init_int64, kh_resize_int64, kh_destroy_int64,
kh_get_int64, kh_exist_int64, kh_put_int64,

kh_float64_t, kh_exist_float64, kh_put_float64, kh_init_float64,
kh_get_float64, kh_destroy_float64, kh_resize_float64,

kh_resize_uint64, kh_exist_uint64, kh_destroy_uint64, kh_put_uint64,
kh_get_uint64, kh_init_uint64,

kh_destroy_pymap, kh_exist_pymap, kh_init_pymap, kh_get_pymap,
kh_put_pymap, kh_resize_pymap)


from numpy cimport *

from libc.stdlib cimport malloc, free
Expand Down
18 changes: 7 additions & 11 deletions pandas/_libs/index.pyx
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# cython: profile=False

from numpy cimport ndarray

from numpy cimport (float64_t, int32_t, int64_t, uint8_t,
from numpy cimport (ndarray, float64_t, int32_t, int64_t, uint8_t, uint64_t,
NPY_DATETIME, NPY_TIMEDELTA)
cimport cython

Expand All @@ -16,7 +14,9 @@ cimport util
import numpy as np

cimport tslib
from hashtable cimport *

from hashtable cimport HashTable

from pandas._libs import tslib, algos, hashtable as _hash
from pandas._libs.tslib import Timestamp, Timedelta
from datetime import datetime, timedelta
Expand All @@ -32,13 +32,9 @@ cdef extern from "datetime.h":

cdef int64_t iNaT = util.get_nat()

try:
from dateutil.tz import tzutc as _du_utc
import pytz
UTC = pytz.utc
have_pytz = True
except ImportError:
have_pytz = False
from dateutil.tz import tzutc as _du_utc
import pytz
UTC = pytz.utc

PyDateTime_IMPORT

Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/join_func_helper.pxi.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ WARNING: DO NOT edit .pxi FILE directly, .pxi is generated from .pxi.in
# asof_join_by
#----------------------------------------------------------------------

from hashtable cimport PyObjectHashTable, UInt64HashTable, Int64HashTable

{{py:

# table_type, by_dtype
Expand All @@ -23,7 +25,6 @@ on_dtypes = ['uint8_t', 'uint16_t', 'uint32_t', 'uint64_t',
}}


from hashtable cimport *

{{for table_type, by_dtype in by_dtypes}}
{{for on_dtype in on_dtypes}}
Expand Down
24 changes: 14 additions & 10 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -10,21 +10,14 @@ from numpy cimport *

np.import_array()

cdef extern from "numpy/arrayobject.h":
cdef enum NPY_TYPES:
NPY_intp "NPY_INTP"

from libc.stdlib cimport malloc, free

from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem,
PyDict_Contains, PyDict_Keys,
Py_INCREF, PyTuple_SET_ITEM,
from cpython cimport (Py_INCREF, PyTuple_SET_ITEM,
PyList_Check, PyFloat_Check,
PyString_Check,
PyBytes_Check,
PyTuple_SetItem,
PyUnicode_Check,
PyTuple_New,
PyObject_SetAttrString,
PyObject_RichCompareBool,
PyBytes_GET_SIZE,
PyUnicode_GET_SIZE,
Expand Down Expand Up @@ -55,7 +48,18 @@ cdef double NAN = nan
from datetime import datetime as pydatetime

# this is our tseries.pxd
from datetime cimport *
from datetime cimport (
get_timedelta64_value, get_datetime64_value,
npy_timedelta, npy_datetime,
PyDateTime_Check, PyDate_Check, PyTime_Check, PyDelta_Check,
PyDateTime_GET_YEAR,
PyDateTime_GET_MONTH,
PyDateTime_GET_DAY,
PyDateTime_DATE_GET_HOUR,
PyDateTime_DATE_GET_MINUTE,
PyDateTime_DATE_GET_SECOND,
PyDateTime_IMPORT)


from tslib cimport (convert_to_tsobject, convert_to_timedelta64,
_check_all_nulls)
Expand Down
11 changes: 9 additions & 2 deletions pandas/_libs/parsers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ cdef extern from "stdlib.h":
cimport cython
cimport numpy as cnp

from numpy cimport ndarray, uint8_t, uint64_t
from numpy cimport ndarray, uint8_t, uint64_t, int64_t

import numpy as np
cimport util
Expand All @@ -57,7 +57,14 @@ import os

cnp.import_array()

from khash cimport *
from khash cimport (
khiter_t,
kh_str_t, kh_init_str, kh_put_str, kh_exist_str,
kh_get_str, kh_destroy_str,
kh_float64_t, kh_get_float64, kh_destroy_float64,
kh_put_float64, kh_init_float64,
kh_strbox_t, kh_put_strbox, kh_get_strbox, kh_init_strbox,
kh_destroy_strbox)

import sys

Expand Down
20 changes: 13 additions & 7 deletions pandas/_libs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ from datetime import datetime, date, timedelta
import operator

from cpython cimport (
PyUnicode_Check,
PyObject_RichCompareBool,
Py_EQ, Py_NE)

Expand All @@ -18,21 +19,29 @@ from pandas import compat
from pandas.compat import PY2

cimport cython
from datetime cimport *

from datetime cimport (
is_leapyear,
PyDateTime_IMPORT,
pandas_datetimestruct,
pandas_datetimestruct_to_datetime,
pandas_datetime_to_datetimestruct,
PANDAS_FR_ns,
INT32_MIN)


cimport util, lib

from lib cimport is_null_datetimelike, is_period
from pandas._libs import tslib, lib
from pandas._libs.tslib import (Timedelta, Timestamp, iNaT,
NaT, have_pytz, _get_utcoffset)
NaT, _get_utcoffset)
from tslib cimport (
maybe_get_tz,
_is_utc,
_is_tzlocal,
_get_dst_info,
_nat_scalar_rules,
)
_nat_scalar_rules)

from pandas.tseries import offsets
from pandas.core.tools.datetimes import parse_time_string
Expand Down Expand Up @@ -611,9 +620,6 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps,
ndarray[int64_t] trans, deltas, pos
pandas_datetimestruct dts

if not have_pytz:
raise Exception('Could not find pytz module')

if _is_utc(tz):
for i in range(n):
if stamps[i] == NPY_NAT:
Expand Down
Loading

0 comments on commit 27df31a

Please sign in to comment.