Skip to content

Commit

Permalink
standalone implementation of ccalendar (pandas-dev#18540)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and jreback committed Dec 7, 2017
1 parent 279578c commit 3e506a3
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 32 deletions.
12 changes: 12 additions & 0 deletions pandas/_libs/tslibs/ccalendar.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
# cython: profile=False

from cython cimport Py_ssize_t

from numpy cimport int64_t, int32_t


# C-level declarations for modules that cimport ccalendar.
# Parameter names match the definitions in ccalendar.pyx (year, month, day).
cdef int dayofweek(int y, int m, int d) nogil
cdef bint is_leapyear(int64_t year) nogil
cpdef int32_t get_days_in_month(int year, Py_ssize_t month) nogil
cpdef int32_t get_week_of_year(int year, int month, int day) nogil
163 changes: 163 additions & 0 deletions pandas/_libs/tslibs/ccalendar.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# -*- coding: utf-8 -*-
# cython: profile=False
# cython: boundscheck=False
"""
Cython implementations of functions resembling the stdlib calendar module
"""

cimport cython
from cython cimport Py_ssize_t

import numpy as np
cimport numpy as np
from numpy cimport int64_t, int32_t
np.import_array()


# ----------------------------------------------------------------------
# Constants

# Month lengths, stored as one flat 24-entry array because that gives
# slightly more performant cython lookups than a 2D table.
# The first 12 entries correspond to month lengths for non-leap years.
# The remaining 12 entries give month lengths for leap years.
# get_days_in_month reads entry [12 * is_leapyear(year) + month - 1].
cdef int32_t* days_per_month_array = [
31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31,
31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

# Per-month terms for Sakamoto's day-of-week method; dayofweek reads
# entry [m - 1] for a 1-based month m.
cdef int* sakamoto_arr = [0, 3, 2, 5, 0, 3, 5, 1, 4, 6, 2, 4]

# The first 13 entries give the month days elapsed as of the first of month N
# (or the total number of days in the year for N=13) in non-leap years.
# The remaining 13 entries give the days elapsed in leap years.
# get_week_of_year reads entry [isleap * 13 + month - 1].
cdef int32_t* _month_offset = [
0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365,
0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]

# ----------------------------------------------------------------------


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef inline int32_t get_days_in_month(int year, Py_ssize_t month) nogil:
    """
    Look up how many days the given month has in the given year.

    Parameters
    ----------
    year : int
    month : int
        1-based month number (1 = January, ..., 12 = December).

    Returns
    -------
    days_in_month : int

    Notes
    -----
    The arguments are not validated. A month outside 1..12 indexes past
    the lookup table and risks a segfault.
    """
    cdef:
        Py_ssize_t table_start

    # the leap-year month lengths occupy the second half of the flat table
    table_start = 12 if is_leapyear(year) else 0
    return days_per_month_array[table_start + month - 1]


@cython.wraparound(False)
@cython.boundscheck(False)
@cython.cdivision
cdef int dayofweek(int y, int m, int d) nogil:
    """
    Compute the weekday of the date (y, m, d) using Sakamoto's method.

    The result uses the same numbering as the stdlib ``calendar.weekday``
    [1]_: 0 represents Monday.

    Parameters
    ----------
    y : int
        Year.
    m : int
        Month, 1-based.
    d : int
        Day of the month.

    Returns
    -------
    weekday : int

    Notes
    -----
    The triple (y, m, d) is assumed to describe a valid date; nothing is
    checked here.

    See Also
    --------
    [1] https://docs.python.org/3.6/library/calendar.html#calendar.weekday
    [2] https://en.wikipedia.org/wiki/\
Determination_of_the_day_of_the_week#Sakamoto.27s_methods
    """
    cdef:
        int year_term, sakamoto_day

    # January and February are counted against the preceding year
    if m < 3:
        y -= 1

    year_term = y + y / 4 - y / 100 + y / 400
    sakamoto_day = (year_term + sakamoto_arr[m - 1] + d) % 7

    # rotate Sakamoto's numbering into the python convention (Monday == 0)
    return (sakamoto_day + 6) % 7


cdef bint is_leapyear(int64_t year) nogil:
    """
    Tell whether the given year is a leap year.

    Parameters
    ----------
    year : int

    Returns
    -------
    is_leap : bool
        1 for a leap year, 0 otherwise.
    """
    # low two bits set  <=>  year % 4 != 0, so it cannot be a leap year
    if (year & 0x3) != 0:
        return False

    # a multiple of 4 is a leap year unless it is a century year that is
    # not also a multiple of 400
    return (year % 100) != 0 or (year % 400) == 0


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef int32_t get_week_of_year(int year, int month, int day) nogil:
    """
    Compute the ordinal week-of-year that the given date falls in.

    Parameters
    ----------
    year : int
    month : int
        1-based month number.
    day : int
        Day of the month.

    Returns
    -------
    week_of_year : int32_t

    Notes
    -----
    The inputs are assumed to describe a valid date; nothing is checked
    here.
    """
    cdef:
        bint leap
        int32_t day_of_year, weekday
        int week

    leap = is_leapyear(year)

    # days elapsed before the first of the month, plus the day of the month
    day_of_year = _month_offset[leap * 13 + month - 1] + day
    weekday = dayofweek(year, month, day)

    # first estimate; a negative value is left undivided and resolved below
    week = (day_of_year - 1) - weekday + 3

    if week < 0:
        # the date is counted in the last week of the preceding year,
        # which is numbered 53 or 52
        if week > -2 or (week == -2 and is_leapyear(year - 1)):
            return 53
        return 52

    week = week / 7 + 1

    # a week-53 estimate becomes week 1 when fewer than 3 days remain
    # according to the ``31 - day + weekday`` check
    if week == 53 and 31 - day + weekday < 3:
        return 1
    return week
36 changes: 6 additions & 30 deletions pandas/_libs/tslibs/fields.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ from numpy cimport ndarray, int64_t, int32_t, int8_t
np.import_array()


from ccalendar cimport (get_days_in_month, is_leapyear, dayofweek,
get_week_of_year)
from np_datetime cimport (pandas_datetimestruct, pandas_timedeltastruct,
dt64_to_dtstruct, td64_to_tdstruct,
days_per_month_table, is_leapyear, dayofweek)
dt64_to_dtstruct, td64_to_tdstruct)
from nattype cimport NPY_NAT


Expand Down Expand Up @@ -379,7 +380,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
ndarray[int32_t, ndim=2] _month_offset
int isleap, isleap_prev
pandas_datetimestruct dts
int mo_off, doy, dow, woy
int mo_off, doy, dow

_month_offset = np.array(
[[ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 ],
Expand Down Expand Up @@ -507,28 +508,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
continue

dt64_to_dtstruct(dtindex[i], &dts)
isleap = is_leapyear(dts.year)
isleap_prev = is_leapyear(dts.year - 1)
mo_off = _month_offset[isleap, dts.month - 1]
doy = mo_off + dts.day
dow = dayofweek(dts.year, dts.month, dts.day)

# estimate
woy = (doy - 1) - dow + 3
if woy >= 0:
woy = woy / 7 + 1

# verify
if woy < 0:
if (woy > -2) or (woy == -2 and isleap_prev):
woy = 53
else:
woy = 52
elif woy == 53:
if 31 - dts.day + dow < 3:
woy = 1

out[i] = woy
out[i] = get_week_of_year(dts.year, dts.month, dts.day)
return out

elif field == 'q':
Expand All @@ -551,7 +531,7 @@ def get_date_field(ndarray[int64_t] dtindex, object field):
continue

dt64_to_dtstruct(dtindex[i], &dts)
out[i] = days_in_month(dts)
out[i] = get_days_in_month(dts.year, dts.month)
return out
elif field == 'is_leap_year':
return isleapyear_arr(get_date_field(dtindex, 'Y'))
Expand Down Expand Up @@ -676,10 +656,6 @@ def get_timedelta_field(ndarray[int64_t] tdindex, object field):
raise ValueError("Field %s not supported" % field)


cdef inline int days_in_month(pandas_datetimestruct dts) nogil:
return days_per_month_table[is_leapyear(dts.year)][dts.month - 1]


cpdef isleapyear_arr(ndarray years):
"""vectorized version of isleapyear; NaT evaluates as False"""
cdef:
Expand Down
6 changes: 5 additions & 1 deletion pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ from util cimport (is_datetime64_object, is_timedelta64_object,
is_integer_object, is_string_object,
INT64_MAX)

cimport ccalendar
from conversion import tz_localize_to_utc, date_normalize
from conversion cimport (tz_convert_single, _TSObject,
convert_to_tsobject, convert_datetime_to_tsobject)
Expand Down Expand Up @@ -699,6 +700,9 @@ class Timestamp(_Timestamp):

@property
def week(self):
if self.freq is None:
# fastpath for non-business
return ccalendar.get_week_of_year(self.year, self.month, self.day)
return self._get_field('woy')

weekofyear = week
Expand All @@ -709,7 +713,7 @@ class Timestamp(_Timestamp):

@property
def days_in_month(self):
return self._get_field('dim')
return ccalendar.get_days_in_month(self.year, self.month)

daysinmonth = days_in_month

Expand Down
7 changes: 6 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ class CheckSDist(sdist_class):
'pandas/_libs/skiplist.pyx',
'pandas/_libs/sparse.pyx',
'pandas/_libs/parsers.pyx',
'pandas/_libs/tslibs/ccalendar.pyx',
'pandas/_libs/tslibs/period.pyx',
'pandas/_libs/tslibs/strptime.pyx',
'pandas/_libs/tslibs/np_datetime.pyx',
Expand Down Expand Up @@ -537,6 +538,8 @@ def pxd(name):
'_libs/tslibs/nattype'],
'depends': tseries_depends,
'sources': np_datetime_sources},
'_libs.tslibs.ccalendar': {
'pyxfile': '_libs/tslibs/ccalendar'},
'_libs.tslibs.conversion': {
'pyxfile': '_libs/tslibs/conversion',
'pxdfiles': ['_libs/src/util',
Expand All @@ -547,7 +550,8 @@ def pxd(name):
'sources': np_datetime_sources},
'_libs.tslibs.fields': {
'pyxfile': '_libs/tslibs/fields',
'pxdfiles': ['_libs/tslibs/nattype'],
'pxdfiles': ['_libs/tslibs/ccalendar',
'_libs/tslibs/nattype'],
'depends': tseries_depends,
'sources': np_datetime_sources},
'_libs.tslibs.frequencies': {
Expand Down Expand Up @@ -594,6 +598,7 @@ def pxd(name):
'_libs.tslibs.timestamps': {
'pyxfile': '_libs/tslibs/timestamps',
'pxdfiles': ['_libs/src/util',
'_libs/tslibs/ccalendar',
'_libs/tslibs/conversion',
'_libs/tslibs/nattype',
'_libs/tslibs/timedeltas',
Expand Down

0 comments on commit 3e506a3

Please sign in to comment.