diff --git a/pandas/_libs/src/period_helper.c b/pandas/_libs/src/period_helper.c index 19a7282f38049..cb6f0a220fafe 100644 --- a/pandas/_libs/src/period_helper.c +++ b/pandas/_libs/src/period_helper.c @@ -58,18 +58,6 @@ npy_int64 unix_date_from_ymd(int year, int month, int day) { return unix_date; } -/* Sets the date part of the date_info struct - Assumes GREGORIAN_CALENDAR */ -static int dInfoCalc_SetFromAbsDate(register struct date_info *dinfo, - npy_int64 unix_date) { - pandas_datetimestruct dts; - - pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts); - dinfo->year = dts.year; - dinfo->month = dts.month; - dinfo->day = dts.day; - return 0; -} /////////////////////////////////////////////// @@ -139,9 +127,9 @@ static npy_int64 DtoB_weekday(npy_int64 unix_date) { return floordiv(unix_date + 4, 7) * 5 + mod_compat(unix_date + 4, 7) - 4; } -static npy_int64 DtoB(struct date_info *dinfo, +static npy_int64 DtoB(pandas_datetimestruct *dts, int roll_back, npy_int64 unix_date) { - int day_of_week = dayofweek(dinfo->year, dinfo->month, dinfo->day); + int day_of_week = dayofweek(dts->year, dts->month, dts->day); if (roll_back == 1) { if (day_of_week > 4) { @@ -161,32 +149,32 @@ static npy_int64 DtoB(struct date_info *dinfo, //************ FROM DAILY *************** static npy_int64 asfreq_DTtoA(npy_int64 ordinal, asfreq_info *af_info) { - struct date_info dinfo; + pandas_datetimestruct dts; ordinal = downsample_daytime(ordinal, af_info); - dInfoCalc_SetFromAbsDate(&dinfo, ordinal); - if (dinfo.month > af_info->to_a_year_end) { - return (npy_int64)(dinfo.year + 1 - 1970); + pandas_datetime_to_datetimestruct(ordinal, PANDAS_FR_D, &dts); + if (dts.month > af_info->to_end) { + return (npy_int64)(dts.year + 1 - 1970); } else { - return (npy_int64)(dinfo.year - 1970); + return (npy_int64)(dts.year - 1970); } } static int DtoQ_yq(npy_int64 ordinal, asfreq_info *af_info, int *year) { - struct date_info dinfo; + pandas_datetimestruct dts; int quarter; - dInfoCalc_SetFromAbsDate(&dinfo, ordinal); - if (af_info->to_q_year_end != 12) { - dinfo.month -= af_info->to_q_year_end; - if (dinfo.month <= 0) { - dinfo.month += 12; + pandas_datetime_to_datetimestruct(ordinal, PANDAS_FR_D, &dts); + if (af_info->to_end != 12) { + dts.month -= af_info->to_end; + if (dts.month <= 0) { + dts.month += 12; } else { - dinfo.year += 1; + dts.year += 1; } } - *year = dinfo.year; - quarter = monthToQuarter(dinfo.month); + *year = dts.year; + quarter = monthToQuarter(dts.month); return quarter; } @@ -200,29 +188,28 @@ static npy_int64 asfreq_DTtoQ(npy_int64 ordinal, asfreq_info *af_info) { } static npy_int64 asfreq_DTtoM(npy_int64 ordinal, asfreq_info *af_info) { - struct date_info dinfo; + pandas_datetimestruct dts; ordinal = downsample_daytime(ordinal, af_info); - dInfoCalc_SetFromAbsDate(&dinfo, ordinal); - return (npy_int64)((dinfo.year - 1970) * 12 + dinfo.month - 1); + pandas_datetime_to_datetimestruct(ordinal, PANDAS_FR_D, &dts); + return (npy_int64)((dts.year - 1970) * 12 + dts.month - 1); } static npy_int64 asfreq_DTtoW(npy_int64 ordinal, asfreq_info *af_info) { ordinal = downsample_daytime(ordinal, af_info); - return floordiv(ordinal + 3 - af_info->to_week_end, 7) + 1; + return floordiv(ordinal + 3 - af_info->to_end, 7) + 1; } static npy_int64 asfreq_DTtoB(npy_int64 ordinal, asfreq_info *af_info) { - struct date_info dinfo; int roll_back; - - ordinal = downsample_daytime(ordinal, af_info); - dInfoCalc_SetFromAbsDate(&dinfo, ordinal); + pandas_datetimestruct dts; + npy_int64 unix_date = downsample_daytime(ordinal, af_info); + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts); // This usage defines roll_back the opposite way from the others roll_back = 1 - af_info->is_end; - return DtoB(&dinfo, roll_back, ordinal); + return DtoB(&dts, roll_back, unix_date); } //************ FROM BUSINESS *************** @@ -252,7 +239,7 @@ static npy_int64 asfreq_BtoW(npy_int64 ordinal, asfreq_info *af_info) { //************ FROM WEEKLY *************** static npy_int64 asfreq_WtoDT(npy_int64 ordinal, asfreq_info *af_info) { - ordinal = ordinal * 7 + af_info->from_week_end - 4 + + ordinal = ordinal * 7 + af_info->from_end - 4 + (7 - 1) * (af_info->is_end - 1); return upsample_daytime(ordinal, af_info); } @@ -274,12 +261,13 @@ static npy_int64 asfreq_WtoW(npy_int64 ordinal, asfreq_info *af_info) { } static npy_int64 asfreq_WtoB(npy_int64 ordinal, asfreq_info *af_info) { - struct date_info dinfo; + int roll_back; + pandas_datetimestruct dts; npy_int64 unix_date = asfreq_WtoDT(ordinal, af_info); - int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts); + roll_back = af_info->is_end; + return DtoB(&dts, roll_back, unix_date); } //************ FROM MONTHLY *************** @@ -313,12 +301,13 @@ static npy_int64 asfreq_MtoW(npy_int64 ordinal, asfreq_info *af_info) { } static npy_int64 asfreq_MtoB(npy_int64 ordinal, asfreq_info *af_info) { - struct date_info dinfo; + int roll_back; + pandas_datetimestruct dts; npy_int64 unix_date = asfreq_MtoDT(ordinal, af_info); - int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts); + roll_back = af_info->is_end; + return DtoB(&dts, roll_back, unix_date); } //************ FROM QUARTERLY *************** @@ -328,8 +317,8 @@ static void QtoD_ym(npy_int64 ordinal, int *year, int *month, *year = floordiv(ordinal, 4) + 1970; *month = mod_compat(ordinal, 4) * 3 + 1; - if (af_info->from_q_year_end != 12) { - *month += af_info->from_q_year_end; + if (af_info->from_end != 12) { + *month += af_info->from_end; if (*month > 12) { *month -= 12; } else { @@ -367,23 +356,24 @@ static npy_int64 asfreq_QtoW(npy_int64 ordinal, asfreq_info *af_info) { } static npy_int64 asfreq_QtoB(npy_int64 ordinal, asfreq_info *af_info) { - struct date_info dinfo; + int roll_back; + pandas_datetimestruct dts; npy_int64 unix_date = asfreq_QtoDT(ordinal, af_info); - int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts); + roll_back = af_info->is_end; + return DtoB(&dts, roll_back, unix_date); } //************ FROM ANNUAL *************** -static void AtoD_ym(npy_int64 ordinal, int *year, int *month, +static void AtoD_ym(npy_int64 ordinal, npy_int64 *year, int *month, asfreq_info *af_info) { *year = ordinal + 1970; *month = 1; - if (af_info->from_a_year_end != 12) { - *month += af_info->from_a_year_end; + if (af_info->from_end != 12) { + *month += af_info->from_end; if (*month > 12) { // This case is never reached, but is kept for symmetry // with QtoD_ym @@ -395,8 +385,8 @@ static void AtoD_ym(npy_int64 ordinal, int *year, int *month, } static npy_int64 asfreq_AtoDT(npy_int64 ordinal, asfreq_info *af_info) { - npy_int64 unix_date; - int year, month; + npy_int64 unix_date, year; + int month; ordinal += af_info->is_end; AtoD_ym(ordinal, &year, &month, af_info); @@ -423,12 +413,13 @@ static npy_int64 asfreq_AtoW(npy_int64 ordinal, asfreq_info *af_info) { } static npy_int64 asfreq_AtoB(npy_int64 ordinal, asfreq_info *af_info) { - struct date_info dinfo; + int roll_back; + pandas_datetimestruct dts; npy_int64 unix_date = asfreq_AtoDT(ordinal, af_info); - int roll_back = af_info->is_end; - dInfoCalc_SetFromAbsDate(&dinfo, unix_date); - return DtoB(&dinfo, roll_back, unix_date); + pandas_datetime_to_datetimestruct(unix_date, PANDAS_FR_D, &dts); + roll_back = af_info->is_end; + return DtoB(&dts, roll_back, unix_date); } static npy_int64 nofunc(npy_int64 ordinal, asfreq_info *af_info) { diff --git a/pandas/_libs/src/period_helper.h b/pandas/_libs/src/period_helper.h index c6313924adddd..8f538b261db9e 100644 --- a/pandas/_libs/src/period_helper.h +++ b/pandas/_libs/src/period_helper.h @@ -81,27 +81,23 @@ typedef struct asfreq_info { // char relation == 'S' (for START) --> is_end = 0 // char relation == 'E' (for END) --> is_end = 1 - int from_week_end; // day the week ends on in the "from" frequency - int to_week_end; // day the week ends on in the "to" frequency - - int from_a_year_end; // month the year ends on in the "from" frequency - int to_a_year_end; // month the year ends on in the "to" frequency - - int from_q_year_end; // month the year ends on in the "from" frequency - int to_q_year_end; // month the year ends on in the "to" frequency + int from_end; + int to_end; + // weekly: + // from_end --> day the week ends on in the "from" frequency + // to_end --> day the week ends on in the "to" frequency + // + // annual: + // from_end --> month the year ends on in the "from" frequency + // to_end --> month the year ends on in the "to" frequency + // + // quarterly: + // from_end --> month the year ends on in the "from" frequency + // to_end --> month the year ends on in the "to" frequency npy_int64 intraday_conversion_factor; } asfreq_info; -typedef struct date_info { - double second; - int minute; - int hour; - int day; - int month; - int year; -} date_info; - typedef npy_int64 (*freq_conv_func)(npy_int64, asfreq_info *af_info); /* diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 9cf7e39791f2b..89f38724cde1a 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -79,14 +79,8 @@ cdef extern from "period_helper.h": int64_t intraday_conversion_factor int is_end - int from_week_end - int to_week_end - - int from_a_year_end - int to_a_year_end - - int from_q_year_end - int to_q_year_end + int to_end + int from_end ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil @@ -147,23 +141,13 @@ cdef inline int get_freq_group_index(int freq) nogil: # specifically _dont_ use cdvision or else ordinals near -1 are assigned to # incorrect dates GH#19643 @cython.cdivision(False) -cdef int64_t get_period_ordinal(int year, int month, int day, - int hour, int minute, int second, - int microseconds, int picoseconds, - int freq) nogil: +cdef int64_t get_period_ordinal(pandas_datetimestruct *dts, int freq) nogil: """ Generate an ordinal in period space Parameters ---------- - year : int - month : int - day : int - hour : int - minute : int - second : int - microseconds : int - picoseconds : int + dts: pandas_datetimestruct* freq : int Returns @@ -182,52 +166,54 @@ cdef int64_t get_period_ordinal(int year, int month, int day, fmonth = freq - FR_ANN if fmonth == 0: fmonth = 12 - if month <= fmonth: - return year - 1970 + + mdiff = dts.month - fmonth + if mdiff <= 0: + return dts.year - 1970 else: - return year - 1970 + 1 + return dts.year - 1970 + 1 elif freq_group == FR_QTR: fmonth = freq - FR_QTR if fmonth == 0: fmonth = 12 - mdiff = month - fmonth + mdiff = dts.month - fmonth # TODO: Aren't the next two conditions equivalent to # unconditional incrementing? if mdiff < 0: mdiff += 12 - if month >= fmonth: + if dts.month >= fmonth: mdiff += 12 - return (year - 1970) * 4 + (mdiff - 1) // 3 + return (dts.year - 1970) * 4 + (mdiff - 1) // 3 elif freq == FR_MTH: - return (year - 1970) * 12 + month - 1 + return (dts.year - 1970) * 12 + dts.month - 1 - unix_date = unix_date_from_ymd(year, month, day) + unix_date = pandas_datetimestruct_to_datetime(PANDAS_FR_D, dts) if freq >= FR_SEC: - seconds = unix_date * 86400 + hour * 3600 + minute * 60 + second + seconds = unix_date * 86400 + dts.hour * 3600 + dts.min * 60 + dts.sec if freq == FR_MS: - return seconds * 1000 + microseconds // 1000 + return seconds * 1000 + dts.us // 1000 elif freq == FR_US: - return seconds * 1000000 + microseconds + return seconds * 1000000 + dts.us elif freq == FR_NS: return (seconds * 1000000000 + - microseconds * 1000 + picoseconds // 1000) + dts.us * 1000 + dts.ps // 1000) else: return seconds elif freq == FR_MIN: - return unix_date * 1440 + hour * 60 + minute + return unix_date * 1440 + dts.hour * 60 + dts.min elif freq == FR_HR: - return unix_date * 24 + hour + return unix_date * 24 + dts.hour elif freq == FR_DAY: return unix_date @@ -374,34 +360,6 @@ cdef double get_abs_time(int freq, int64_t unix_date, int64_t ordinal) nogil: return result -cdef int64_t unix_date_from_ymd(int year, int month, int day) nogil: - """ - Find the unix_date (days elapsed since datetime(1970, 1, 1) - for the given year/month/day. - - Parameters - ---------- - year : int - month : int - day : int - - Returns - ------- - unix_date : int - days elapsed since datetime(1970, 1, 1) - """ - cdef: - pandas_datetimestruct dts - int64_t unix_date - - memset(&dts, 0, sizeof(pandas_datetimestruct)) - dts.year = year - dts.month = month - dts.day = day - unix_date = pandas_datetimestruct_to_datetime(PANDAS_FR_D, &dts) - return unix_date - - cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): """ Find the year and quarter of a Period with the given ordinal and frequency @@ -434,6 +392,7 @@ cdef int get_yq(int64_t ordinal, int freq, int *quarter, int *year): else: qtr_freq = FR_QTR + assert (qtr_freq % 1000) <= 12 get_asfreq_info(FR_DAY, qtr_freq, True, &af_info) quarter[0] = DtoQ_yq(unix_date, &af_info, year) @@ -447,8 +406,8 @@ cdef int DtoQ_yq(int64_t unix_date, asfreq_info *af_info, int *year): date_info_from_days_and_time(&dts, unix_date, 0) - if af_info.to_q_year_end != 12: - dts.month -= af_info.to_q_year_end + if af_info.to_end != 12: + dts.month -= af_info.to_end if dts.month <= 0: dts.month += 12 else: @@ -490,9 +449,7 @@ def dt64arr_to_periodarr(ndarray[int64_t] dtarr, int freq, tz=None): out[i] = NPY_NAT continue dt64_to_dtstruct(dtarr[i], &dts) - out[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, dts.ps, freq) + out[i] = get_period_ordinal(&dts, freq) else: out = localize_dt64arr_to_period(dtarr, freq, tz) return out @@ -570,18 +527,18 @@ cdef void get_asfreq_info(int from_freq, int to_freq, get_freq_group_index(max_value(to_group, FR_DAY))) if from_group == FR_WK: - af_info.from_week_end = calc_week_end(from_freq, from_group) + af_info.from_end = calc_week_end(from_freq, from_group) elif from_group == FR_ANN: - af_info.from_a_year_end = calc_a_year_end(from_freq, from_group) + af_info.from_end = calc_a_year_end(from_freq, from_group) elif from_group == FR_QTR: - af_info.from_q_year_end = calc_a_year_end(from_freq, from_group) + af_info.from_end = calc_a_year_end(from_freq, from_group) if to_group == FR_WK: - af_info.to_week_end = calc_week_end(to_freq, to_group) + af_info.to_end = calc_week_end(to_freq, to_group) elif to_group == FR_ANN: - af_info.to_a_year_end = calc_a_year_end(to_freq, to_group) + af_info.to_end = calc_a_year_end(to_freq, to_group) elif to_group == FR_QTR: - af_info.to_q_year_end = calc_a_year_end(to_freq, to_group) + af_info.to_end = calc_a_year_end(to_freq, to_group) @cython.cdivision @@ -635,15 +592,43 @@ def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end): return result -def period_ordinal(int y, int m, int d, int h, int min, - int s, int us, int ps, int freq): - return get_period_ordinal(y, m, d, h, min, s, us, ps, freq) +cpdef int64_t period_ordinal(int y, int m, int d, int h, int min, + int s, int us, int ps, int freq): + """ + Find the ordinal representation of the given datetime components at the + frequency `freq`. + + Parameters + ---------- + y : int + m : int + d : int + h : int + min : int + s : int + us : int + ps : int + + Returns + ------- + ordinal : int64_t + """ + cdef: + pandas_datetimestruct dts + dts.year = y + dts.month = m + dts.day = d + dts.hour = h + dts.min = min + dts.sec = s + dts.us = us + dts.ps = ps + return get_period_ordinal(&dts, freq) cpdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) nogil: cdef: pandas_datetimestruct dts - float subsecond_fraction if ordinal == NPY_NAT: return NPY_NAT @@ -770,19 +755,15 @@ cdef int pyear(int64_t ordinal, int freq): @cython.cdivision cdef int pqyear(int64_t ordinal, int freq): cdef: - int year, quarter, qtr_freq - qtr_freq = get_yq(ordinal, freq, &quarter, &year) - if (qtr_freq % 1000) > 12: - year -= 1 + int year, quarter + get_yq(ordinal, freq, &quarter, &year) return year cdef int pquarter(int64_t ordinal, int freq): cdef: - int year, quarter, qtr_freq - qtr_freq = get_yq(ordinal, freq, &quarter, &year) - if (qtr_freq % 1000) > 12: - year -= 1 + int year, quarter + get_yq(ordinal, freq, &quarter, &year) return quarter @@ -968,9 +949,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, result[i] = NPY_NAT continue dt64_to_dtstruct(stamps[i], &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, dts.ps, freq) + result[i] = get_period_ordinal(&dts, freq) elif is_tzlocal(tz): for i in range(n): @@ -979,9 +958,7 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, continue local_val = tz_convert_utc_to_tzlocal(stamps[i], tz) dt64_to_dtstruct(local_val, &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, dts.ps, freq) + result[i] = get_period_ordinal(&dts, freq) else: # Adjust datetime64 timestamp, recompute datetimestruct trans, deltas, typ = get_dst_info(tz) @@ -998,18 +975,14 @@ cdef ndarray[int64_t] localize_dt64arr_to_period(ndarray[int64_t] stamps, result[i] = NPY_NAT continue dt64_to_dtstruct(stamps[i] + deltas[0], &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, dts.ps, freq) + result[i] = get_period_ordinal(&dts, freq) else: for i in range(n): if stamps[i] == NPY_NAT: result[i] = NPY_NAT continue dt64_to_dtstruct(stamps[i] + deltas[pos[i]], &dts) - result[i] = get_period_ordinal(dts.year, dts.month, dts.day, - dts.hour, dts.min, dts.sec, - dts.us, dts.ps, freq) + result[i] = get_period_ordinal(&dts, freq) return result @@ -1595,9 +1568,9 @@ class Period(_Period): if ordinal is None: base, mult = get_freq_code(freq) - ordinal = get_period_ordinal(dt.year, dt.month, dt.day, - dt.hour, dt.minute, dt.second, - dt.microsecond, 0, base) + ordinal = period_ordinal(dt.year, dt.month, dt.day, + dt.hour, dt.minute, dt.second, + dt.microsecond, 0, base) return cls._from_ordinal(ordinal, freq) @@ -1608,8 +1581,8 @@ cdef int64_t _ordinal_from_fields(year, month, quarter, day, if quarter is not None: year, month = _quarter_to_myear(year, quarter, freq) - return get_period_ordinal(year, month, day, hour, - minute, second, 0, 0, base) + return period_ordinal(year, month, day, hour, + minute, second, 0, 0, base) def _quarter_to_myear(year, quarter, freq):