Skip to content

Commit

Permalink
PERF: lazify pytz seqToRE call, trims 35ms from import (#28228)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and WillAyd committed Aug 30, 2019
1 parent 7b25463 commit 51db82d
Showing 1 changed file with 29 additions and 15 deletions.
44 changes: 29 additions & 15 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,8 @@ def array_strptime(object[:] values, object fmt,
return result, result_timezone.base


"""_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
"""
_getlang, LocaleTime, TimeRE, _calc_julian_from_U_or_W are vendored
from the standard library, see
https://github.com/python/cpython/blob/master/Lib/_strptime.py
The original module-level docstring follows.
Expand All @@ -363,7 +364,8 @@ def _getlang():


class LocaleTime:
"""Stores and handles locale-specific information related to time.
"""
Stores and handles locale-specific information related to time.
ATTRIBUTES:
f_weekday -- full weekday names (7-item list)
Expand All @@ -382,7 +384,8 @@ class LocaleTime:
"""

def __init__(self):
"""Set all attributes.
"""
Set all attributes.
Order of methods called matters for dependency reasons.
Expand All @@ -399,7 +402,6 @@ class LocaleTime:
Only other possible issue is if someone changed the timezone and did
not call time.tzset(). That is an issue for the programmer, though,
since changing the timezone is worthless without that call.
"""
self.lang = _getlang()
self.__calc_weekday()
Expand Down Expand Up @@ -518,15 +520,16 @@ class TimeRE(dict):
"""

def __init__(self, locale_time=None):
"""Create keys/values.
"""
Create keys/values.
Order of execution is important for dependency reasons.
"""
if locale_time:
self.locale_time = locale_time
else:
self.locale_time = LocaleTime()
self._Z = None
base = super()
base.__init__({
# The " \d" part of the regex is to make %c from ANSI C work
Expand Down Expand Up @@ -555,21 +558,29 @@ class TimeRE(dict):
'B': self.__seqToRE(self.locale_time.f_month[1:], 'B'),
'b': self.__seqToRE(self.locale_time.a_month[1:], 'b'),
'p': self.__seqToRE(self.locale_time.am_pm, 'p'),
'Z': self.__seqToRE(pytz.all_timezones, 'Z'),
# 'Z' key is generated lazily via __getitem__
'%': '%'})
base.__setitem__('W', base.__getitem__('U').replace('U', 'W'))
base.__setitem__('c', self.pattern(self.locale_time.LC_date_time))
base.__setitem__('x', self.pattern(self.locale_time.LC_date))
base.__setitem__('X', self.pattern(self.locale_time.LC_time))

def __getitem__(self, key):
    """
    Look up the regex fragment for a directive.

    Every directive except 'Z' is served straight from the underlying
    dict.  The 'Z' pattern is built from pytz.all_timezones the first
    time it is requested and then kept on the instance in ``self._Z``.
    """
    if key != "Z":
        return super().__getitem__(key)

    # Lazy path: build the timezone-name regex only on first use.
    if self._Z is None:
        self._Z = self.__seqToRE(pytz.all_timezones, 'Z')
    return self._Z

def __seqToRE(self, to_convert, directive):
"""Convert a list to a regex string for matching a directive.
"""
Convert a list to a regex string for matching a directive.
Want possible matching values to be from longest to shortest. This
prevents the possibility of a match occurring for a value that is also
a substring of a larger value that should have matched (e.g., 'abc'
matching when 'abcdef' should have been the match).
"""
to_convert = sorted(to_convert, key=len, reverse=True)
for value in to_convert:
Expand All @@ -582,11 +593,11 @@ class TimeRE(dict):
return '%s)' % regex

def pattern(self, format):
"""Return regex pattern for the format string.
"""
Return regex pattern for the format string.
Need to make sure that any characters that might be interpreted as
regex syntax are escaped.
"""
processed_format = ''
# The sub() call escapes all characters that might be misconstrued
Expand Down Expand Up @@ -619,7 +630,8 @@ _regex_cache = {}

cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
int day_of_week, int week_starts_Mon):
"""Calculate the Julian day based on the year, week of the year, and day of
"""
Calculate the Julian day based on the year, week of the year, and day of
the week, with week_start_day representing whether the week of the year
assumes the week starts on Sunday or Monday (6 or 0).
Expand Down Expand Up @@ -660,8 +672,10 @@ cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
return 1 + days_to_week + day_of_week


cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
"""Calculate the Julian day based on the ISO 8601 year, week, and weekday.
cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
"""
Calculate the Julian day based on the ISO 8601 year, week, and weekday.
ISO weeks start on Mondays, with week 01 being the week containing 4 Jan.
ISO week days range from 1 (Monday) to 7 (Sunday).
Expand Down Expand Up @@ -694,7 +708,7 @@ cdef object _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday):
return iso_year, ordinal


cdef parse_timezone_directive(object z):
cdef parse_timezone_directive(str z):
"""
Parse the '%z' directive and return a pytz.FixedOffset
Expand Down

0 comments on commit 51db82d

Please sign in to comment.