-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathdrdatetime.py
286 lines (241 loc) · 12.7 KB
/
drdatetime.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
#####
#
# Module name: drdatetime.py
# Purpose: Miscellaneous date and time management functions for dupReport
#
# Notes:
#
#####
# Import system modules
import re
import datetime
# Import dupReport modules
import globs
# dtFmtDefs - definitions for date field formats
# Tuples are defined as follows:
# Field Purpose
# 0 separator character
# 1, 2, 3 Positions in format string for (year, month, day) or (hour, minute, seconds)
#
# Note: There's only one recognized time string format. But given all the
# problems I had with date string recognition, this makes time strings
# more flexible should the need arise in the future.
# Every supported date delimiter is paired with the same four field orderings,
# so only the orderings are spelled out and the table is generated from them.
# Each value is (delimiter, year/hour column, month/minute column, day/second column).
dtFmtDefs = {
    delim.join(fields): (delim, yrCol, moCol, dyCol)
    for delim in ('/', '-', '.')
    for fields, yrCol, moCol, dyCol in (
        (('YYYY', 'MM', 'DD'), 0, 1, 2),
        (('YYYY', 'DD', 'MM'), 0, 2, 1),
        (('MM', 'DD', 'YYYY'), 2, 0, 1),
        (('DD', 'MM', 'YYYY'), 2, 1, 0),
    )
}
# The single recognized time format goes last, after all the date formats.
dtFmtDefs['HH:MM:SS'] = (':', 0, 1, 2)
# Issue #83. Changed regex for the date formats to allow any standard delimiter ('/', '-', or '.')
# The program (via toTimestamp()) will use this regex to extract the date from the parsed emails
# If the structure is correct (e.g., 'MM/DD/YYYY') but the delimiters are wrong (e.g., '04-30-2018') the program will still be able to parse it.
# As a result, all the regex's for dtFmtDefs date fields are all the same now. (Change from previous versions)
# Date pattern: three runs of digits separated by any of '/', '-' or '.'.
# Optional whitespace is tolerated before each run of digits. toTimestamp() and
# checkValidDateTimeSpec() apply it with re.match(), so the date must start the string.
dateParseRegex = r'(\s)*(\d)+[/\-\.](\s)*(\d)+[/\-\.](\s)*(\d)+' # i.e., <numbers>[/-.]<numbers>[/-.]<numbers>
# Time pattern: three runs of digits separated by ':'. toTimestamp() applies it
# with re.search(), so the time may appear anywhere in the string.
timeParseRegex = r'(\d)+[:](\d+)[:](\d+)' # i.e., <numbers>:<numbers>:<numbers>
# Character class passed to re.split() to break an extracted date into its three fields.
validDateDelims = r'[/\-\.]' # Valid delimiters in a date string
# Pattern passed to re.split() to break an extracted time into its three fields.
validTimeDelims = ':' # Valid delimiters in a time string
# Print error messages to the log and stderr if there is a date or time format problem.
# It happens more often than you'd think!
def timeStampCrash(msg):
    """Report a date/time format problem and shut the program down.

    Writes the caller's message plus two fixed troubleshooting hints to the
    log at NOTICE severity, reports a summary through globs.log.err(), and
    then calls globs.closeEverythingAndExit(1).

    msg -- description of the specific failure, logged first.
    """
    notices = (
        msg,
        'This is likely caused by an email using a different date or time format than expected,\nparticularly if you\'re collecting emails from multiple locations or time zones.',
        'Please check the \'dateformat=\' and \'timeformat=\' value(s) in the [main] section\nand any [<source>-<destination>] sections of your .rc file for accuracy.',
    )
    for notice in notices:
        globs.log.write(globs.SEV_NOTICE, function='DateTime', action='timeStampCrash', msg=notice)

    globs.log.err('Date/time format specification mismatch. See log file for details. Exiting program.')
    globs.closeEverythingAndExit(1)
# Convert a date/time string to a timestamp
# Input string = YYYY/MM/DD HH:MM:SS AM/PM (epochDate).
# May also be variants of the above. Must check for all cases
# dtStr = date/time string
# dfmt is date format - defaults to user-defined date format in .rc file
# tfmt is time format - - defaults to user-defined time format in .rc file
# utcOffset is UTC offset info as extracted from the incoming email message header
def toTimestamp(dtStr, dfmt = None, tfmt = None, utcOffset = None):
    """Convert a date/time string into a timestamp.

    dtStr     -- text that starts with a date and may also contain a time
                 and an 'AM'/'PM' marker.
    dfmt      -- key into dtFmtDefs naming the date layout; when None,
                 globs.opts['dateformat'] is used.
    tfmt      -- key into dtFmtDefs naming the time layout; when None,
                 globs.opts['timeformat'] is used.
    utcOffset -- value added to the timestamp (after float conversion) when
                 it is not None and globs.opts['applyutcoffset'] is set.

    Returns the value of datetime.timestamp() for the parsed (naive)
    date/time, adjusted by utcOffset where applicable. If no date can be
    found, or the fields do not form a valid date/time, timeStampCrash() is
    called.
    """
    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='toTimestamp', msg='Converting \'{}\' (dfmt=\'{}\', tfmt=\'{}\' offset=\'{}\')'.format(dtStr, dfmt, tfmt, utcOffset))

    # Fall back to the configured formats when the caller supplies none
    if dfmt is None:
        dfmt = globs.opts['dateformat']
    if tfmt is None:
        tfmt = globs.opts['timeformat']

    # Which of the three date fields hold the year, month and day?
    _, yrCol, moCol, dyCol = dtFmtDefs[dfmt]

    # The date must sit at the very start of the string
    dateMatch = re.compile(dateParseRegex).match(dtStr)
    if dateMatch is None:
        timeStampCrash('Can\'t find a match for date pattern {} in date/time string {}.'.format(dfmt, dtStr))    # Write error message, close program
    else:
        dateStr = dateMatch.group(0)

    # Split on any valid delimiter, then pick the fields out by column
    dateFields = re.split(validDateDelims, dateStr)
    year, month, day = (int(dateFields[col]) for col in (yrCol, moCol, dyCol))

    # Which of the three time fields hold the hour, minute and second?
    _, hrCol, mnCol, seCol = dtFmtDefs[tfmt]

    # The time may appear anywhere in the string; midnight is assumed if absent
    timeMatch = re.compile(timeParseRegex).search(dtStr)
    if timeMatch:
        timeStr = timeMatch.group(0)
    else:
        timeStr = '00:00:00'
        globs.log.write(globs.SEV_DEBUG, function='DateTime', action='toTimestamp', msg='No time portion provided. Defaulting to \'00:00:00\'')

    timeFields = re.split(validTimeDelims, timeStr)
    hour, minute, second = (int(timeFields[col]) for col in (hrCol, mnCol, seCol))

    # Shift a 12-hour clock reading onto the 24-hour clock when AM/PM is present
    meridiem = re.compile(r'AM|PM').search(dtStr)
    if meridiem:
        dayPart = meridiem.group(0)
        if dayPart == 'AM' and hour == 12:
            hour = 0
        elif dayPart == 'PM' and hour != 12:
            hour += 12

    # Build the datetime object; out-of-range fields surface here as ValueError
    try:
        ts = datetime.datetime(year, month, day, hour, minute, second).timestamp()
    except ValueError as err:
        globs.log.write(globs.SEV_ERROR, function='DateTime', action='toTimestamp', msg='Error: {}'.format(err.args[0]))
        timeStampCrash('Error creating timestamp: DateString={} DateFormat={} year={} month={} day={} hour={} minute={} second={}'.format(dtStr, dfmt, year, month, day, hour, minute, second))    # Write error message, close program

    # Apply the email's UTC offset. The two tests stay nested so that
    # globs.opts[] is only consulted when an offset was actually supplied
    # (the init routines call this before the options are set).
    if utcOffset is not None:
        if globs.opts['applyutcoffset']:
            ts += float(utcOffset)

    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='toTimestamp', msg='Date/Time {} converted to {}'.format(dtStr, ts))
    return ts
# Convert an RFC 3339 format datetime string to an epoch-style timestamp
# Needed because the JSON output format uses this style for datetime notation
# Basically, decode the RFC3339 string elements into separate date & time strings, then send to toTimestamp() as a normal date/time string.
def toTimestampRfc3339(tsString, utcOffset = None):
    """Convert an RFC 3339 date/time string into a timestamp.

    tsString  -- text beginning with 'YYYY-MM-DDTHH:MM:SS'. Anything after
                 the whole seconds (fractional seconds of any length, a
                 trailing 'Z', or a numeric offset) is ignored, so strings
                 without a fractional part are accepted as well.
    utcOffset -- passed through unchanged to toTimestamp().

    Returns whatever toTimestamp() returns for the extracted fields.
    Raises ValueError when tsString does not start with a recognizable
    date and time.

    Note: the zone designator inside tsString is not interpreted; the only
    zone adjustment is the one toTimestamp() performs with utcOffset.
    """
    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='toTimestampRfc3339', msg='Converting {}, offset={}'.format(tsString, utcOffset))

    # Only whole seconds are used below, so take just the date/time prefix
    # rather than slicing a fixed number of characters off the end. That
    # slicing broke on inputs whose fractional part was missing.
    coreMatch = re.match(r'\d{4}-\d{1,2}-\d{1,2}[Tt]\d{1,2}:\d{1,2}:\d{1,2}', tsString)
    if coreMatch is None:
        raise ValueError('time data {!r} is not a recognized RFC 3339 date/time'.format(tsString))

    # strptime() validates the field ranges (month 1-12, hour 0-23, ...)
    dt = datetime.datetime.strptime(coreMatch.group(0), '%Y-%m-%dT%H:%M:%S')

    # Now, use existing methods to convert to a timestamp
    ts = toTimestamp("{}/{}/{} {}:{}:{}".format(dt.month, dt.day, dt.year, dt.hour, dt.minute, dt.second), "MM/DD/YYYY", "HH:MM:SS", utcOffset)
    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='toTimestampRfc3339', msg='Date/Time {} converted to {}'.format(tsString, ts))
    return ts
# Convert from timestamp to resulting time and date formats
def fromTimestamp(ts, dfmt = None, tfmt = None):
    """Render a timestamp as separate date and time strings.

    ts   -- timestamp; converted with float() before use. None triggers
            timeStampCrash().
    dfmt -- key into dtFmtDefs naming the date layout; when None,
            globs.opts['dateformat'] is used.
    tfmt -- key into dtFmtDefs naming the time layout; when None,
            globs.opts['timeformat'] is used.

    Returns a (date string, time string) tuple. The time uses a 12-hour
    clock with an ' AM'/' PM' suffix unless globs.opts['show24hourtime']
    is set.
    """
    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='fromTimestamp', msg='Converting {} (dfmt=\'{}\', tfmt\'{}\')'.format(ts, dfmt, tfmt))

    # Fall back to the formats from the .rc file when none are given
    if dfmt is None:
        dfmt = globs.opts['dateformat']
    if tfmt is None:
        tfmt = globs.opts['timeformat']

    if ts is None:
        timeStampCrash('Timestamp conversion error.')   # Write error message, close program

    moment = datetime.datetime.fromtimestamp(float(ts))

    # One slot per field position; each strftime() directive is dropped into
    # the slot that the format definition assigns to it.
    slots = [0, 0, 0]

    # Date: delimiter plus the positions of year, month and day
    dateDelim, yrCol, moCol, dyCol = dtFmtDefs[dfmt]
    slots[yrCol], slots[moCol], slots[dyCol] = '%Y', '%m', '%d'
    retDate = moment.strftime(dateDelim.join(map(str, slots)))

    # Time: delimiter plus the positions of hour, minute and second
    timeDelim, hrCol, mnCol, seCol = dtFmtDefs[tfmt]
    if globs.opts['show24hourtime']:
        hourDirective = '%H'
        ampm = ''
    else:
        hourDirective = '%I'
        ampm = ' AM' if moment.hour < 12 else ' PM'
    slots[hrCol], slots[mnCol], slots[seCol] = hourDirective, '%M', '%S'
    retTime = moment.strftime(timeDelim.join(map(str, slots)) + ampm)

    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='fromTimestamp', msg='Converted {} to {} {}'.format(ts, retDate, retTime))
    return retDate, retTime
# Calculate # of days since some arbitrary date
def daysSince(tsIn):
    """Return the number of whole days between the timestamp tsIn and now.

    Both moments are turned into datetime objects with fromtimestamp() and
    the 'days' component of their difference is returned, so the result is
    negative when tsIn lies in the future.
    """
    nowTimestamp = datetime.datetime.now().timestamp()

    # Difference between the two moments as a timedelta
    elapsed = datetime.datetime.fromtimestamp(nowTimestamp) - datetime.datetime.fromtimestamp(tsIn)
    diff = elapsed.days

    detail = 'Now={} {} Then={} {} Days Between={}'.format(nowTimestamp, fromTimestamp(nowTimestamp), tsIn, fromTimestamp(tsIn), diff)
    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='daysSince', msg=detail)
    return diff
# Calculate time difference between two dates
def timeDiff(td, durationZeroes = False):
    """Format a duration given in seconds as days/hours/minutes/seconds text.

    td             -- duration in seconds, handed to datetime.timedelta().
    durationZeroes -- when exactly True, every unit is shown
                      ('0d 1h 0m 5s'); otherwise units equal to zero are
                      left out and each shown unit is followed by a space
                      ('1h 5s ').

    Returns the formatted string (empty when all units are zero and
    durationZeroes is not True).
    """
    span = datetime.timedelta(seconds = td)

    # Break the sub-day remainder into hours, minutes and seconds
    hours, leftover = divmod(span.seconds, 3600)
    minutes, seconds = divmod(leftover, 60)
    units = ((span.days, 'd'), (hours, 'h'), (minutes, 'm'), (seconds, 's'))

    if durationZeroes is True:
        retVal = ' '.join('{}{}'.format(amount, suffix) for amount, suffix in units)
    else:   # Leave out parts that == 0
        retVal = ''.join('{}{} '.format(amount, suffix) for amount, suffix in units if amount != 0)

    globs.log.write(globs.SEV_DEBUG, function='DateTime', action='timeDiff', msg='td={} duration={}'.format(td, retVal))
    return retVal
def checkValidDateTimeSpec(tspec, dfmt = None, tfmt = None):
    """Tell whether tspec looks like a date/time or a time-delta specification.

    tspec -- string to check.
    dfmt  -- accepted for interface compatibility; not used by the check.
    tfmt  -- accepted for interface compatibility; not used by the check.

    Returns True when tspec starts with something matching dateParseRegex,
    or when timeDeltaSpec() accepts it; False otherwise. Only the shape of
    the text is checked, not whether the numbers form a real calendar date.
    """
    globs.log.write(globs.SEV_NOTICE, function='DateTime', action='checkValidDateTimeSpec', msg='Checking {} for date/time specification validity.'.format(tspec))

    # The default formats were previously looked up here but never used;
    # skipping that avoids touching globs.opts when it is not needed.
    if re.match(dateParseRegex, tspec) is not None:
        return True

    # Not a date: fall back to the time-delta form (e.g., '1w,3h').
    # timeDeltaSpec() returns False for an invalid spec and a list otherwise.
    return timeDeltaSpec(tspec) is not False
# If using a time delta rollback scheme (i.e., '1w,3h')
# Check if the scheme is valid
def timeDeltaSpec(spec):
    """Validate a time-delta rollback specification such as '1w,3h'.

    spec -- comma-separated items, each one or more digits followed by a
            single unit letter from 's', 'm', 'h', 'd', 'w'.

    Returns the list of items when every one is valid, otherwise False.
    Each item must match in full, so text such as '3hours' or '1w2' is
    rejected rather than accepted on the strength of its valid prefix.
    """
    tsParts = spec.split(',')
    unitPattern = re.compile(r'\d+[smhdw]')

    # fullmatch() rejects trailing junk that a prefix match would let through
    if all(unitPattern.fullmatch(part) for part in tsParts):
        return tsParts
    return False