-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathdate-time-functions.R
368 lines (308 loc) · 11.8 KB
/
date-time-functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
#' convert hms to 'HH:MM:SS'
#'
#' @param x A numeric or character object to to be converted
#'
#' @return A string formatted to 'HH:MM:SS'
#'
#' @noRd
convert_hms <- function (x) {
if (is.numeric (x)) {
if (nchar (x) <= 2) { # presume it's HH
if (x < 0 | x > 24)
stop ("hms values must be between 0 and 24")
if (x < 24)
res <- paste0 (sprintf ("%02d", x), ":00:00")
else
res <- paste0 (23, ":59:59")
} else if (nchar (x) == 4) {
res <- paste0 (substring (x, 1, 2), ":", substring (x, 3, 4),
":00")
} else if (nchar (x) == 6) {
res <- paste0 (substring (x, 1, 2), ":", substring (x, 3, 4),
":", substring (x, 5, 6))
} else {
stop ("Unable to convert time value")
}
} else if (is.character (x)) {
# split at all non-numeric characters
x <- vapply (strsplit (x, "[^0-9]") [[1]], as.numeric, numeric (1))
if (length (x) == 0)
stop ("Can not convert to hms without numeric values")
if (length (x) == 1) {
if (x < 24)
res <- paste0 (sprintf ("%02d", x [1]), ":00:00")
else
res <- paste0 (23, ":59:59")
}
else if (length (x) == 2)
res <- paste0 (sprintf ("%02d", x [1]), ":",
sprintf ("%02d", x [2]), ":00")
else if (length (x) == 3)
res <- paste0 (sprintf ("%02d", x [1]), ":",
sprintf ("%02d", x [2]), ":",
sprintf ("%02d", x [2]))
else
warning ("only first 3 numeric components used to convert to hms")
} else
stop ("hms values must be either numeric or character")
return (res)
}
#' convert ymd to 'YYYY-MM-DD'
#'
#' @param x A numeric or character object to to be converted
#'
#' @return A string formatted to 'YYYY-MM-DD'
#'
#' lubridate::ymd requires a day to be specified. This function just appends
#' days (and months where necessary) where they don't exist.
#'
#' @noRd
convert_ymd <- function (x) {
if (is.numeric (x)) { # presume it's HH
if (nchar (x) == 2) # can only be YY
x <- as.numeric (paste0 ("20", x, "0101"))
else if (nchar (x) == 4) { # Either YYYY or YYMM
if (substring (x, 1, 2) == "20")
x <- as.numeric (paste0 (x, "0101"))
else
x <- as.numeric (paste0 ("20", x, "01"))
} else if (nchar (x) == 6 & substring (x, 1, 2) == "20")
x <- as.numeric (paste0 (x, "01"))
} else {
xsp <- strsplit (x, "[[:space:]]|[[:punct:]]") [[1]]
if (length (xsp) == 1)
x <- paste (c (xsp, "01", "01"), collapse = " ")
if (length (xsp) == 2)
x <- paste (c (xsp, "01"), collapse = " ")
}
paste0 (lubridate::ymd (x))
}
#" convert weekday vector to numbered weekdays
#'
#' @param wd Vector of numeric or character denoting weekdays
#'
#' @return Equivalent character vector of numbered weekdays
#'
#' @noRd
convert_weekday <- function (wd) {
if (!is.numeric (wd)) {
if (!is.character (wd))
stop ("don't know how to convert weekdays of class ", class (wd))
wdlist <- c ("sunday", "monday", "tuesday", "wednesday",
"thursday", "friday", "saturday")
wd <- vapply (tolower (wd), function (i) {
res <- grep (paste0 ("\\<", i), wdlist)
if (length (res) != 1)
res <- NA
return (res)
},
numeric (1))
if (any (is.na (wd)))
stop ("weekday specification is ambiguous")
} else if (any (!wd %in% 1:7))
stop ("weekdays must be between 1 and 7")
return (paste (sort (wd) - 1)) # sql is 0-indexed
}
# ------ functions for converting "dates" arg of dl_bikedata
#' Paste "20" onto start of any 2-digit years
#'
#' @noRd
prepend_year <- function (x) {
if (any (nchar (x) == 2))
x [which (nchar (x) == 2)] <- paste0 ("20", x [which (nchar (x) == 2)])
return (x)
}
#' Paste Jan and Dec respectively on to first and last value of year vector
#'
#' @noRd
add_month_range <- function (x) {
x [1] <- paste0 (x [1], "01")
x [2] <- paste0 (x [2], "12")
return (x)
}
#' Convert arbitrary character or numeric month to standard two-digit format
#'
#' @noRd
convert_month <- function (x) {
if (is.numeric (x))
x <- paste0 (x)
if (!is.numeric (utils::type.convert (x))) {
x <- substring (tolower (x), 1, 3)
x <- pmatch (x, tolower (month.abb))
}
if (any (nchar (x) == 1))
x [which (nchar (x) == 1)] <- paste0 ("0", x [which (nchar (x) == 1)])
return (x)
}
#' Expand start and end dates given as YYYYMM to sequential range
#'
#' @param x Vector of one or two values giving start and potential end dates as
#' YYYYMM
#'
#' @return Vector all all sequential months between start and end dates of x
#'
#' @noRd
expand_dates_to_range <- function (x) {
if (length (x) == 2) {
if (identical (substring (x [1], 1, 4), substring (x [2], 1, 4)))
x <- x [1]:x [2]
else {
yy <- unique (substring (x, 1, 4))
yy <- yy [1]:yy [2]
xstart <- paste0 (yy [1], substring (x [1], 5, 6))
xstart_12 <- paste0 (yy [1], "12")
xstart <- paste0 (as.numeric (xstart):as.numeric (xstart_12))
xend_1 <- paste0 (utils::tail (yy, 1), "01")
xend <- paste0 (utils::tail (yy, 1), substring (x [2], 5, 6))
xend <- paste0 (as.numeric (xend_1):as.numeric (xend))
xmid <- NULL
if (length (yy) > 2) {
ymid <- yy [2:(length (yy) - 1)]
mm <- c (paste0 ("0", 1:9), paste0 (10:12))
xmid <- vapply (ymid, function (i)
paste0 (i, mm), FUN.VALUE = character (12))
}
x <- c (xstart, xmid, xend)
}
}
return (unique (x))
}
#' Convert vector of dates returned by \code{expand_dates_to_range} to
#' appropriate character format matching file names for designed city
#'
#' Different cities use different date formats for their data files. While
#' NY and Boston use simple "YYYYMM" formats, other cities (DC, LA, Chicago,
#' Philly) disseminate data quarterly or with corresponding file names. London
#' is it's own unique case.
#'
#' @param x Vector of dates in YYYYMM format
#' @param city City for which dates to be matched
#'
#' @return Vector of YYYY_Q1-style date specifications to be matched against
#' file names for designated city
#'
#' @noRd
convert_dates_to_filenames <- function (x, city = "ny") {
yy <- substring (x, 1, 4)
if (city == "ch") {
# Chicago has 2013 bundled as single file, after which
# YYYY_Q1Q2 or YYYY_Q3Q4
indx13 <- which (grepl ("2013", paste0 (x)))
indx <- which (!seq (x) %in% indx13)
x <- x [indx]
hh <- ceiling (as.numeric (substring (x, 5, 6)) / 6)
hh [hh == 1] <- "Q1Q2"
hh [hh == 2] <- "Q3Q4"
x <- unique (c (paste0 (yy [indx], "_", hh),
paste0 (yy [indx], "-", hh)))
if (length (indx13) > 0)
x <- c ("2013", x)
} else if (city == "bo") {
# Boston now has 2011-2013 bundled as single files, and 2014 bundled as
# two files
for (i in paste0 (2011:2013)) {
indx <- which (grepl (i, paste0 (x)))
if (length (indx) > 0) {
x <- x [which (!seq (x) %in% indx)]
x <- c (i, x)
}
}
indx14 <- grep ("2014", paste0 (x))
if (length (indx14) > 0) {
x14 <- x [indx14]
x <- x [which (!seq (x) %in% indx14)]
x14a <- vapply (paste0 (201401:201406), function (i)
any (grepl (i, x14)), logical (1))
if (any (x14a))
x <- c ("2014_1", x)
x14b <- vapply (paste0 (201407:201412), function (i)
any (grepl (i, x14)), logical (1))
if (any (x14b))
x <- c ("2014_2", x)
}
} else if (city == "lo") {
indx1 <- which (yy < 2015)
indx2 <- which (yy >= 2015)
x1 <- yy [indx1]
x <- x [indx2]
if (length (x) > 0) {
mm <- month.abb [as.numeric (substring (x, 5, 6))]
x <- c (paste0 (mm, yy),
paste0 (mm, substring (yy, 3, 4), "[[:punct:]]"))
}
x <- unique (c (x, x1))
} else if (city %in% c ("la", "ph")) {
# LA uses both "YYYY_QX" and "QX_YYYY"
qq <- paste0 ("Q", ceiling (as.numeric (substring (x, 5, 6)) / 3))
if (city == "dc")
x <- unique (paste0 (yy, "-", qq))
else
x <- unique (c (paste0 (yy, "_", qq), paste0 (qq, "_", yy),
paste0 (yy, "-", qq), paste0 (qq, "-", yy),
paste0 (yy, qq)))
} else if (city %in% c ("mo")) { # annual file dumps
x <- unique (yy)
x <- x [which (x > 2013)]
} else if (city %in% c ("dc")) { # annual up to current year
yr <- substr (Sys.Date (), 1, 4)
x <- c (unique (yy [which (yy < yr)]), x [which (yy == yr)])
} else if (city %in% c ("gu")) { # strict YYYY_MM
mm <- sprintf ("%02i", as.integer (substring (x, 5, 6)))
x <- unique (paste0 (yy, "_", mm))
} else
x <- paste0 (x)
return (x)
}
#' Convert dates argument for dl_bikedata to single start and end values in
#' YYYYMM format.
#'
#' @param dates Specified range of dates in almost any format
#'
#' @return Vector of one or two YYYYMM values
#'
#' @noRd
bike_convert_dates <- function (dates) {
if (is.numeric (dates)) {
if (length (dates) > 2)
dates <- c (dates [1], utils::tail (dates, 1))
if (length (unique (nchar (dates))) > 1)
stop ("Ambiguous dates format")
if (all (nchar (dates) == 2))
dates <- 200000 + 100 * dates + c (1, 12)
else if (all (nchar (dates) == 4))
dates <- 100 * dates + c (1, 12)
} else {
dates <- strsplit (dates, "[[:space:]]|[[:punct:]]") [[1]]
if (length (dates) > 4)
stop ("Cannot determine date range")
if (length (dates) == 1) {
if (nchar (dates) < 6)
dates <- add_month_range (rep (prepend_year (dates), 2))
} else if (length (dates) == 2) {
# either range of years or year + month
if (all (nchar (dates) == 2)) {
if (as.numeric (dates [2]) > 12) # try year-year
dates <- add_month_range (prepend_year (dates))
else # try single year-month
dates <- paste0 (prepend_year (dates [1]),
convert_month (dates [2]))
} else if (all (nchar (dates) == 4)) # presume year-year
dates <- add_month_range (dates)
else if (!all (nchar (dates) == 6)) # presume year + month
dates <- paste0 (prepend_year (dates [1]),
convert_month (dates [2]))
} else if (length (dates) == 3) {
# presume year + month-month
dates [1] <- prepend_year (dates [1])
dates <- c (paste0 (dates [1], convert_month (dates [2])),
paste0 (dates [1], convert_month (dates [3])))
} else {
# length == 4: year-month year-month
dates [c (1, 3)] <- prepend_year (dates [c (1, 3)])
dates [c (2, 4)] <- convert_month (dates [c (2, 4)])
dates <- c (paste0 (dates [1], dates [2]),
paste0 (dates [3], dates [4]))
}
}
return (as.numeric (dates))
}