-
Notifications
You must be signed in to change notification settings - Fork 66
/
Copy pathoverline.R
469 lines (441 loc) · 17.1 KB
/
overline.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
#' Do the intersections between two geometries create lines?
#'
#' This is a function required in [overline()]. It identifies
#' whether sets of lines overlap (beyond shared points) or
#' not.
#'
#' @param g1 A spatial object
#' @param g2 A spatial object
#' @family rnet
#' @export
#' @examples
#' \dontrun{
#' # sf implementation
#' islines(routes_fast_sf[2, ], routes_fast_sf[3, ])
#' islines(routes_fast_sf[2, ], routes_fast_sf[22, ])
#' }
islines <- function(g1, g2) {
UseMethod("islines")
}
#' @export
islines.sf <- function(g1, g2) {
sf::st_geometry_type(sf::st_intersection(sf::st_geometry(g1), sf::st_geometry(g2))) == "MULTILINESTRING"
}
#' Function to split overlapping SpatialLines into segments
#'
#' Divides SpatialLinesDataFrame objects into separate Lines.
#' Each new Lines object is the aggregate of a single number
#' of aggregated lines.
#'
#' @param sl SpatialLinesDataFrame with overlapping Lines to split by
#' number of overlapping features.
#' @param buff_dist A number specifying the distance in meters of the buffer to be used to crop lines before running the operation.
#' If the distance is zero (the default) touching but non-overlapping lines may be aggregated.
#' @family rnet
#' @export
#' @examples
#' lib_versions <- sf::sf_extSoftVersion()
#' lib_versions
#' # fails on some systems (with early versions of PROJ)
#' if (lib_versions[3] >= "6.3.1") {
#' sl <- routes_fast_sf[2:4, ]
#' rsec <- gsection(sl)
#' length(rsec) # sections
#' plot(rsec, col = seq(length(rsec)))
#' rsec <- gsection(sl, buff_dist = 50)
#' length(rsec) # 4 features: issue
#' plot(rsec, col = seq(length(rsec)))
#' }
gsection <- function(sl, buff_dist = 0) {
UseMethod("gsection")
}
#' @export
gsection.sf <- function(sl, buff_dist = 0) {
if (buff_dist > 0) {
sl <- geo_toptail(sl, toptail_dist = buff_dist)
}
u <- sf::st_union(sl)
u_merged <- sf::st_line_merge(u)
u_disag <- sf::st_cast(u_merged, "LINESTRING")
u_disag
}
#' Convert series of overlapping lines into a route network
#'
#' This function takes a series of overlapping lines and converts them into a
#' single route network.
#'
#' The function can be used to estimate the amount of transport 'flow' at the
#' route segment level based on input datasets from routing services, for
#' example linestring geometries created with the `route()` function.
#' @param sl A spatial object representing routes on a transport network
#' @param attrib character, column names in sl to be aggregated
#' @param ncores integer, how many cores to use in parallel processing, default = 1
#' @param simplify logical, if TRUE group final segments back into lines, default = TRUE
#' @param regionalise integer, during simplification regonalisation is used if the number of segments exceeds this value
#' @param quiet Should the the function omit messages? `NULL` by default,
#' which means the output will only be shown if `sl` has more than 1000 rows.
#' @param fun Named list of functions to summaries the attributes by? `sum` is the default.
#' `list(sum = sum, average = mean)` will summarise all `attrib`utes by sum and mean.
#' @author Barry Rowlingson
#' @references
#' Morgan M and Lovelace R (2020). Travel flow aggregation: Nationally scalable methods
#' for interactive and online visualisation of transport behaviour at the road network level.
#' Environment and Planning B: Urban Analytics and City Science. July 2020.
#' \doi{10.1177/2399808320942779}.
#'
#' Rowlingson, B (2015). Overlaying lines and aggregating their values for overlapping
#' segments. Reproducible question from <https://gis.stackexchange.com>. See
#' <https://gis.stackexchange.com/questions/139681/>.
#'
#'
#' @details The `overline()` function breaks each line into many straight
#' segments and then looks for duplicated segments. Attributes are summed for
#' all duplicated segments, and if simplify is TRUE the segments with identical
#' attributes are recombined into linestrings.
#'
#' The following arguments only apply to the `sf` implementation of `overline()`:
#'
#' - `ncores`, the number of cores to use in parallel processing
#' - `simplify`, should the final segments be converted back into longer lines? The default
#' setting is `TRUE`. `simplify = FALSE` results in straight line segments consisting
#' of only 2 vertices (the start and end point),
#' resulting in a data frame with many more rows than the simplified results (see examples).
#' - `regionalise` the threshold number of rows above which
#' regionalisation is used (see details).
#'
#'
#' For `sf` objects Regionalisation breaks the dataset into a 10 x 10 grid and
#' then performed the simplification across each grid. This significantly
#' reduces computation time for large datasets, but slightly increases the final
#' file size. For smaller datasets it increases computation time slightly but
#' reduces memory usage and so may also be useful.
#'
#' A known limitation of this method is that overlapping segments of different
#' lengths are not aggregated. This can occur when lines stop halfway down a
#' road. Typically these errors are small, but some artefacts may remain within
#' the resulting data.
#'
#' For very large datasets nrow(x) > 1000000, memory usage can be significant.
#' In these cases is is possible to overline subsets of the dataset, rbind the
#' results together, and then overline again, to produce a final result.
#'
#' Multicore support is only enabled for the regionalised simplification stage
#' as it does not help with other stages.
#'
#' @family rnet
#' @export
#' @examples
#' sl <- routes_fast_sf[2:4, ]
#' sl$All <- flowlines_sf$All[2:4]
#' rnet <- overline(sl = sl, attrib = "All")
#' nrow(sl)
#' nrow(rnet)
#' plot(rnet)
#' rnet_mean <- overline(sl, c("All", "av_incline"), fun = list(mean = mean, sum = sum))
#' plot(rnet_mean, lwd = rnet_mean$All_sum / mean(rnet_mean$All_sum))
#' rnet_sf_raw <- overline(sl, attrib = "length", simplify = FALSE)
#' nrow(rnet_sf_raw)
#' summary(n_vertices(rnet_sf_raw))
#' plot(rnet_sf_raw)
#' rnet_sf_raw$n <- 1:nrow(rnet_sf_raw)
#' plot(rnet_sf_raw[10:25, ])
overline <- function(sl,
attrib,
ncores = 1,
simplify = TRUE,
regionalise = 1e9,
quiet = ifelse(nrow(sl) < 1000, TRUE, FALSE),
fun = sum) {
UseMethod("overline")
}
#' Convert series of overlapping lines into a route network (new method)
#'
#' @description This function is intended as a replacement for overline() and is significantly faster
#' especially on large datasets. However, it also uses more memory.
#'
#' @family rnet
#' @author Malcolm Morgan
#' @return An `sf` object representing a route network
#' @export
#' @rdname overline
overline2 <-
function(sl,
attrib,
ncores = 1,
simplify = TRUE,
regionalise = 1e7,
quiet = ifelse(nrow(sl) < 1000, TRUE, FALSE),
fun = sum) {
if(as.character(unique(sf::st_geometry_type(sl))) == "MULTILINESTRING") {
message("Converting from MULTILINESTRING to LINESTRING")
sl <- sf::st_cast(sl, "LINESTRING")
}
if (!"sfc_LINESTRING" %in% class(sf::st_geometry(sl))) {
stop("Only LINESTRING is supported")
}
if (is(sl, "data.table")) {
sl_df <- as.data.frame(sf::st_drop_geometry(sl))
sl <- sf::st_sf(sl_df, geometry = sl$geometry)
}
if (any(c("1", "2", "3", "4", "grid") %in% attrib)) {
stop("1, 2, 3, 4, grid are not a permitted column names, please rename that column")
}
sl <- sf::st_zm(sl)
sl <- sl[, attrib]
sl_crs <- sf::st_crs(sl)
if (!quiet) {
message(paste0(Sys.time(), " constructing segments"))
}
c1 <- sf::st_coordinates(sl)
sf::st_geometry(sl) <- NULL
l1 <- c1[, 3] # Get which line each point is part of
c1 <- c1[, 1:2]
l1_start <- duplicated(l1) # find the break points between lines
l1_start <- c(l1_start[2:length(l1)], FALSE)
c2 <- c1[2:nrow(c1), 1:2] # Create new coords offset by one row
c2 <- rbind(c2, c(NA, NA))
c2[nrow(c1), ] <- c(NA, NA)
c2[!l1_start, 1] <- NA
c2[!l1_start, 2] <- NA
c3 <- cbind(c1, c2) # make new matrix of start and end coords
rm(c1, c2)
c3 <- c3[!is.na(c3[, 3]), ]
sl <- sl[l1[l1_start], , drop = FALSE] # repeate attributes
rm(l1, l1_start)
# if (!quiet) {
# message(paste0(Sys.time(), " transposing 'B to A' to 'A to B'"))
# }
attributes(c3)$dimnames <- NULL
c3 <- t(apply(c3, MARGIN = 1, FUN = function(y) {
if (y[1] != y[3]) {
if (y[1] > y[3]) {
c(y[3], y[4], y[1], y[2])
} else {
y
}
} else {
if (y[2] > y[4]) {
c(y[3], y[4], y[1], y[2])
} else {
y
}
}
}))
# if (!quiet) {
# message(paste0(Sys.time(), " removing duplicates"))
# }
sl <- cbind(c3, sl)
rm(c3)
sls <- dplyr::group_by_at(sl, c("1", "2", "3", "4"))
sls <- dplyr::ungroup(dplyr::summarise_all(sls, .funs = fun))
attrib <- names(sls)[5:ncol(sls)]
coords <- as.matrix(sls[, 1:4])
sl <- sls[, -c(1:4)]
# Make Geometry
if (!quiet) {
message(paste0(Sys.time(), " building geometry"))
}
sf::st_geometry(sl) <- sf::st_as_sfc(
if (requireNamespace("pbapply", quietly = TRUE)) {
pbapply::pblapply(1:nrow(coords), function(y) {
sf::st_linestring(matrix(coords[y, ], ncol = 2, byrow = T))
})
} else {
lapply(1:nrow(coords), function(y) {
sf::st_linestring(matrix(coords[y, ], ncol = 2, byrow = T))
})
},
crs = sl_crs
)
rm(coords)
# Recombine into fewer lines
if (simplify) {
if (!quiet) {
message(paste0(Sys.time(), " simplifying geometry"))
}
if (nrow(sl) > regionalise) {
message(paste0("large data detected, using regionalisation, nrow = ", nrow(sl)))
# Fix for https://github.com/ropensci/stplanr/issues/510
sl <- sl[sf::st_is_valid(sl),]
suppressWarnings(cents <- sf::st_centroid(sl))
# Fix for https://github.com/r-spatial/sf/issues/1777
if(sf::st_is_longlat(cents)){
bbox <- sf::st_bbox(cents)
bbox[1] <- bbox[1] - abs(bbox[1] * 0.001)
bbox[2] <- bbox[2] - abs(bbox[2] * 0.001)
bbox[3] <- bbox[3] + abs(bbox[3] * 0.001)
bbox[4] <- bbox[4] + abs(bbox[4] * 0.001)
bbox <- sf::st_as_sfc(bbox)
grid <- sf::st_make_grid(bbox, what = "polygons")
} else {
grid <- sf::st_make_grid(cents, what = "polygons")
}
suppressWarnings(inter <- unlist(lapply(sf::st_intersects(cents, grid), `[[`, 1)))
sl$grid <- inter
rm(cents, grid, inter)
# split into a list of df by grid
sl <- dplyr::group_split(sl, grid)
message(paste0(Sys.time(), " regionalisation complete, aggregating flows"))
if (ncores > 1) {
cl <- parallel::makeCluster(ncores)
parallel::clusterExport(
cl = cl,
varlist = c("attrib","ol_grp"),
envir = environment()
)
parallel::clusterEvalQ(cl, {
library(sf)
# library(dplyr)
})
overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) {
pbapply::pblapply(sl, function(y) {
ol_grp(y, attrib)
}, cl = cl)
} else {
lapply(sl, function(y) {
ol_grp(y, attrib)
})
}
parallel::stopCluster(cl)
rm(cl)
} else {
overlined_simple <- if (requireNamespace("pbapply", quietly = TRUE)) {
pbapply::pblapply(sl, function(y) {
ol_grp(y, attrib)
})
} else {
lapply(sl, function(y) {
ol_grp(y, attrib)
})
}
}
rm(sl)
overlined_simple <- data.table::rbindlist(overlined_simple)
overlined_simple <- sf::st_sf(overlined_simple)
overlined_simple <- overlined_simple[seq_len(nrow(overlined_simple)), ]
} else {
if (!quiet) {
message(paste0(Sys.time(), " aggregating flows"))
}
overlined_simple <- ol_grp(sl, attrib)
rm(sl)
}
overlined_simple <- as.data.frame(overlined_simple)
overlined_simple <- sf::st_sf(overlined_simple)
# Separate our the linestrings and the mulilinestrings
if (!quiet) {
message(paste0(Sys.time(), " rejoining segments into linestrings"))
}
overlined_simple <- sf::st_line_merge(overlined_simple)
geom_types <- sf::st_geometry_type(overlined_simple)
overlined_simple_l <- overlined_simple[geom_types == "LINESTRING", ]
overlined_simple_ml <- overlined_simple[geom_types == "MULTILINESTRING", ]
suppressWarnings(overlined_simple_ml <-
sf::st_cast(
sf::st_cast(overlined_simple_ml, "MULTILINESTRING"),
"LINESTRING"
))
return(rbind(overlined_simple_l, overlined_simple_ml))
} else {
return(sl)
}
}
#' @export
overline.sf <- overline2
ol_grp <- function(sl, attrib){
sl <- data.table::data.table(sl)
sl <- sl[, .(geometry = sf::st_combine(geometry)), by = attrib]
sf::st_as_sf(sl)
}
#' Aggregate flows so they become non-directional (by geometry - the slow way)
#'
#' Flow data often contains movement in two directions: from point A to point B
#' and then from B to A. This can be problematic for transport planning, because
#' the magnitude of flow along a route can be masked by flows the other direction.
#' If only the largest flow in either direction is captured in an analysis, for
#' example, the true extent of travel will by heavily under-estimated for
#' OD pairs which have similar amounts of travel in both directions.
#'
#' This function aggregates directional flows into non-directional flows,
#' potentially halving the number of lines objects and reducing the number
#' of overlapping lines to zero.
#'
#' @param x A dataset containing linestring geometries
#' @param attrib A text string containing the name of the line's attribute to
#' aggregate or a numeric vector of the columns to be aggregated
#'
#' @return `onewaygeo` outputs a SpatialLinesDataFrame with single lines
#' and user-selected attribute values that have been aggregated. Only lines
#' with a distance (i.e. not intra-zone flows) are included
#' @family lines
#' @export
onewaygeo <- function(x, attrib) {
UseMethod("onewaygeo")
}
#' @export
onewaygeo.sf <- function(x, attrib) {
geq <- sf::st_equals(x, x, sparse = FALSE) | sf::st_equals_exact(x, x, sparse = FALSE, par = 0.0)
sel1 <- !duplicated(geq) # repeated rows
x$matching_rows <- apply(geq, 1, function(x) paste0(formatC(which(x), width = 4, format = "d", flag = 0), collapse = "-"))
singlelines <- stats::aggregate(x[attrib], list(x$matching_rows), FUN = sum)
return(singlelines)
}
#' Convert series of overlapping lines into a route network
#'
#' This function takes overlapping `LINESTRING`s stored in an
#' `sf` object and returns a route network composed of non-overlapping
#' geometries and aggregated values.
#'
#' @param sl An `sf` `LINESTRING` object with overlapping elements
#' @inheritParams overline
#' @export
#' @examples
#' routes_fast_sf$value <- 1
#' sl <- routes_fast_sf[4:6, ]
#' attrib <- c("value", "length")
#' rnet <- overline_intersection(sl = sl, attrib)
#' plot(rnet, lwd = rnet$value)
#' # A larger example
#' sl <- routes_fast_sf[4:7, ]
#' rnet <- overline_intersection(sl = sl, attrib = c("value", "length"))
#' plot(rnet, lwd = rnet$value)
#' rnet_sf <- overline(routes_fast_sf[4:7, ], attrib = c("value", "length"))
#' plot(rnet_sf, lwd = rnet_sf$value)
#'
#' # An even larger example (not shown, takes time to run)
#' # rnet = overline_intersection(routes_fast_sf, attrib = c("value", "length"))
#' # rnet_sf <- overline(routes_fast_sf, attrib = c("value", "length"), buff_dist = 10)
#' # plot(rnet$geometry, lwd = rnet$value * 2, col = "grey")
#' # plot(rnet_sf$geometry, lwd = rnet_sf$value, add = TRUE)
overline_intersection <- function(sl, attrib, fun = sum) {
sl <- sl[attrib]
sli <- sf::st_intersection(sl)
# check it's all in there:
# sli_union = sf::st_union(sli)
# plot(sli_union, lwd = 9)
# plot(sl$geometry, add = T, col = "red")
# sl_difference = sf::st_difference(sl, sli)
gtypes <- sf::st_geometry_type(sli)
sli_lines <- sli[gtypes == "LINESTRING", ]
sli_mlines <- sli[gtypes == "MULTILINESTRING", ]
if (any(gtypes == "GEOMETRYCOLLECTION")) {
sli_collections <- sli[gtypes == "GEOMETRYCOLLECTION", ]
sli_clines <- sf::st_collection_extract(sli_collections, "LINESTRING")
sli_lines <- rbind(sli_lines, sli_clines)
}
# # Test plots:
# plot(sli_lines$geometry, lwd = sli_lines$n.overlaps * 2, col = "grey")
# plot(sli_mlines$geometry, add = TRUE, lwd = sli_mlines$n.overlaps)
# plot(sl$geometry, col = "red", add = TRUE)
# # removes many lines!
l_mlines <- lapply(sli_mlines$geometry, sf::st_line_merge)
l_mlines_sfc <- sf::st_sfc(l_mlines, crs = sf::st_crs(sl))
# plot(l_mlines_sfc)
l_mlines_data <- lapply(attrib, function(a) {
vapply(sli_mlines$origins, function(i) fun(sl[[a]][i]), FUN.VALUE = numeric(1))
})
names(l_mlines_data) <- attrib
mlines <- sf::st_sf(l_mlines_data, geometry = l_mlines_sfc)
slines <- sli_lines[attrib]
rbind(mlines, slines)
}