diff --git a/.Rbuildignore b/.Rbuildignore index f40d94a..a41e455 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -19,3 +19,4 @@ ^\.binder$ ^\.vscode$ ^CRAN-SUBMISSION$ +^ad-hoc-tests$ \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 94ee03c..345b2f9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: od Title: Manipulate and Map Origin-Destination Data -Version: 0.4.4 +Version: 0.5.0 Authors@R: c( person("Robin", "Lovelace", email = "rob00x@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-5679-6536")), @@ -27,6 +27,7 @@ Depends: R (>= 3.4.0) Imports: sfheaders, methods, + nngeo, vctrs Suggests: sf, @@ -35,6 +36,6 @@ Suggests: tinytest, covr, lwgeom -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 VignetteBuilder: knitr Roxygen: list(markdown = TRUE) diff --git a/NEWS.md b/NEWS.md index bbd4e03..653f204 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# od 0.5.0 (2024-08) + +* New `max_dist` and `max_dest` arguments in `points_to_od()` (also applicable to `points_to_odl()`) to limit the distance between origins and destinations and the number of destinations per origin. Credit to Malcolm Morgan @mem48 for this contribution, closing 4-year-old issue #18. + # od 0.4.4 (2024-03) * Fix minor issue with geometry checking, result of upstream changes diff --git a/R/od-funs.R b/R/od-funs.R index 0184cbf..0da58f0 100644 --- a/R/od-funs.R +++ b/R/od-funs.R @@ -77,8 +77,14 @@ od_to_sfc = function(x, #' Create matrices representing origin-destination coordinates #' -#' This function takes a wide range of input data types (spatial lines, points or text strings) -#' and returns a data frame of coordinates representing origin (ox, oy) and destination (dx, dy) points. +#' This function takes an 'od data frame' with the first +#' two columns matching IDs of spatial objects, and +#' matches them with objects representing origins and destinations +#' in a wide range of input data types (spatial lines, points or text strings). 
+#' It returns a data frame of coordinates representing movement between all origin (ox, oy) and destination (dx, dy) points. +#' +#' See [points_to_od()] for a function that creates +#' an 'od data frame' from a set (or two sets) of points. #' @param p Points representing origins and destinations #' @param pd Points representing destinations, if different from origin points #' @param sfnames Should output column names be compatible with the sf package? diff --git a/R/points_to_od.R b/R/points_to_od.R index 0dfaabb..fcf9699 100644 --- a/R/points_to_od.R +++ b/R/points_to_od.R @@ -1,20 +1,31 @@ #' Convert a series of points into a dataframe of origins and destinations #' #' Takes a series of geographical points and converts them into a data.frame -#' representing the potential flows, or 'spatial interaction', between every combination -#' of points. +#' representing the potential flows, or 'spatial interaction', between every +#' combination of points. #' -#' `points_to_odl()` generates the same output but returns -#' a geographic object representing desire lines in the class `sf`. +#' `points_to_odl()` generates the same output but returns a geographic object +#' representing desire lines in the class `sf`. #' -#' @param p A spatial points object or a matrix of coordinates representing points -#' @param pd Optional spatial points object or matrix objects representing destinations -#' @param interzone_only Should the result only include interzonal OD pairs, in which -#' the ID of the origin is different from the ID of the destination zone? -#' `FALSE` by default -#' @param ids_only Should a data frame with only 2 columns (origin and destination IDs) -#' be returned? The default is `FALSE`, meaning the result should also contain the -#' coordinates of the start and end points of each OD pair. +#' @param p A spatial points object or a matrix of coordinates representing +#' points +#' @param pd Optional spatial points object representing +#' destinations. 
+#' `pd` is ignored if `p` is a matrix. +#' If `pd` is not provided, `p` is used as the destination points. +#' @param interzone_only Should the result only include interzonal OD pairs, in +#' which the ID of the origin is different from the ID of the destination +#' zone? `FALSE` by default +#' @param ids_only Should a data frame with only 2 columns (origin and +#' destination IDs) be returned? The default is `FALSE`, meaning the result +#' should also contain the coordinates of the start and end points of each OD +#' pair. +#' @param max_dist Numeric, maximum distance to consider. Default Inf. +#' Not applicable when `p` is a matrix. +#' @param max_dest The maximum number of destinations for each origin (numeric) +#' sorted from closest to furthest. Default is Inf. Alternative to max_dist +#' for limiting the number of ODs. +#' Not applicable when `p` is a matrix. #' @export #' @examples #' library(sf) @@ -23,7 +34,6 @@ #' points_to_od(p, ids_only = TRUE) #' (l = points_to_odl(p, interzone_only = TRUE)) #' plot(l) -#' library(sf) # for subsetting sf objects: #' points_to_od(od_data_centroids[1:2, ], od_data_centroids[3, ]) #' l = points_to_odl(od_data_centroids[1:2, ], od_data_centroids[3, ]) #' plot(l) @@ -31,40 +41,54 @@ #' l2 = od_to_sf(od, od_data_centroids) #' l2$v = 1 #' (l2_oneway = od_oneway(l2)) -#' plot(l2) -points_to_od = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE) { +#' sf::st_length(l2) +#' # With max_dist: +#' (l3 = points_to_odl(p, max_dist = 10000)) +#' sf::st_length(l3) +points_to_od = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE, + max_dist = Inf, max_dest = Inf) { # to work with other classes at some point, possibly, it's a generic: UseMethod("points_to_od") } #' @export -points_to_od.sf = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE) { +points_to_od.sf = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE, + max_dist = Inf, max_dest = Inf) { + single_geometry = is.null(pd) 
- if(single_geometry) { - pd = p - ids = p[[1]] - if(any(duplicated(ids))) { - warning("Duplicated ids found in first column of origins") - } - odf = data.frame( - stringsAsFactors = FALSE, - expand.grid(p[[1]], pd[[1]], stringsAsFactors = FALSE)[2:1] - ) - } else { - ids = p[[1]] - if(any(duplicated(ids))) { - warning("Duplicated ids found in first column of origins") - } - ids = pd[[1]] - if(any(duplicated(ids))) { + + if(any(duplicated(p[[1]]))) { + warning("Duplicated ids found in first column of origins") + } + + if(any(sf::st_geometry_type(p) != "POINT")){ + message("Converting p to centroids") + suppressWarnings(p <- sf::st_centroid(p)) + } + + if(!single_geometry){ + if(any(duplicated(pd[[1]]))) { warning("Duplicated ids found in first column of destinations") } - odf = data.frame( - stringsAsFactors = FALSE, - expand.grid(p[[1]], pd[[1]], stringsAsFactors = FALSE) - ) + if(any(sf::st_geometry_type(pd) != "POINT")){ + message("Converting pd to centroids") + suppressWarnings(pd <- sf::st_centroid(pd)) + } } - names(odf) = c("O", "D") + if(single_geometry) { + pd = p + } + + if(max_dest > nrow(pd)){ + max_dest = nrow(pd) + } + + nn <- nngeo::st_nn(p, pd, k = max_dest, maxdist = max_dist, returnDist = FALSE, + progress = FALSE) + odf = data.frame(O = rep(p[[1]], lengths(nn)), + D = pd[[1]][unlist(nn, use.names = FALSE)]) + + if(interzone_only) { odf = od_interzone(odf) } @@ -79,15 +103,16 @@ points_to_od.sf = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALS cbind(odf, odc) } #' @export -points_to_od.matrix = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE) { +points_to_od.matrix = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE, max_dist = Inf, max_dest = Inf) { coords_to_od(p, interzone_only = interzone_only, ids_only = ids_only) } #' @rdname points_to_od #' @inheritParams points_to_od #' @inheritParams odc_to_sf +#' @param ... 
Additional arguments passed to `points_to_od()` #' @export -points_to_odl = function(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE, crs = 4326) { - odf = points_to_od(p, pd, interzone_only, ids_only) +points_to_odl = function(p, pd = NULL, crs = 4326, ...) { + odf = points_to_od(p, pd, ...) odc_to_sf(odf[3:6], d = odf[1:2], crs = crs) } #' Convert coordinates into a data frame of origins and destinations diff --git a/ad-hoc-tests/.gitignore b/ad-hoc-tests/.gitignore new file mode 100644 index 0000000..075b254 --- /dev/null +++ b/ad-hoc-tests/.gitignore @@ -0,0 +1 @@ +/.quarto/ diff --git a/ad-hoc-tests/test-max-dist-speedup.qmd b/ad-hoc-tests/test-max-dist-speedup.qmd new file mode 100644 index 0000000..f51568e --- /dev/null +++ b/ad-hoc-tests/test-max-dist-speedup.qmd @@ -0,0 +1,69 @@ +--- +format: gfm +--- + +This document tests the new `max_dist` functionality in PR [#48](https://github.com/ITSLeeds/od/pull/48). + +Let's start the test documented in the PR with the installed version of the package. + +```{r} +remotes::install_cran("od") +library(sf) +``` + +# Test 1: 1000 points + + +```{r} +p = pct::get_centroids_ew() +p = p[1:1000,] + +system.time(r1 <- od::points_to_od(p)) +head(r1) +nrow(r1) +``` + +Now let's test the new `max_dist` functionality. + + +```{r} +if (!file.exists("DESCRIPTION")) { + setwd("..") +} +devtools::load_all() +system.time(r2 <- points_to_od(p)) +head(r2) +nrow(r2) +``` + + +```{r} +system.time(r3 <- points_to_od(p, max_dist = 1000)) +head(r3) +nrow(r3) +``` + +The benchmark shows that the new `max_dist` functionality is faster than the original implementation for large datasets. + +Let's compare the results. + + +```{r} +waldo::compare(head(r1), head(r2)) +r2_sorted = r2 |> + dplyr::arrange(desc(O), desc(D)) +r1_sorted = r1 |> + dplyr::arrange(desc(O), desc(D)) +waldo::compare(head(r1_sorted), head(r2_sorted)) +``` + +Let's plot the results for the `max_dist = 1000` case. 
+ + +```{r} +r3_sf = od::od_to_sf(r3, p) +plot(sf::st_geometry(p), col = "red") +plot(sf::st_geometry(r3_sf), add = TRUE) +``` + +# Test 2: od_coordinates \ No newline at end of file diff --git a/man/coords_to_od.Rd b/man/coords_to_od.Rd index 0d278b7..ab96446 100644 --- a/man/coords_to_od.Rd +++ b/man/coords_to_od.Rd @@ -7,15 +7,17 @@ coords_to_od(p, interzone_only = FALSE, ids_only = FALSE) } \arguments{ -\item{p}{A spatial points object or a matrix of coordinates representing points} +\item{p}{A spatial points object or a matrix of coordinates representing +points} -\item{interzone_only}{Should the result only include interzonal OD pairs, in which -the ID of the origin is different from the ID of the destination zone? -\code{FALSE} by default} +\item{interzone_only}{Should the result only include interzonal OD pairs, in +which the ID of the origin is different from the ID of the destination +zone? \code{FALSE} by default} -\item{ids_only}{Should a data frame with only 2 columns (origin and destination IDs) -be returned? The default is \code{FALSE}, meaning the result should also contain the -coordinates of the start and end points of each OD pair.} +\item{ids_only}{Should a data frame with only 2 columns (origin and +destination IDs) be returned? The default is \code{FALSE}, meaning the result +should also contain the coordinates of the start and end points of each OD +pair.} } \value{ A data frame object with O and D codes and origin and destination coordinates. diff --git a/man/od_coordinates.Rd b/man/od_coordinates.Rd index 4563477..503069e 100644 --- a/man/od_coordinates.Rd +++ b/man/od_coordinates.Rd @@ -22,8 +22,15 @@ representing points/zones of origin and destination} A data frame with origin and destination coordinates } \description{ -This function takes a wide range of input data types (spatial lines, points or text strings) -and returns a data frame of coordinates representing origin (ox, oy) and destination (dx, dy) points. 
+This function takes an 'od data frame' with the first +two columns matching IDs of spatial objects, and +matches them with objects representing origins and destinations +in a wide range of input data types (spatial lines, points or text strings). +It returns a data frame of coordinates representing movement between all origin (ox, oy) and destination (dx, dy) points. +} +\details{ +See \code{\link[=points_to_od]{points_to_od()}} for a function that creates +an 'od data frame' from a set (or two sets) of points. } \examples{ x = od_data_df diff --git a/man/points_to_od.Rd b/man/points_to_od.Rd index 370d24f..78fba73 100644 --- a/man/points_to_od.Rd +++ b/man/points_to_od.Rd @@ -5,40 +5,56 @@ \alias{points_to_odl} \title{Convert a series of points into a dataframe of origins and destinations} \usage{ -points_to_od(p, pd = NULL, interzone_only = FALSE, ids_only = FALSE) - -points_to_odl( +points_to_od( p, pd = NULL, interzone_only = FALSE, ids_only = FALSE, - crs = 4326 + max_dist = Inf, + max_dest = Inf ) + +points_to_odl(p, pd = NULL, crs = 4326, ...) } \arguments{ -\item{p}{A spatial points object or a matrix of coordinates representing points} +\item{p}{A spatial points object or a matrix of coordinates representing +points} + +\item{pd}{Optional spatial points object representing +destinations. +\code{pd} is ignored if \code{p} is a matrix. +If \code{pd} is not provided, \code{p} is used as the destination points.} -\item{pd}{Optional spatial points object or matrix objects representing destinations} +\item{interzone_only}{Should the result only include interzonal OD pairs, in +which the ID of the origin is different from the ID of the destination +zone? \code{FALSE} by default} -\item{interzone_only}{Should the result only include interzonal OD pairs, in which -the ID of the origin is different from the ID of the destination zone? -\code{FALSE} by default} +\item{ids_only}{Should a data frame with only 2 columns (origin and +destination IDs) be returned? 
The default is \code{FALSE}, meaning the result +should also contain the coordinates of the start and end points of each OD +pair.} -\item{ids_only}{Should a data frame with only 2 columns (origin and destination IDs) -be returned? The default is \code{FALSE}, meaning the result should also contain the -coordinates of the start and end points of each OD pair.} +\item{max_dist}{Numeric, maximum distance to consider. Default Inf. +Not applicable when \code{p} is a matrix.} + +\item{max_dest}{The maximum number of destinations for each origin (numeric) +sorted from closest to furthest. Default is Inf. Alternative to max_dist +for limiting the number of ODs. +Not applicable when \code{p} is a matrix.} \item{crs}{The coordinate reference system of the output, if not known in \code{z}. 4326 by default.} + +\item{...}{Additional arguments passed to \code{points_to_od()}} } \description{ Takes a series of geographical points and converts them into a data.frame -representing the potential flows, or 'spatial interaction', between every combination -of points. +representing the potential flows, or 'spatial interaction', between every +combination of points. } \details{ -\code{points_to_odl()} generates the same output but returns -a geographic object representing desire lines in the class \code{sf}. +\code{points_to_odl()} generates the same output but returns a geographic object +representing desire lines in the class \code{sf}. } \examples{ library(sf) @@ -47,7 +63,6 @@ points_to_od(p) points_to_od(p, ids_only = TRUE) (l = points_to_odl(p, interzone_only = TRUE)) plot(l) -library(sf) # for subsetting sf objects: points_to_od(od_data_centroids[1:2, ], od_data_centroids[3, ]) l = points_to_odl(od_data_centroids[1:2, ], od_data_centroids[3, ]) plot(l) @@ -55,5 +70,8 @@ plot(l) l2 = od_to_sf(od, od_data_centroids) l2$v = 1 (l2_oneway = od_oneway(l2)) -plot(l2) +sf::st_length(l2) +# With max_dist: +(l3 = points_to_odl(p, max_dist = 10000)) +sf::st_length(l3) }