diff --git a/DESCRIPTION b/DESCRIPTION
index 7fd24a5..b1a2464 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: caRtociudad
 Type: Package
 Title: Interface to Cartociudad API
-Version: 0.5.2
+Version: 0.5.5
 Date: 2017-07-26
 Encoding: UTF-8
 Authors@R: c(person("Carlos J.", "Gil Bellosta", email="cgb@datanalytics.com", role=c('cre', 'aut')),
@@ -9,7 +9,7 @@ Authors@R: c(person("Carlos J.", "Gil Bellosta", email="cgb@datanalytics.com", r
 Author: Carlos J. Gil Bellosta, Luz Frías
 Maintainer: Carlos J. Gil Bellosta
 Description: Access to Cartociudad cartography API, which provides mapping and other related services for Spain.
-Imports: httr, jsonlite, xml2, plyr, geosphere
+Imports: httr, jsonlite, xml2, plyr, geosphere, utils
 Depends: R (>= 3.0.0)
 Suggests: ggmap, testthat
 URL: https://github.com/cjgb/caRtociudad
diff --git a/R/cartociudad_geocode.R b/R/cartociudad_geocode.R
index 0fa708d..d63b1d3 100644
--- a/R/cartociudad_geocode.R
+++ b/R/cartociudad_geocode.R
@@ -5,25 +5,29 @@
 
 #' @title Interface to Cartociudad geolocation API
 #'
-#' @description Geolocation of Spanish addresses via Cartociudad API calls, providing the
-#'   full address in a single text string via \code{full_address}. It is
-#'   advisable to add the street type (calle, etc.) and to omit the country
-#'   name.
+#' @description Geolocation of Spanish addresses via Cartociudad API calls,
+#'   providing the full address in a single text string via \code{full_address}.
+#'   It is advisable to add the street type (calle, etc.) and to omit the
+#'   country name.
 #'
-#' @usage cartociudad_geocode(full_address, on.error = "fail", ...)
+#' @usage cartociudad_geocode(full_address, version = c("current", "prev"),
+#'   output_format = "JSON", on_error = c("warn", "fail"), ntries = 1)
 #'
 #' @param full_address Character string providing the full address to be
 #'   geolocated; e.g., "calle miguel servet 5, zaragoza". Adding the country may
 #'   cause problems.
-#' @param on.error Defaults to \code{fail}; in such case, in case of errors in the API call, the process will fail. Set it to
-#'   "warn" and, in case of errors, the function will return \code{NULL} and a warning.
-#' @param ... Other parameters for the API. See Details section below.
-#'
-#' @details The entity geolocation API admits more parameters beyond the address field such as \code{id} or \code{type}.
-#'   You can use these extra arguments (see the References or the Examples sections below for further information)
-#'   at your own risk.
+#' @param version Character string. Geocoder version to use: \code{current} or
+#'   \code{prev}.
+#' @param output_format Character string. Output format of the query:
+#'   \code{JSON} or \code{GeoJSON}. Only applicable if you choose version =
+#'   "current".
+#' @param on_error Character string. Defaults to \code{warn}: in case of errors,
+#'   the function will return an empty \code{data.frame} and a warning. Set it
+#'   to \code{fail} to stop the function call in case of errors in the API call.
+#' @param ntries Numeric. Number of times a failed \code{GET} request is
+#'   retried (5 seconds apart) before giving up on that query.
 #'
-#' @return A data frame consisting of a single row per guess. See the reference
+#' @return A data frame consisting of a single row per query. See the reference
 #'   below for an explanation of the data frame columns.
 #'
 #' @author Carlos J. Gil Bellosta
@@ -32,43 +36,108 @@
 #'   \url{http://www.cartociudad.es/recursos/Documentacion_tecnica/CARTOCIUDAD_ServiciosWeb.pdf}
 #'
 #' @examples
-#' # standard usage
-#' res <- cartociudad_geocode(full_address = "plaza de cascorro 11, 28005 madrid")
-#'
-#' #' # km 41 of A-23 motorway
-#' res <- cartociudad_geocode("A-23 41")
-#'
-#' # specific usage (see References for details)
-#' res <- cartociudad_geocode("A-23 41", type = "portal", id = "600000000045", portal = 41)
-#'
-#' # vectorized call
-#' \dontrun{
-#' addresses <- paste("A-23", 1:10)
-#' res <- lapply(addresses, cartociudad_geocode, on.error = "warn")
-#' }
+#' # Query a single address
+#' address <- "plaza de cascorro 11, 28005 madrid"
+#' my.address <- cartociudad_geocode(full_address = address)
+#' print(my.address)
+#'
+#' # Query multiple addresses
+#' address <- c(address, "plaza del ayuntamiento 1, valencia")
+#' my.address <- cartociudad_geocode(full_address = address)
+#' print(my.address)
+#'
 #' @export
+#'
+cartociudad_geocode <- function(full_address, version = c("current", "prev"),
+                                output_format = "JSON", on_error = c("warn", "fail"),
+                                ntries = 1) {
+
+  stopifnot(is.character(full_address))
+  stopifnot(length(full_address) >= 1)
+  version <- match.arg(version)
+  on_error <- match.arg(on_error)
+  no_geocode <- which(nchar(full_address) == 0)
+  total <- length(full_address)
+  res_list <- vector("list", total)
+  curr_names <- c("id", "province", "muni", "tip_via", "address", "portalNumber",
+                  "refCatastral", "postalCode", "lat", "lng", "stateMsg",
+                  "state", "type")
+  prev_names <- c("road_fid", "province", "municipality", "road_type", "road_name",
+                  "numpk_name", "numpk_fid", "zip", "latitude", "longitude",
+                  "comments", "status")
+  pb <- utils::txtProgressBar(min = 0, max = total, style = 3)
+  empty_df <- as.data.frame(
+    matrix(NA_character_, nrow = 0, ncol = length(curr_names), dimnames = list(c(), curr_names)),
+    stringsAsFactors = FALSE
+  )
+  con_out <- numeric()
+
+  for (i in seq_len(total)) {
+    res_list[[i]] <- empty_df
+    if (!i %in% no_geocode) {
+      ua <- get_cartociudad_user_agent()
+      if (version == "current") {
+        api.args <- list(q = full_address[i], outputformat = output_format)
+        get_url <- "http://www.cartociudad.es/geocoder/api/geocoder/findJsonp"
+      } else {
+        api.args <- list(max_results = 1, address = full_address[i])
+        get_url <- "http://www.cartociudad.es/CartoGeocoder/Geocode"
+      }
+      res <- get_ntries(get_url, api.args, ua, ntries)
 
-cartociudad_geocode <- function(full_address, on.error = "fail", ...) {
-
-  api.args <- c(list(q = full_address), ...)
-  ua <- get_cartociudad_user_agent()
-  res <- httr::GET("http://www.cartociudad.es/geocoder/api/geocoder/findJsonp",
-                   query = api.args, ua)
-
-  if (httr::http_error(res)){
-    if (on.error == "fail")
-      stop("Call to cartociudad API failed with error code ", res$status_code)
-
-    warning("Call to cartociudad API failed with error code ", res$status_code)
-    return(NULL)
+      if (length(res) == 0) {
+        warning("Failing to connect with server in query ", i,
+                ": try later with addressess in attr(results, 'rerun').")
+        res_list[[i]] <- plyr::rbind.fill(
+          res_list[[i]],
+          data.frame(address = full_address[i], version = version, stringsAsFactors = FALSE)
+        )
+        con_out <- c(con_out, i)
+      } else if (httr::http_error(res)) {
+        if (on_error == "fail")
+          stop("Call to cartociudad API failed with error code ", res$status_code)
+        warning("Error in query ", i, ": ", httr::http_status(res)$message)
+        res_list[[i]] <- plyr::rbind.fill(
+          res_list[[i]],
+          data.frame(address = full_address[i], version = version, stringsAsFactors = FALSE)
+        )
+      } else {
+        res <- jsonp_to_json(suppressMessages(httr::content(res, as = "text")))
+        res <- jsonlite::fromJSON(res)
+        # Logical mask: `-which()` would drop every field when nothing matches.
+        res <- res[!(names(res) %in% c("geom", "countryCode", "error", "success"))]
+        if (version == "current") {
+          res <- lapply(res, function(x) ifelse(is.null(x), NA_character_, x))
+        } else {
+          res <- res[[1]]
+        }
+        if (length(res) == 0) {
+          warning("The query ", i, " has 0 results.")
+          res_list[[i]] <- plyr::rbind.fill(
+            res_list[[i]],
+            data.frame(address = full_address[i], version = version, stringsAsFactors = FALSE)
+          )
+        } else {
+          if (version == "current") {
+            res_list[[i]] <- as.data.frame(t(unlist(res)), stringsAsFactors = FALSE)[, curr_names]
+            res_list[[i]] <- cbind(res_list[[i]], version = "current")
+          } else {
+            res_list[[i]] <- cbind(res[, prev_names], type = NA_character_, version = "prev")
+            names(res_list[[i]]) <- c(curr_names, "version")
+            row.names(res_list[[i]]) <- NULL
+          }
+        }
+      }
+    } else {
+      warning("Empty string as query in address ", i, ": NA returned.")
+      res_list[[i]] <- empty_df[1, ]
+    }
+    utils::setTxtProgressBar(pb, i)
   }
-
-  res <- jsonp_to_json(httr::content(res, as = "text", encoding = "UTF8"))
-  res <- jsonlite::fromJSON(res)
-  res <- as.data.frame(t(unlist(res)), stringsAsFactors = FALSE)
-
-  res$lat <- as.numeric(res$lat)
-  res$lng <- as.numeric(res$lng)
-  res
+  cat("\n")
+  results <- plyr::rbind.fill(res_list)
+  results[, c("lat", "lng")] <- apply(results[, c("lat", "lng")], 2, as.numeric)
+  attributes(results)$rerun <- full_address[con_out]
+  return(results)
 }
 
diff --git a/R/cartociudad_reverse_geocode.R b/R/cartociudad_reverse_geocode.R
index d139521..ab13bcf 100644
--- a/R/cartociudad_reverse_geocode.R
+++ b/R/cartociudad_reverse_geocode.R
@@ -10,13 +10,15 @@
 #' @details This function performs reverse geocoding of a location. It returns
 #'   the details of the closest address in Spain.
 #'
-#' @usage cartociudad_reverse_geocode(latitude, longitude)
+#' @usage cartociudad_reverse_geocode(latitude, longitude, ntries = 1)
 #'
 #' @param latitude Point latitude in geographical coordinates (e.g., 40.473219)
 #' @param longitude Point longitude in geographical coordinates (e.g.,
 #'   -3.7227241)
+#' @param ntries Numeric. Number of times a failed \code{GET} request is
+#'   retried (5 seconds apart) before giving up on that query.
 #'
-#' @return A list with the following items:
+#' @return A data frame consisting of a single row per query, with columns:
 #'   \item{tipo}{type of location.}
 #'   \item{tipo.via}{road type.}
 #'   \item{nombre.via}{road name.}
@@ -32,34 +34,55 @@
 #'   \url{http://www.cartociudad.es/recursos/Documentacion_tecnica/CARTOCIUDAD_ServiciosWeb.pdf}
 #'
 #' @examples
+#' # Query one point
 #' cartociudad_reverse_geocode(40.473219, -3.7227241)
 #'
+#' # Query multiple points
+#' cartociudad_reverse_geocode(c(40.473219, 39.46979), c(-3.7227241, -0.376963))
+#'
 #' @export
 #'
-cartociudad_reverse_geocode <- function(latitude, longitude) {
-
-  query.parms <- list(
-    lat = latitude,
-    lon = longitude
-  )
-
-  url <- "http://www.cartociudad.es/services/api/geocoder/reverseGeocode"
-  ua <- get_cartociudad_user_agent()
-
-
-  res <- httr::GET(url, query = query.parms, ua)
-  httr::stop_for_status(res)
-  info <- httr::content(res)
-  # Parse the response
-  res <- list(
-    tipo = info$type,
-    tipo.via = info$tip_via,
-    nombre.via = info$address,
-    num.via = info$portalNumber,
-    num.via.id = info$id,
-    municipio = info$muni,
-    provincia = info$province,
-    cod.postal = info$postalCode
-  )
-  return(res)
+cartociudad_reverse_geocode <- function(latitude, longitude, ntries = 1) {
+
+  stopifnot(length(latitude) == length(longitude) || length(latitude) == 0)
+
+  res_list <- list()
+  url <- "http://www.cartociudad.es/services/api/geocoder/reverseGeocode"
+  ua <- get_cartociudad_user_agent()
+  no_select <- c("geom", "poblacion", "stateMsg", "state", "priority", "countryCode")
+  total <- length(latitude)
+  pb <- utils::txtProgressBar(min = 0, max = total, style = 3)
+
+  for (i in seq_len(total)) {
+    query.parms <- list(lat = latitude[i], lon = longitude[i])
+    res <- get_ntries(url, query.parms, ua, ntries)
+    if (httr::http_error(res)) {
+      warning("Error in query ", i, ": ", httr::http_status(res)$message)
+      res_list[[i]] <- data.frame(lat = latitude[i], lng = longitude[i],
+                                  stringsAsFactors = FALSE)
+    } else if (length(httr::content(res)) == 0) {
+      warning("Query ", i, " produced 0 results.")
+      res_list[[i]] <- data.frame(lat = latitude[i], lng = longitude[i],
+                                  stringsAsFactors = FALSE)
+    } else {
+      info <- httr::content(res)
+      # Logical mask: `-which()` would drop every field when nothing matches.
+      info <- info[!(names(info) %in% no_select)]
+      res_list[[i]] <- as.data.frame(t(unlist(info)), stringsAsFactors = FALSE)
+    }
+    utils::setTxtProgressBar(pb, i)
+  }
+
+  cat("\n")
+  results <- plyr::rbind.fill(res_list)
+  names_old <- c("type", "tip_via", "address", "portalNumber", "id",
+                 "muni", "province", "postalCode", "lat", "lng")
+  names_new <- c("tipo", "tipo.via", "nombre.via", "num.via", "num.via.id",
+                 "municipio", "provincia", "cod.postal", "lat", "lng")
+  # Iterate over the rename table itself, not over the result's columns.
+  for (i in seq_along(names_old)) {
+    colnames(results)[colnames(results) == names_old[i]] <- names_new[i]
+  }
+
+  return(results)
 }
diff --git a/R/utils.R b/R/utils.R
index faabd70..8e637fd 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -14,3 +14,20 @@ jsonp_to_json <- function(text) {
   text <- gsub("\\)$", "", text)
   return(text)
 }
+
+# GET `url`; on a connection error wait 5 s and retry, up to `tries` more
+# times. Returns character() once the retries are exhausted.
+get_ntries <- function(url, query, ua, tries) {
+  withRestarts(
+    tryCatch(httr::GET(url, query = query, ua),
+             error = function(e) {invokeRestart("retry")}),
+    retry = function() {
+      if (tries <= 0) {
+        return(character())
+      }
+      message("Failing to connect with server: retrying...")
+      Sys.sleep(5)
+      get_ntries(url, query, ua, tries - 1)
+    }
+  )
+}
diff --git a/man/cartociudad_geocode.Rd b/man/cartociudad_geocode.Rd
index d028868..343ac24 100644
--- a/man/cartociudad_geocode.Rd
+++ b/man/cartociudad_geocode.Rd
@@ -4,48 +4,49 @@
 \alias{cartociudad_geocode}
 \title{Interface to Cartociudad geolocation API}
 \usage{
-cartociudad_geocode(full_address, on.error = "fail", ...)
+cartociudad_geocode(full_address, version = c("current", "prev"),
+  output_format = "JSON", on_error = c("warn", "fail"), ntries = 1)
 }
 \arguments{
 \item{full_address}{Character string providing the full address to be
 geolocated; e.g., "calle miguel servet 5, zaragoza". Adding the country may
 cause problems.}
 
-\item{on.error}{Defaults to \code{fail}; in such case, in case of errors in the API call, the process will fail. Set it to
-"warn" and, in case of errors, the function will return \code{NULL} and a warning.}
+\item{version}{Character string. Geocoder version to use: \code{current} or
+\code{prev}.}
 
-\item{...}{Other parameters for the API. See Details section below.}
+\item{output_format}{Character string. Output format of the query:
+\code{JSON} or \code{GeoJSON}. Only applicable if you choose version =
+"current".}
+
+\item{on_error}{Character string. Defaults to \code{warn}: in case of errors,
+the function will return an empty \code{data.frame} and a warning. Set it
+to \code{fail} to stop the function call in case of errors in the API call.}
+
+\item{ntries}{Numeric. Number of times a failed \code{GET} request is
+retried (5 seconds apart) before giving up on that query.}
 }
 \value{
-A data frame consisting of a single row per guess. See the reference
+A data frame consisting of a single row per query. See the reference
 below for an explanation of the data frame columns.
 }
 \description{
-Geolocation of Spanish addresses via Cartociudad API calls, providing the
-  full address in a single text string via \code{full_address}. It is
-  advisable to add the street type (calle, etc.) and to omit the country
-  name.
-}
-\details{
-The entity geolocation API admits more parameters beyond the address field such as \code{id} or \code{type}.
-  You can use these extra arguments (see the References or the Examples sections below for further information)
-  at your own risk.
+Geolocation of Spanish addresses via Cartociudad API calls,
+  providing the full address in a single text string via \code{full_address}.
+  It is advisable to add the street type (calle, etc.) and to omit the
+  country name.
 }
 \examples{
-# standard usage
-res <- cartociudad_geocode(full_address = "plaza de cascorro 11, 28005 madrid")
+# Query a single address
+address <- "plaza de cascorro 11, 28005 madrid"
+my.address <- cartociudad_geocode(full_address = address)
+print(my.address)
 
-#' # km 41 of A-23 motorway
-res <- cartociudad_geocode("A-23 41")
+# Query multiple addresses
+address <- c(address, "plaza del ayuntamiento 1, valencia")
+my.address <- cartociudad_geocode(full_address = address)
+print(my.address)
 
-# specific usage (see References for details)
-res <- cartociudad_geocode("A-23 41", type = "portal", id = "600000000045", portal = 41)
-
-# vectorized call
-\dontrun{
-addresses <- paste("A-23", 1:10)
-res <- lapply(addresses, cartociudad_geocode, on.error = "warn")
-}
 }
 \references{
 \url{http://www.cartociudad.es/recursos/Documentacion_tecnica/CARTOCIUDAD_ServiciosWeb.pdf}
diff --git a/man/cartociudad_reverse_geocode.Rd b/man/cartociudad_reverse_geocode.Rd
index 0c03928..f7458ae 100644
--- a/man/cartociudad_reverse_geocode.Rd
+++ b/man/cartociudad_reverse_geocode.Rd
@@ -4,16 +4,19 @@
 \alias{cartociudad_reverse_geocode}
 \title{Reverse geocoding of locations}
 \usage{
-cartociudad_reverse_geocode(latitude, longitude)
+cartociudad_reverse_geocode(latitude, longitude, ntries = 1)
 }
 \arguments{
 \item{latitude}{Point latitude in geographical coordinates (e.g., 40.473219)}
 
 \item{longitude}{Point longitude in geographical coordinates (e.g.,
 -3.7227241)}
+
+\item{ntries}{Numeric. Number of times a failed \code{GET} request is
+retried (5 seconds apart) before giving up on that query.}
 }
 \value{
-A list with the following items:
+A data frame consisting of a single row per query, with columns:
 \item{tipo}{type of location.}
 \item{tipo.via}{road type.}
 \item{nombre.via}{road name.}
@@ -31,8 +34,12 @@ This function performs reverse geocoding of a location. It returns
   the details of the closest address in Spain.
 }
 \examples{
+# Query one point
 cartociudad_reverse_geocode(40.473219, -3.7227241)
 
+# Query multiple points
+cartociudad_reverse_geocode(c(40.473219, 39.46979), c(-3.7227241, -0.376963))
+
 }
 \references{
 \url{http://www.cartociudad.es/recursos/Documentacion_tecnica/CARTOCIUDAD_ServiciosWeb.pdf}
diff --git a/tests/testthat/test-caRtociudad.R b/tests/testthat/test-caRtociudad.R
index 9b707f9..483c6dd 100644
--- a/tests/testthat/test-caRtociudad.R
+++ b/tests/testthat/test-caRtociudad.R
@@ -1,46 +1,65 @@
 context("requests")
 
-test_that("cartociudad_geocode returns the location of a full address", {
-  result <- cartociudad_geocode("plaza de cascorro 11, 28005 madrid")
+test_that("cartociudad_geocode & cartociudad_reverse_geocode return valid locations", {
+  res_geo <- cartociudad_geocode("plaza de cascorro 11, 28005 madrid")
+  res_inv_geo <- cartociudad_reverse_geocode(res_geo$lat, res_geo$lng)
 
-  expect_that(nrow(result) > 0, is_true())
+  expect_false(all(sapply(res_geo, is.null)))
+  expect_false(all(sapply(res_inv_geo, is.null)))
+  expect_true(nrow(res_geo) > 0)
+  expect_true(nrow(res_inv_geo) > 0)
+
+  expect_equal(res_geo$state, "1")
+  expect_equal(res_geo$lat, 40.40988, tolerance = 1e-06)
+  expect_equal(res_geo$lng, -3.707076, tolerance = 1e-06)
+
+  expect_equal(res_inv_geo$num.via, "11")
+  expect_equal(res_inv_geo$cod.postal, "28005")
+  expect_equal(res_inv_geo$municipio, "MADRID")
+  expect_equal(res_inv_geo$tipo, "portal")
+
+  expect_equal(res_geo$lat, as.numeric(res_inv_geo$lat))
+  expect_equal(res_geo$lng, as.numeric(res_inv_geo$lng))
+})
+
+test_that("Geocoding and reverse geocoding wrong addresses", {
+  addresses <- c(
+    "plaza de cascorro 9000, madrid",
+    "plaza de cascorro 9001, madrid",
+    "a7 3000",
+    "plaza doctor balmis 2, alicante",
+    "calle inventadisima 1, valencia"
+  )
+  res_geo <- cartociudad_geocode(addresses)
+  res_inv_geo <- cartociudad_reverse_geocode(res_geo$lat[-5], res_geo$lng[-5])
+
+  expect_true(nrow(res_geo) == length(addresses))
+  expect_true(nrow(res_inv_geo) == length(addresses[-5]))
+
+  expect_equal(res_geo$state, c("2", "3", "4", "5", "10"))
+
+  expect_equal(res_geo$address[1:3], res_inv_geo$nombre.via[1:3])
+
+  expect_warning(cartociudad_reverse_geocode(res_geo$lat[5], res_geo$lng[5]))
+})
+
+test_that("Server error handling", {
+  address <- c("calle hondon de las nieves 5, alicante")
+  expect_warning(cartociudad_geocode(address, on_error = "warn"))
+  expect_error(cartociudad_geocode(address, on_error = "fail"))
 })
 
+
 test_that("get_cartociudadmap returns a map for a valid location", {
   map <- get_cartociudadmap(c(40.41137, -3.707168), 1)
 
-  expect_that(map, is_a("raster"))
-  expect_that(map, is_a("ggmap"))
+  expect_is(map, c("raster", "ggmap"))
 })
 
 test_that("get_cartociudad_location_info returns info for a valid location", {
   result <- get_cartociudad_location_info(40.473219, -3.7227241)
 
-  expect_that(!is.null(result$seccion), is_true())
-  expect_that(!is.null(result$distrito), is_true())
-  expect_that(!is.null(result$provincia), is_true())
-  expect_that(!is.null(result$municipio), is_true())
-  expect_that(!is.null(result$ref.catastral), is_true())
-  expect_that(!is.null(result$url.ref.catastral), is_true())
-  expect_that(!is.null(result$tipo), is_true())
-  expect_that(!is.null(result$tipo.via), is_true())
-  expect_that(!is.null(result$nombre.via), is_true())
-  expect_that(!is.null(result$num.via), is_true())
-  expect_that(!is.null(result$num.via.id), is_true())
-  expect_that(!is.null(result$cod.postal), is_true())
-})
-
-test_that("cartociudad_reverse_geocode returns an address for a valid location", {
-  result <- cartociudad_reverse_geocode(40.473219, -3.7227241)
-
-  expect_that(!is.null(result$tipo), is_true())
-  expect_that(!is.null(result$tipo.via), is_true())
-  expect_that(!is.null(result$nombre.via), is_true())
-  expect_that(!is.null(result$num.via), is_true())
-  expect_that(!is.null(result$num.via.id), is_true())
-  expect_that(!is.null(result$municipio), is_true())
-  expect_that(!is.null(result$provincia), is_true())
-  expect_that(!is.null(result$cod.postal), is_true())
+  expect_false(all(sapply(result, is.null)))
 })
 
 test_that("get_cartociudad_user_agent returns the package name and github repo url", {
@@ -49,12 +68,12 @@ test_that("get_cartociudad_user_agent returns the package name and github repo u
   httr::stop_for_status(result)
   user.agent <- httr::content(result)$"user-agent"
 
-  expect_that(length(grep("caRtociudad/[0-9.]+", user.agent)) == 1, is_true())
-  expect_that(length(grep("github.com/cjgb/caRtociudad", user.agent)) == 1, is_true())
+  expect_length(grep("caRtociudad/[0-9.]+", user.agent), 1)
+  expect_length(grep("github.com/cjgb/caRtociudad", user.agent), 1)
 })
 
 test_that("get_cartociudad_area with valid parameters returns a polygon", {
   result <- get_cartociudad_area(40.3930144, -3.6596683, 500)
 
-  expect_that(nrow(result) > 2, is_true())
+  expect_gt(nrow(result), 2)
 })