Skip to content

Commit

Permalink
Merge pull request #111 from rOpenSpain/23-improve-type-safety-with-r…
Browse files Browse the repository at this point in the history
…lang-or-assertthat-or-checkmate

argument checks for all user facing functions
  • Loading branch information
e-kotov authored Dec 13, 2024
2 parents c822ca3 + 34ed5d6 commit da7a822
Show file tree
Hide file tree
Showing 14 changed files with 164 additions and 57 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ BugReports: https://github.com/rOpenSpain/spanishoddata/issues
Depends:
R (>= 3.5.0)
Imports:
checkmate,
curl (>= 5.0.0),
DBI,
dplyr,
Expand Down
17 changes: 16 additions & 1 deletion R/connect_to_converted_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,14 @@ spod_connect <- function(
max_n_cpu = parallelly::availableCores() - 1,
temp_path = spod_get_temp_dir()
){
# Validate imputs
checkmate::assert_access(data_path, access = 'r')
checkmate::assert_character(target_table_name, null.ok = TRUE)
checkmate::assert_flag(quiet)
checkmate::assert_number(max_mem_gb, lower = 1)
checkmate::assert_integerish(max_n_cpu, lower = 1, upper = parallelly::availableCores())
checkmate::assert_directory_exists(temp_path, access = "rw")

# determine if data_path is a folder or a duckdb file
if (grepl("\\.duckdb$", data_path)) {
duckdb_path <- data_path
Expand Down Expand Up @@ -102,7 +110,14 @@ spod_connect <- function(
#' spod_disconnect(od_distr)
#' }
#'
spod_disconnect <- function(tbl_con, free_mem = TRUE) {
spod_disconnect <- function(
tbl_con,
free_mem = TRUE
) {
# Validate imputs
checkmate::assert_class(tbl_con, "tbl_duckdb_connection")
checkmate::assert_flag(free_mem)

DBI::dbDisconnect(tbl_con$src$con, shutdown = TRUE)
if (free_mem){
gc()
Expand Down
34 changes: 26 additions & 8 deletions R/convert_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,26 +40,44 @@ spod_convert <- function(
data_dir = spod_get_data_dir(),
quiet = FALSE,
max_mem_gb = max(4, spod_available_ram() - 4),
max_n_cpu = parallelly::availableCores() - 1,
max_n_cpu = max(1, parallelly::availableCores() - 1),
max_download_size_gb = 1,
ignore_missing_dates = FALSE
) {

# Validate inputs
checkmate::assert_choice(type, choices = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"))
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))
checkmate::assert_choice(save_format, choices = c("duckdb", "parquet"), null.ok = TRUE)
checkmate::assert_character(save_path, len = 1, null.ok = TRUE)
checkmate::assert_flag(overwrite)
checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_flag(quiet)
checkmate::assert_number(max_mem_gb, lower = 0.1)
checkmate::assert_integerish(max_n_cpu, lower = 1, upper = parallelly::availableCores())
checkmate::assert_number(max_download_size_gb, lower = 0.1)
checkmate::assert_flag(ignore_missing_dates)


# simple null check is enough here, as spod_dates_arugument_to_dates_seq will do additional checks anyway
if (is.null(dates)) {
message("No period specified in the `dates` argument. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion.")
message("No period specified in the `dates` argument. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion. For more details on the dates argument, see ?spod_convert.")
}

dates <- spod_dates_argument_to_dates_seq(dates = dates)
ver <- spod_infer_data_v_from_dates(dates = dates, ignore_missing_dates = ignore_missing_dates)

# check if user is requesting to just get all cached data
cached_data_requested <- length(dates) == 1 &&
all(as.character(dates) %in% c("cached_v1", "cached_v2"))

type <- match.arg(type)

ver <- spod_infer_data_v_from_dates(dates = dates, ignore_missing_dates = ignore_missing_dates)

# normalise type
type <- spod_match_data_type(type = type)

zones <- match.arg(zones)
# normalise zones
zones <- spod_zone_names_en2es(zones)

# check format
Expand Down
4 changes: 4 additions & 0 deletions R/data-dir.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ spod_set_data_dir <- function(
data_dir,
quiet = FALSE
){
checkmate::assert_character(data_dir, len = 1, null.ok = FALSE)
checkmate::assert_flag(quiet)

data_dir_abs_path <- fs::path_abs(data_dir)

tryCatch({
Expand Down Expand Up @@ -55,6 +58,7 @@ spod_set_data_dir <- function(
#' @export
#' @keywords internal
spod_get_data_dir <- function(quiet = FALSE) {
checkmate::assert_flag(quiet)
data_dir_env <- Sys.getenv("SPANISH_OD_DATA_DIR")
if (data_dir_env == "") {
if (isFALSE(quiet)) warning("Warning: SPANISH_OD_DATA_DIR is not set. Using the temporary directory, which is not recommended, as the data will be deleted when the session ends.\n\n To set the data directory, use `Sys.setenv(SPANISH_OD_DATA_DIR = '/path/to/data')` or set SPANISH_OD_DATA_DIR permanently in the environment by editing the `.Renviron` file locally for current project with `usethis::edit_r_environ('project')` or `file.edit('.Renviron')` or globally for all projects with `usethis::edit_r_environ('user')` or `file.edit('~/.Renviron')`.")
Expand Down
22 changes: 20 additions & 2 deletions R/download_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,28 @@ spod_download <- function(
return_local_file_paths = FALSE,
ignore_missing_dates = FALSE
) {
# convert english zone names to spanish words used in the default data paths
zones <- match.arg(zones)

# Validate inputs
checkmate::assert_choice(type, choices = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"))
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))
checkmate::assert_number(max_download_size_gb, lower = 0.1)
checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_flag(quiet)
checkmate::assert_flag(return_local_file_paths)
checkmate::assert_flag(ignore_missing_dates)

# normalise zones
zones <- spod_zone_names_en2es(zones)

# simple null check is enough here, as spod_dates_arugument_to_dates_seq will do additional checks anyway
if (is.null(dates)) {
message("`dates` argument is undefined. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion. For more details on the dates argument, see ?spod_download.")
}

dates_to_use <- spod_dates_argument_to_dates_seq(dates = dates)


Expand Down
29 changes: 22 additions & 7 deletions R/get.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,25 @@ spod_get_zones <- function(
data_dir = spod_get_data_dir(),
quiet = FALSE
) {
ver <- as.integer(ver) # todo: add type safety check
# Validate inputs
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))

checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 or 2.")
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}

zones <- match.arg(zones)


checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_flag(quiet)

# normalise zones
zones <- spod_zone_names_en2es(zones)

if (ver == 1) {
zones_sf <- spod_get_zones_v1(zones = zones, data_dir = data_dir, quiet = quiet)
} else if (ver == 2) {
Expand Down Expand Up @@ -84,10 +95,14 @@ spod_available_data <- function(
quiet = FALSE,
data_dir = spod_get_data_dir()
) {
ver <- as.integer(ver) # todo: add type safety check
# Validate input
checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 or 2.")
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}
checkmate::assert_flag(check_local_files)
checkmate::assert_flag(quiet)
checkmate::assert_directory_exists(data_dir, access = "rw")

if (ver == 1) {
return(spod_available_data_v1(data_dir = data_dir, check_local_files = check_local_files, quiet = quiet))
Expand Down
26 changes: 21 additions & 5 deletions R/get_v1_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -488,15 +488,31 @@ spod_get <- function(
temp_path = spod_get_temp_dir(),
ignore_missing_dates = FALSE
) {

# Validate inputs
checkmate::assert_choice(type, choices = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"))
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))
checkmate::assert_flag(quiet)
checkmate::assert_number(max_mem_gb, lower = 1)
checkmate::assert_integerish(max_n_cpu, lower = 1, upper = parallelly::availableCores())
checkmate::assert_number(max_download_size_gb, lower = 0.1)
checkmate::assert_string(duckdb_target)
checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_directory_exists(temp_path, access = "rw")
checkmate::assert_flag(ignore_missing_dates)

# simple null check is enough here, as spod_dates_arugument_to_dates_seq will do additional checks anyway
if (is.null(dates)) {
message("`dates` argument is undefined. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion.")
message("`dates` argument is undefined. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion. For more details on the dates argument, see ?spod_get.")
}


type <- match.arg(type)
# normalise type
type <- spod_match_data_type(type = type)

zones <- match.arg(zones)
# normalise zones
zones <- spod_zone_names_en2es(zones)

# check if user is requesting to just get all cached data
Expand Down
9 changes: 7 additions & 2 deletions R/help.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,13 @@
#' @importFrom utils vignette
#' @export
#'
spod_codebook = function(ver = 1) {
ver <- as.integer(ver)
spod_codebook <- function(ver = 1) {
# Validate input
checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}

if (ver == 1){
help <- vignette(
topic = "v1-2020-2021-mitma-data-codebook",
Expand Down
12 changes: 8 additions & 4 deletions R/internal_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,11 @@ spod_expand_dates_from_regex <- function(date_regex) {
if (length(matching_dates_v1) == 0 && length(matching_dates_v2) == 0) {
stop(paste0(
"No matching dates found in the available data.",
"The valid dates range for v1 is: ", paste0(min(all_dates_v1), " to ", max(all_dates_v1)), " and for v2 is: ", paste0(min(all_dates_v2), " to ", max(all_dates_v2))
"\nThe valid dates range for v1 is: ",
paste(spod_convert_dates_to_ranges(all_dates_v1), collapse = ", "),
"\nThe valid dates range for v2 is: ",
paste(spod_convert_dates_to_ranges(all_dates_v2), collapse = ", "),
"."
))
}
# If checks above have passed, we can combine the matching dates as only one contains dates and the other is empty
Expand All @@ -221,12 +225,12 @@ spod_expand_dates_from_regex <- function(date_regex) {
#' @return A vector of type `Date` with all possible valid dates for the specified data version (v1 for 2020-2021 and v2 for 2020 onwards).
#' @export
spod_get_valid_dates <- function(ver = NULL) {
ver <- as.integer(ver) # todo: add type safety check
# Validate input
checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 or 2.")
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}


if (ver == 1) {
# available_data <- spod_available_data_v1(check_local_files = FALSE, quiet = TRUE)
# all_dates <- unique(available_data[grepl("maestra1.*diarios", available_data$target_url),]$data_ymd, na.rm = TRUE)
Expand Down
42 changes: 22 additions & 20 deletions R/quick-get.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,27 @@ spod_quick_get_od <- function(
id_origin = NA,
id_destination = NA
){
# Validate inputs
checkmate::assert_integerish(min_trips, lower = 0, null.ok = FALSE, max.len = 1)

# Mapping user-friendly distances to GraphQL expected values
distance_mapping <- c(
"500m-2km" = "D_05_2",
"2-10km" = "D_2_10",
"10-50km" = "D_10_50",
"50+km" = "D_50"
)
# Translate user-friendly distances into GraphQL distances
graphql_distances <- unname(distance_mapping[distances])

if (any(is.na(graphql_distances))) {
stop("Invalid distance value. Allowed values are: ",
paste(names(distance_mapping), collapse = ", "))
}

checkmate::assert_character(id_origin, null.ok = TRUE)
checkmate::assert_character(id_destination, null.ok = TRUE)

# Convert the date into YYYYMMDD format
if (is.character(date)) {
# Check for "YYYY-MM-DD" format
Expand Down Expand Up @@ -94,14 +115,6 @@ spod_quick_get_od <- function(
)
)
}

# Mapping user-friendly distances to GraphQL expected values
distance_mapping <- c(
"500m-2km" = "D_05_2",
"2-10km" = "D_2_10",
"10-50km" = "D_10_50",
"50+km" = "D_50"
)

# Municipalities checks
muni_ref <- readRDS(
Expand Down Expand Up @@ -138,19 +151,8 @@ spod_quick_get_od <- function(
validate_muni_ids(id_destination, muni_ref)
}


# Validate min_trips
if (!is.numeric(min_trips) || min_trips < 0) {
stop("Invalid minimum number of trips. Must be a non-negative integer.")
}

# Translate user-friendly distances into GraphQL distances
graphql_distances <- unname(distance_mapping[distances])

if (any(is.na(graphql_distances))) {
stop("Invalid distance value. Allowed values are: ",
paste(names(distance_mapping), collapse = ", "))
}


# Construct the `journeysMunCriteria` part of the query
journeysMunCriteria <- list(
Expand Down
Loading

0 comments on commit da7a822

Please sign in to comment.