argument checks for all user facing functions #111

Merged
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -26,6 +26,7 @@ BugReports: https://github.com/rOpenSpain/spanishoddata/issues
Depends:
R (>= 3.5.0)
Imports:
checkmate,
curl (>= 5.0.0),
DBI,
dplyr,
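The new {checkmate} Import above powers all of the validation added below. For context, a minimal sketch of how its assertions behave, standalone and outside the package: each `assert_*()` call returns its input invisibly when the check passes and stops with an informative error otherwise, so checks can simply be stacked at the top of a function body.

```r
library(checkmate)

assert_flag(TRUE)            # passes silently: a single TRUE/FALSE
assert_number(4, lower = 1)  # passes: one numeric value >= 1

# A failing assertion stops with a message naming the violated rule
# (wording approximate):
try(assert_flag("yes"))
#> Error : Assertion on 'x' failed: Must be of type 'logical flag', not 'character'.
```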
17 changes: 16 additions & 1 deletion R/connect_to_converted_data.R
@@ -18,6 +18,14 @@ spod_connect <- function(
max_n_cpu = parallelly::availableCores() - 1,
temp_path = spod_get_temp_dir()
){
# Validate inputs
checkmate::assert_access(data_path, access = 'r')
checkmate::assert_character(target_table_name, null.ok = TRUE)
checkmate::assert_flag(quiet)
checkmate::assert_number(max_mem_gb, lower = 1)
checkmate::assert_integerish(max_n_cpu, lower = 1, upper = parallelly::availableCores())
checkmate::assert_directory_exists(temp_path, access = "rw")

# determine if data_path is a folder or a duckdb file
if (grepl("\\.duckdb$", data_path)) {
duckdb_path <- data_path
@@ -102,7 +110,14 @@
#' spod_disconnect(od_distr)
#' }
#'
spod_disconnect <- function(tbl_con, free_mem = TRUE) {
spod_disconnect <- function(
tbl_con,
free_mem = TRUE
) {
# Validate inputs
checkmate::assert_class(tbl_con, "tbl_duckdb_connection")
checkmate::assert_flag(free_mem)

DBI::dbDisconnect(tbl_con$src$con, shutdown = TRUE)
if (free_mem){
gc()
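A hedged usage sketch of the connect/disconnect pair with the new checks in place; the data path is hypothetical and assumes data was previously converted with `spod_convert()`:

```r
library(spanishoddata)

# The new assertions verify the path is readable, max_mem_gb is a
# number >= 1, and max_n_cpu does not exceed the available cores,
# all before any DuckDB work starts.
od <- spod_connect(
  data_path = "~/spanish_od_data/od_distr.duckdb",  # hypothetical path
  max_mem_gb = 4,
  max_n_cpu = 2
)

# ... query `od` with dplyr verbs ...

# tbl_con must now be a tbl_duckdb_connection; free_mem a single flag
spod_disconnect(od, free_mem = TRUE)
```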
34 changes: 26 additions & 8 deletions R/convert_data.R
@@ -40,26 +40,44 @@ spod_convert <- function(
data_dir = spod_get_data_dir(),
quiet = FALSE,
max_mem_gb = max(4, spod_available_ram() - 4),
max_n_cpu = parallelly::availableCores() - 1,
max_n_cpu = max(1, parallelly::availableCores() - 1),
Collaborator: 👍
max_download_size_gb = 1,
ignore_missing_dates = FALSE
) {

# Validate inputs
checkmate::assert_choice(type, choices = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"))
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))
checkmate::assert_choice(save_format, choices = c("duckdb", "parquet"), null.ok = TRUE)
checkmate::assert_character(save_path, len = 1, null.ok = TRUE)
checkmate::assert_flag(overwrite)
checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_flag(quiet)
checkmate::assert_number(max_mem_gb, lower = 0.1)
checkmate::assert_integerish(max_n_cpu, lower = 1, upper = parallelly::availableCores())
checkmate::assert_number(max_download_size_gb, lower = 0.1)
checkmate::assert_flag(ignore_missing_dates)


# simple null check is enough here, as spod_dates_argument_to_dates_seq will do additional checks anyway
if (is.null(dates)) {
message("No period specified in the `dates` argument. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion.")
message("No period specified in the `dates` argument. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion. For more details on the dates argument, see ?spod_convert.")
}

dates <- spod_dates_argument_to_dates_seq(dates = dates)
ver <- spod_infer_data_v_from_dates(dates = dates, ignore_missing_dates = ignore_missing_dates)

# check if user is requesting to just get all cached data
cached_data_requested <- length(dates) == 1 &&
all(as.character(dates) %in% c("cached_v1", "cached_v2"))

type <- match.arg(type)

ver <- spod_infer_data_v_from_dates(dates = dates, ignore_missing_dates = ignore_missing_dates)

# normalise type
type <- spod_match_data_type(type = type)

zones <- match.arg(zones)
# normalise zones
zones <- spod_zone_names_en2es(zones)

# check format
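A hedged example of the `dates` semantics described in the message above; `'cached_v2'` and single dates within the documented ranges come straight from that message, while the return value being a path usable with `spod_connect()` is an assumption from the package docs:

```r
library(spanishoddata)

# Convert everything that was previously downloaded for v2:
db_path <- spod_convert(type = "od", zones = "distr", dates = "cached_v2")

# Or convert a single v1 date (valid v1 range: 2020-02-14 to 2021-05-09);
# any missing data is downloaded before conversion:
db_path <- spod_convert(type = "od", zones = "distr", dates = "2020-03-01")

# The result can then be opened with spod_connect(db_path).
```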
4 changes: 4 additions & 0 deletions R/data-dir.R
@@ -11,6 +11,9 @@ spod_set_data_dir <- function(
data_dir,
quiet = FALSE
){
checkmate::assert_character(data_dir, len = 1, null.ok = FALSE)
checkmate::assert_flag(quiet)

data_dir_abs_path <- fs::path_abs(data_dir)

tryCatch({
@@ -55,6 +58,7 @@
#' @export
#' @keywords internal
spod_get_data_dir <- function(quiet = FALSE) {
checkmate::assert_flag(quiet)
data_dir_env <- Sys.getenv("SPANISH_OD_DATA_DIR")
if (data_dir_env == "") {
if (isFALSE(quiet)) warning("Warning: SPANISH_OD_DATA_DIR is not set. Using the temporary directory, which is not recommended, as the data will be deleted when the session ends.\n\n To set the data directory, use `Sys.setenv(SPANISH_OD_DATA_DIR = '/path/to/data')` or set SPANISH_OD_DATA_DIR permanently in the environment by editing the `.Renviron` file locally for current project with `usethis::edit_r_environ('project')` or `file.edit('.Renviron')` or globally for all projects with `usethis::edit_r_environ('user')` or `file.edit('~/.Renviron')`.")
22 changes: 20 additions & 2 deletions R/download_data.R
@@ -52,10 +52,28 @@ spod_download <- function(
return_local_file_paths = FALSE,
ignore_missing_dates = FALSE
) {
# convert english zone names to spanish words used in the default data paths
zones <- match.arg(zones)

# Validate inputs
checkmate::assert_choice(type, choices = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"))
Collaborator: Great to see all these additional checks.
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))
checkmate::assert_number(max_download_size_gb, lower = 0.1)
checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_flag(quiet)
checkmate::assert_flag(return_local_file_paths)
checkmate::assert_flag(ignore_missing_dates)

# normalise zones
zones <- spod_zone_names_en2es(zones)

# simple null check is enough here, as spod_dates_argument_to_dates_seq will do additional checks anyway
if (is.null(dates)) {
message("`dates` argument is undefined. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion. For more details on the dates argument, see ?spod_download.")
}

dates_to_use <- spod_dates_argument_to_dates_seq(dates = dates)


29 changes: 22 additions & 7 deletions R/get.R
@@ -43,14 +43,25 @@ spod_get_zones <- function(
data_dir = spod_get_data_dir(),
quiet = FALSE
) {
ver <- as.integer(ver) # todo: add type safety check
# Validate inputs
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))

checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 or 2.")
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}

zones <- match.arg(zones)


checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_flag(quiet)

# normalise zones
zones <- spod_zone_names_en2es(zones)

if (ver == 1) {
zones_sf <- spod_get_zones_v1(zones = zones, data_dir = data_dir, quiet = quiet)
} else if (ver == 2) {
@@ -84,10 +95,14 @@ spod_available_data <- function(
quiet = FALSE,
data_dir = spod_get_data_dir()
) {
ver <- as.integer(ver) # todo: add type safety check
# Validate input
checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 or 2.")
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}
checkmate::assert_flag(check_local_files)
checkmate::assert_flag(quiet)
checkmate::assert_directory_exists(data_dir, access = "rw")

if (ver == 1) {
return(spod_available_data_v1(data_dir = data_dir, check_local_files = check_local_files, quiet = quiet))
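From the caller's perspective, a hedged sketch of what the tightened `ver` and `zones` validation in `spod_get_zones()` accepts and rejects:

```r
library(spanishoddata)

zones_v1 <- spod_get_zones("distr", ver = 1)           # v1: 2020-2021 data
zones_v2 <- spod_get_zones("municipalities", ver = 2)  # v2: 2022 onwards

# Any other version now fails fast with the clearer message:
try(spod_get_zones("distr", ver = 3))
#> Error : Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).
```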
26 changes: 21 additions & 5 deletions R/get_v1_data.R
@@ -488,15 +488,31 @@ spod_get <- function(
temp_path = spod_get_temp_dir(),
ignore_missing_dates = FALSE
) {

# Validate inputs
checkmate::assert_choice(type, choices = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"))
checkmate::assert_choice(zones, choices = c(
"districts", "dist", "distr", "distritos",
"municipalities", "muni", "municip", "municipios",
"lua", "large_urban_areas", "gau", "grandes_areas_urbanas"
))
checkmate::assert_flag(quiet)
checkmate::assert_number(max_mem_gb, lower = 1)
checkmate::assert_integerish(max_n_cpu, lower = 1, upper = parallelly::availableCores())
checkmate::assert_number(max_download_size_gb, lower = 0.1)
checkmate::assert_string(duckdb_target)
checkmate::assert_directory_exists(data_dir, access = "rw")
checkmate::assert_directory_exists(temp_path, access = "rw")
checkmate::assert_flag(ignore_missing_dates)

# simple null check is enough here, as spod_dates_argument_to_dates_seq will do additional checks anyway
if (is.null(dates)) {
message("`dates` argument is undefined. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion.")
message("`dates` argument is undefined. Please set `dates='cached_v1'` or `dates='cached_v2'` to convert all data that was previously downloaded. Alternatively, specify at least one date between 2020-02-14 and 2021-05-09 (for v1 data) or between 2022-01-01 onwards (for v2). Any missing data will be downloaded before conversion. For more details on the dates argument, see ?spod_get.")
}


type <- match.arg(type)
# normalise type
type <- spod_match_data_type(type = type)

zones <- match.arg(zones)
# normalise zones
zones <- spod_zone_names_en2es(zones)

# check if user is requesting to just get all cached data
9 changes: 7 additions & 2 deletions R/help.R
@@ -9,8 +9,13 @@
#' @importFrom utils vignette
#' @export
#'
spod_codebook = function(ver = 1) {
ver <- as.integer(ver)
spod_codebook <- function(ver = 1) {
# Validate input
checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}

if (ver == 1){
help <- vignette(
topic = "v1-2020-2021-mitma-data-codebook",
12 changes: 8 additions & 4 deletions R/internal_utils.R
@@ -206,7 +206,11 @@ spod_expand_dates_from_regex <- function(date_regex) {
if (length(matching_dates_v1) == 0 && length(matching_dates_v2) == 0) {
stop(paste0(
"No matching dates found in the available data.",
"The valid dates range for v1 is: ", paste0(min(all_dates_v1), " to ", max(all_dates_v1)), " and for v2 is: ", paste0(min(all_dates_v2), " to ", max(all_dates_v2))
"\nThe valid dates range for v1 is: ",
paste(spod_convert_dates_to_ranges(all_dates_v1), collapse = ", "),
"\nThe valid dates range for v2 is: ",
paste(spod_convert_dates_to_ranges(all_dates_v2), collapse = ", "),
"."
))
}
# If checks above have passed, we can combine the matching dates as only one contains dates and the other is empty
@@ -221,12 +225,12 @@
#' @return A vector of type `Date` with all possible valid dates for the specified data version (v1 for 2020-2021 and v2 for 2020 onwards).
#' @export
spod_get_valid_dates <- function(ver = NULL) {
ver <- as.integer(ver) # todo: add type safety check
# Validate input
checkmate::assertIntegerish(ver, max.len = 1)
if (!ver %in% c(1, 2)) {
stop("Invalid version number. Must be 1 or 2.")
stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
}


if (ver == 1) {
# available_data <- spod_available_data_v1(check_local_files = FALSE, quiet = TRUE)
# all_dates <- unique(available_data[grepl("maestra1.*diarios", available_data$target_url),]$data_ymd, na.rm = TRUE)
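The improved error message above calls a `spod_convert_dates_to_ranges()` helper whose implementation is not part of this diff. A minimal sketch of what such a helper presumably does, collapsing a Date vector into human-readable contiguous ranges (a hypothetical reimplementation, not the package's actual code):

```r
spod_convert_dates_to_ranges <- function(dates) {
  dates <- sort(unique(as.Date(dates)))
  # start a new group wherever the gap to the previous date exceeds one day
  group <- cumsum(c(1, diff(dates) > 1))
  vapply(
    split(dates, group),
    function(d) {
      if (length(d) == 1L) as.character(d) else paste(min(d), "to", max(d))
    },
    character(1)
  )
}

unname(spod_convert_dates_to_ranges(c("2020-02-14", "2020-02-15", "2020-02-17")))
#> [1] "2020-02-14 to 2020-02-15" "2020-02-17"
```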
42 changes: 22 additions & 20 deletions R/quick-get.R
@@ -36,6 +36,27 @@ spod_quick_get_od <- function(
id_origin = NA,
id_destination = NA
){
# Validate inputs
checkmate::assert_integerish(min_trips, lower = 0, null.ok = FALSE, max.len = 1)

# Mapping user-friendly distances to GraphQL expected values
distance_mapping <- c(
"500m-2km" = "D_05_2",
"2-10km" = "D_2_10",
"10-50km" = "D_10_50",
"50+km" = "D_50"
)
# Translate user-friendly distances into GraphQL distances
graphql_distances <- unname(distance_mapping[distances])

if (any(is.na(graphql_distances))) {
stop("Invalid distance value. Allowed values are: ",
paste(names(distance_mapping), collapse = ", "))
}

checkmate::assert_character(id_origin, null.ok = TRUE)
checkmate::assert_character(id_destination, null.ok = TRUE)

# Convert the date into YYYYMMDD format
if (is.character(date)) {
# Check for "YYYY-MM-DD" format
Expand Down Expand Up @@ -94,14 +115,6 @@ spod_quick_get_od <- function(
)
)
}

# Mapping user-friendly distances to GraphQL expected values
distance_mapping <- c(
"500m-2km" = "D_05_2",
"2-10km" = "D_2_10",
"10-50km" = "D_10_50",
"50+km" = "D_50"
)

# Municipalities checks
muni_ref <- readRDS(
Expand Down Expand Up @@ -138,19 +151,8 @@ spod_quick_get_od <- function(
validate_muni_ids(id_destination, muni_ref)
}


# Validate min_trips
if (!is.numeric(min_trips) || min_trips < 0) {
stop("Invalid minimum number of trips. Must be a non-negative integer.")
}

# Translate user-friendly distances into GraphQL distances
graphql_distances <- unname(distance_mapping[distances])

if (any(is.na(graphql_distances))) {
stop("Invalid distance value. Allowed values are: ",
paste(names(distance_mapping), collapse = ", "))
}


# Construct the `journeysMunCriteria` part of the query
journeysMunCriteria <- list(
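Finally, a hedged usage example of `spod_quick_get_od()` with the now-validated arguments; the values are chosen to satisfy the checks shown above, and the exact return shape is not specified in this diff:

```r
library(spanishoddata)

od_flows <- spod_quick_get_od(
  date = "2022-01-10",                 # "YYYY-MM-DD", per the date format check
  min_trips = 100,                     # single non-negative integer
  distances = c("2-10km", "10-50km")   # mapped internally to D_2_10, D_10_50
)
```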