From 105baf294c3ebb6bf2c1921b4dbc16261b656ee7 Mon Sep 17 00:00:00 2001 From: Egor Kotov Date: Tue, 17 Dec 2024 12:18:16 +0100 Subject: [PATCH 1/2] make data downloading examples conditional on interactive session --- R/available-data.R | 2 +- R/connect.R | 2 +- R/convert.R | 2 +- R/data-dir.R | 1 + R/disconnect.R | 2 +- R/download_data.R | 2 +- R/get-zones.R | 2 +- R/get.R | 2 +- R/internal-utils.R | 2 +- R/quick-get.R | 2 +- man/spod_available_data.Rd | 3 ++- man/spod_connect.Rd | 3 ++- man/spod_convert.Rd | 3 ++- man/spod_disconnect.Rd | 3 ++- man/spod_download.Rd | 3 ++- man/spod_get.Rd | 3 ++- man/spod_get_data_dir.Rd | 1 + man/spod_get_valid_dates.Rd | 3 ++- man/spod_get_zones.Rd | 3 ++- man/spod_quick_get_od.Rd | 3 ++- 20 files changed, 29 insertions(+), 18 deletions(-) diff --git a/R/available-data.R b/R/available-data.R index 6c521fe..c7cf0cd 100644 --- a/R/available-data.R +++ b/R/available-data.R @@ -16,7 +16,7 @@ #' \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if `check_local_files` is `TRUE`.} #' } #' @export -#' @examples +#' @examplesIf interactive() #' \donttest{ #' #' # Set data dir for file downloads diff --git a/R/connect.R b/R/connect.R index e3800bc..d9d686b 100644 --- a/R/connect.R +++ b/R/connect.R @@ -11,7 +11,7 @@ #' @export #' @return a `DuckDB` table connection object. #' -#' @examples +#' @examplesIf interactive() #' \donttest{ #' # Set data dir for file downloads #' spod_set_data_dir(tempdir()) diff --git a/R/convert.R b/R/convert.R index cad7534..d9159b3 100644 --- a/R/convert.R +++ b/R/convert.R @@ -24,7 +24,7 @@ #' @return Path to saved `DuckDB` database file or to a folder with `parquet` files in hive-style directory structure. #' @export #' -#' @examples +#' @examplesIf interactive() #' \donttest{ #' # Set data dir for file downloads #' spod_set_data_dir(tempdir()) diff --git a/R/data-dir.R b/R/data-dir.R index a319225..94de7ce 100644 --- a/R/data-dir.R +++ b/R/data-dir.R @@ -59,6 +59,7 @@ spod_set_data_dir <- function( #' @return A `character` vector of length 1 containing the path to the data directory where the package will download and convert the data. #' @export #' @examples +#' spod_set_data_dir(tempdir()) #' spod_get_data_dir() #' spod_get_data_dir <- function(quiet = FALSE) { diff --git a/R/disconnect.R b/R/disconnect.R index 41f22c8..95ab438 100644 --- a/R/disconnect.R +++ b/R/disconnect.R @@ -6,7 +6,7 @@ #' @param free_mem A `logical`. Whether to free up memory by running `gc()`. Defaults to `TRUE`. #' @return No return value, called for side effect of disconnecting from the database and freeing up memory. #' @export -#' @examples +#' @examplesIf interactive() #' \donttest{ #' # Set data dir for file downloads #' spod_set_data_dir(tempdir()) diff --git a/R/download_data.R b/R/download_data.R index c905b52..e94790d 100644 --- a/R/download_data.R +++ b/R/download_data.R @@ -13,7 +13,7 @@ #' @return Nothing. If `return_local_file_paths = TRUE`, a `character` vector of the paths to the downloaded files. #' #' @export -#' @examples +#' @examplesIf interactive() #' \donttest{ #' #' # Set data dir for file downloads diff --git a/R/get-zones.R b/R/get-zones.R index 9763442..62a488f 100644 --- a/R/get-zones.R +++ b/R/get-zones.R @@ -33,7 +33,7 @@ #' } #' #' @export -#' @examples +#' @examplesIf interactive() #' \donttest{ #' # get polygons for municipalities for the v2 data #' municip_v2 <- spod_get_zones(zones = "municipalities", ver = 2) diff --git a/R/get.R b/R/get.R index 42de7e2..93ed132 100644 --- a/R/get.R +++ b/R/get.R @@ -13,7 +13,7 @@ #' @inheritParams global_quiet_param #' @return A DuckDB lazy table connection object of class `tbl_duckdb_connection`. #' @export -#' @examples +#' @examplesIf interactive() #' \donttest{ #' #' # create a connection to the v1 data diff --git a/R/internal-utils.R b/R/internal-utils.R index add1cd2..49523cd 100644 --- a/R/internal-utils.R +++ b/R/internal-utils.R @@ -229,7 +229,7 @@ spod_expand_dates_from_regex <- function(date_regex) { #' @inheritParams spod_available_data #' @return A vector of type `Date` with all possible valid dates for the specified data version (v1 for 2020-2021 and v2 for 2020 onwards). #' @export -#' @examples +#' @examplesIf interactive() #' \donttest{ #' # Get all valid dates for v1 (2020-2021) data #' spod_get_valid_dates(ver = 1) diff --git a/R/quick-get.R b/R/quick-get.R index 568ae66..98b5fc3 100644 --- a/R/quick-get.R +++ b/R/quick-get.R @@ -20,7 +20,7 @@ #' #' @export #' -#' @examples +#' @examplesIf interactive() #' \donttest{ #' od_1000 <- spod_quick_get_od( #' date = "2022-01-01", diff --git a/man/spod_available_data.Rd b/man/spod_available_data.Rd index 68b78b5..cfe6033 100644 --- a/man/spod_available_data.Rd +++ b/man/spod_available_data.Rd @@ -36,6 +36,7 @@ A tibble with links, release dates of files in the data, dates of data coverage, Get a table with links to available data files for the specified data version. Optionally check (see arguments) if certain files have already been downloaded into the cache directory specified with SPANISH_OD_DATA_DIR environment variable (set by \link{spod_set_data_dir}) or a custom path specified with \code{data_dir} argument. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # Set data dir for file downloads @@ -51,5 +52,5 @@ spod_available_data(ver = 2) # while also checking for local files that are already downloaded spod_available_data(ver = 2, check_local_files = TRUE) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_connect.Rd b/man/spod_connect.Rd index 6c2dbb7..ab93b55 100644 --- a/man/spod_connect.Rd +++ b/man/spod_connect.Rd @@ -33,6 +33,7 @@ a \code{DuckDB} table connection object. This function allows the user to quickly connect to the data converted to DuckDB with the \link{spod_convert} function. This function simplifies the connection process. The user is free to use the \code{DBI} and \code{DuckDB} packages to connect to the data manually, or to use the \code{arrow} package to connect to the \code{parquet} files folder. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # Set data dir for file downloads spod_set_data_dir(tempdir()) @@ -52,5 +53,5 @@ my_od_data_2 <- spod_connect(db_2) # disconnect from the database spod_disconnect(my_od_data_2) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_convert.Rd b/man/spod_convert.Rd index 2ab9e2e..a055659 100644 --- a/man/spod_convert.Rd +++ b/man/spod_convert.Rd @@ -72,6 +72,7 @@ Path to saved \code{DuckDB} database file or to a folder with \code{parquet} fil Converts data for faster analysis into either \code{DuckDB} file or into \code{parquet} files in a hive-style directory structure. Running analysis on these files is sometimes 100x times faster than working with raw CSV files, espetially when these are in gzip archives. To connect to converted data, please use 'mydata <- \link{spod_connect}(data_path = path_returned_by_spod_convert)' passing the path to where the data was saved. The connected \code{mydata} can be analysed using \code{dplyr} functions such as \link[dplyr]{select}, \link[dplyr]{filter}, \link[dplyr]{mutate}, \link[dplyr]{group_by}, \link[dplyr]{summarise}, etc. In the end of any sequence of commands you will need to add \link[dplyr]{collect} to execute the whole chain of data manipulations and load the results into memory in an R \code{data.frame}/\code{tibble}. For more in-depth usage of such data, please refer to DuckDB documentation and examples at \url{https://duckdb.org/docs/api/r#dbplyr} . Some more useful examples can be found here \url{https://arrow-user2022.netlify.app/data-wrangling#combining-arrow-with-duckdb} . You may also use \code{arrow} package to work with parquet files \url{https://arrow.apache.org/docs/r/}. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # Set data dir for file downloads spod_set_data_dir(tempdir()) @@ -91,5 +92,5 @@ my_od_data_2 <- spod_connect(db_2) # disconnect from the database spod_disconnect(my_od_data_2) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_disconnect.Rd b/man/spod_disconnect.Rd index 48b4a32..5434ded 100644 --- a/man/spod_disconnect.Rd +++ b/man/spod_disconnect.Rd @@ -18,6 +18,7 @@ No return value, called for side effect of disconnecting from the database and f This function is to ensure that \code{DuckDB} connections to CSV.gz files (created via \code{spod_get()}), as well as to \code{DuckDB} files or folders of \code{parquet} files (created via \code{spod_convert()}) are closed properly to prevent conflicting connections. Essentially this is just a wrapper around \code{DBI::dbDisconnect()} that reaches out into the \code{.$src$con} object of the \code{tbl_duckdb_connection} connection object that is returned to the user via \code{spod_get()} and \code{spod_connect()}. After disonnecting the database, it also frees up memory by running \code{gc()}. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # Set data dir for file downloads spod_set_data_dir(tempdir()) @@ -49,5 +50,5 @@ my_od_data_2 <- spod_connect(db_2) # disconnect from the database spod_disconnect(my_od_data_2) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_download.Rd b/man/spod_download.Rd index 5ea5db2..081aca5 100644 --- a/man/spod_download.Rd +++ b/man/spod_download.Rd @@ -53,6 +53,7 @@ Nothing. If \code{return_local_file_paths = TRUE}, a \code{character} vector of This function downloads the data files of the specified type, zones, dates and data version. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # Set data dir for file downloads @@ -77,5 +78,5 @@ spod_download( dates = "2020032[0-4]" ) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_get.Rd b/man/spod_get.Rd index 10e987e..8ec6675 100644 --- a/man/spod_get.Rd +++ b/man/spod_get.Rd @@ -66,6 +66,7 @@ If you want to analyse longer periods of time (especiially several months or eve If you want to quickly get the origin-destination data with flows aggregated for a single day at municipal level and without any extra socio-economic variables, consider using the \link[spanishoddata]{spod_quick_get_od} function. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # create a connection to the v1 data @@ -83,5 +84,5 @@ DBI::dbListTables(nt_dist$src$con) # disconnect spod_disconnect(nt_dist) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_get_data_dir.Rd b/man/spod_get_data_dir.Rd index b6e9a77..45ad881 100644 --- a/man/spod_get_data_dir.Rd +++ b/man/spod_get_data_dir.Rd @@ -17,6 +17,7 @@ This function retrieves the data directory from the environment variable SPANISH If the environment variable is not set, it returns the temporary directory. } \examples{ +spod_set_data_dir(tempdir()) spod_get_data_dir() } diff --git a/man/spod_get_valid_dates.Rd b/man/spod_get_valid_dates.Rd index 964bbac..fb0e9f5 100644 --- a/man/spod_get_valid_dates.Rd +++ b/man/spod_get_valid_dates.Rd @@ -16,6 +16,7 @@ A vector of type \code{Date} with all possible valid dates for the specified dat Get valid dates for the specified data version } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # Get all valid dates for v1 (2020-2021) data spod_get_valid_dates(ver = 1) @@ -23,5 +24,5 @@ spod_get_valid_dates(ver = 1) # Get all valid dates for v2 (2020 onwards) data spod_get_valid_dates(ver = 2) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_get_zones.Rd b/man/spod_get_zones.Rd index 6679ded..ade9217 100644 --- a/man/spod_get_zones.Rd +++ b/man/spod_get_zones.Rd @@ -53,6 +53,7 @@ The columns for v2 (2022 onwards) data include: Get spatial zones for the specified data version. Supports both v1 (2020-2021) and v2 (2022 onwards) data. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ # get polygons for municipalities for the v2 data municip_v2 <- spod_get_zones(zones = "municipalities", ver = 2) @@ -60,5 +61,5 @@ municip_v2 <- spod_get_zones(zones = "municipalities", ver = 2) # get polygons for the districts for the v1 data distr_v1 <- spod_get_zones(zones = "districts", ver = 1) } - +\dontshow{\}) # examplesIf} } diff --git a/man/spod_quick_get_od.Rd b/man/spod_quick_get_od.Rd index 7177c8c..f7681bb 100644 --- a/man/spod_quick_get_od.Rd +++ b/man/spod_quick_get_od.Rd @@ -37,6 +37,7 @@ A \code{tibble} containing the flows for the specified date, minimum number of j This function provides a quick way to get daily aggregated (no hourly data) trip counts per origin-destination municipality from v2 data (2022 onward). Compared to \link[spanishoddata]{spod_get}, which downloads large CSV files, this function downloads the data directly from the GraphQL API. No data aggregation is performed on your computer (unlike in \link[spanishoddata]{spod_get}), so you do not need to worry about memory usage and do not have to use a powerful computer with multiple CPU cores just to get this simple data. Only about 1 MB of data is downloaded for a single day. The limitation of this function is that it can only retrieve data for a single day at a time and only with total number of trips and total km travelled. So it is not possible to get any of the extra variables available in the full dataset via \link[spanishoddata]{spod_get}. } \examples{ +\dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} \donttest{ od_1000 <- spod_quick_get_od( date = "2022-01-01", @@ -44,5 +45,5 @@ od_1000 <- spod_quick_get_od( ) } - +\dontshow{\}) # examplesIf} } From 459d230ed773cd3deeab328a62eed2596ebd9d96 Mon Sep 17 00:00:00 2001 From: Egor Kotov Date: Tue, 17 Dec 2024 12:23:10 +0100 Subject: [PATCH 2/2] bring back running of examples --- .github/workflows/R-CMD-check.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 8aa4529..9f3613e 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -49,4 +49,4 @@ jobs: - uses: r-lib/actions/check-r-package@v2 with: upload-snapshots: true - build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf", "--no-examples")' + build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf")'