From fd1708fd959d913c5aa2133353939e6ed484b852 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Thu, 15 Dec 2022 17:58:34 -0700 Subject: [PATCH] Revamp `board_url()` to handle manifest files (#681) * Stash work on versioned board_url * Get started on `board_url()` changes after manifest work * Fix `pin_meta` method * Update test to accommodate `is_testing()` * Update `pin_meta` to check for versioning * Update NEWS * Get rid of "No encoding supplied: defaulting to UTF-8." * Rename to `end_with_slash()` * Pass through other board args correctly * Update R/board_url.R Co-authored-by: Hadley Wickham * Feedback from code review * Test error messages better * Update test for new error message * simplify url-patching for manifest (#686) * Add example for manifest file * Update from master to main * Update R/board_url.R Co-authored-by: Hadley Wickham * Update R/board_url.R Co-authored-by: Hadley Wickham * Fix up error message handling * Remember that paste0 is already vectorized * Remove unnecessary `which()` Co-authored-by: Hadley Wickham Co-authored-by: Ian Lyttle --- NAMESPACE | 2 + NEWS.md | 3 + R/board.R | 2 +- R/board_url.R | 204 ++++++++++++++++++++++++----- R/testthat.R | 5 + R/utils.R | 2 +- man/board_url.Rd | 69 +++++++--- tests/testthat/_snaps/board_url.md | 40 +++++- tests/testthat/test-board_folder.R | 2 +- tests/testthat/test-board_url.R | 57 +++++++- tests/testthat/test-utils.R | 10 +- 11 files changed, 334 insertions(+), 62 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index bf4fcdabe..709330a7d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -114,6 +114,7 @@ S3method(pin_version_delete,pins_board_folder) S3method(pin_version_delete,pins_board_ms365) S3method(pin_version_delete,pins_board_rsconnect) S3method(pin_version_delete,pins_board_s3) +S3method(pin_version_delete,pins_board_url) S3method(pin_versions,pins_board) S3method(pin_versions,pins_board_azure) S3method(pin_versions,pins_board_folder) @@ -121,6 +122,7 @@ S3method(pin_versions,pins_board_kaggle_dataset) S3method(pin_versions,pins_board_ms365) S3method(pin_versions,pins_board_rsconnect) S3method(pin_versions,pins_board_s3) +S3method(pin_versions,pins_board_url) S3method(print,pin_info) S3method(print,pins_board) S3method(print,pins_hidden) diff --git a/NEWS.md b/NEWS.md index 0cc7c3a58..0013323a1 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,9 @@ recording all pins and their versions to the board's root directory. This function only works for boards that are not read-only (#661, based on work of @ijlyttle). + +* Updated `board_url()` to handle versions recorded via a manifest file + (#681, based on work of @ijlyttle). # pins 1.0.3 diff --git a/R/board.R b/R/board.R index c2335d629..6f096e931 100644 --- a/R/board.R +++ b/R/board.R @@ -222,7 +222,7 @@ make_manifest <- function(board) { result <- map( pin_names, ~fs::path(.x, pin_versions(board, name = .x)$version) %>% - append_slash() %>% # versions usually don't include slash + end_with_slash() %>% # versions usually don't include slash as.list() ) names(result) <- pin_names diff --git a/R/board_url.R b/R/board_url.R index 8e7161917..1312d3c5f 100644 --- a/R/board_url.R +++ b/R/board_url.R @@ -1,52 +1,95 @@ #' Use a vector of URLs as a board #' #' @description -#' `board_url()` lets you build up a board from individual urls. This is -#' useful because [pin_download()] and [pin_read()] will be cached - they'll -#' only re-download the data if it's changed from the last time you downloaded -#' it (using the tools of -#' [HTTP caching](https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching)). -#' You'll also be protected from the vagaries of the internet; if a fresh -#' download fails, you'll get the previously cached result with a warning. +#' `board_url()` lets you build up a board from individual urls or a [manifest +#' file][write_board_manifest()]. #' -#' `board_url()` is read only and does not currently support versions. +#' `board_url()` is read only. #' -#' @param urls A named character vector of URLs If the URL ends in a `/`, -#' `board_url` will look for a `data.txt` that provides metadata. The -#' easiest way to generate this file is to upload a pin directory created by -#' [board_folder()]. +#' @param urls Identify available pins being served at a URL or set of URLs (see details): +#' - Unnamed string: URL to a [manifest file][write_board_manifest()]. +#' - Named character vector: URLs to specific pins (does not support versioning). +#' - Named list: URLs to pin version directories (supports versioning). #' @param use_cache_on_failure If the pin fails to download, is it ok to #' use the last cached version? Defaults to `is_interactive()` so you'll #' be robust to poor internet connectivity when exploring interactively, #' but you'll get clear errors when the code is deployed. #' @family boards #' @inheritParams new_board +#' @details +#' The way `board_url()` works depends on the type of the `urls` argument: +#' - Unnamed character scalar, i.e. **a single URL** to a +#' [manifest file][write_board_manifest()]: If the URL ends in a `/`, +#' `board_url()` will look for a `_pins.yaml` manifest. If the manifest +#' file parses to a named list, versioning is supported. If it parses to a +#' named character vector, the board will not support versioning. +#' - **Named character vector of URLs**: If the URLs end in a `/`, +#' `board_url()` will look for a `data.txt` that provides metadata for the +#' associated pin. The easiest way to generate this file is to upload a pin +#' version directory created by [board_folder()]. Versioning is not supported. +#' - **Named list**, where the values are character vectors of URLs and each +#' element of the vector refers to a version of the particular pin: If a +#' URL ends in a `/`, `board_url()` will look for a `data.txt` that +#' provides metadata. Versioning is supported. +#' +#' Using a vector of URLs can be useful because [pin_download()] and +#' [pin_read()] will be cached; they'll only re-download the data if it's +#' changed from the last time you downloaded it (using the tools of +#' [HTTP caching](https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching)). +#' You'll also be protected from the vagaries of the internet; if a fresh +#' download fails, you'll get the previously cached result with a warning. +#' +#' Using a [manifest file][write_board_manifest()] can be useful because you +#' can serve a board of pins and allow collaborators to access the board +#' straight from a URL, without worrying about board-level storage details. +#' #' @export #' @examples -#' github_raw <- "https://raw.githubusercontent.com/" -#' board <- board_url(c( -#' files = paste0(github_raw, "rstudio/pins-r/master/tests/testthat/pin-files/"), -#' rds = paste0(github_raw, "rstudio/pins-r/master/tests/testthat/pin-rds/"), -#' raw = paste0(github_raw, "rstudio/pins-r/master/tests/testthat/pin-files/first.txt") +#' github_raw <- function(x) paste0("https://raw.githubusercontent.com/", x) +#' +#' ## with a named vector of URLs to specific pins: +#' b1 <- board_url(c( +#' files = github_raw("rstudio/pins-r/main/tests/testthat/pin-files/"), +#' rds = github_raw("rstudio/pins-r/main/tests/testthat/pin-rds/"), +#' raw = github_raw("rstudio/pins-r/main/tests/testthat/pin-files/first.txt") #' )) #' -#' board %>% pin_read("rds") -#' board %>% pin_browse("rds", local = TRUE) +#' b1 %>% pin_read("rds") +#' b1 %>% pin_browse("rds", local = TRUE) #' -#' board %>% pin_download("files") -#' board %>% pin_download("raw") -board_url <- function(urls, cache = NULL, use_cache_on_failure = is_interactive()) { - if (!is.character(urls) || !is_named(urls)) { - abort("`urls` must be a named character vector") +#' b1 %>% pin_download("files") +#' b1 %>% pin_download("raw") +#' +#' ## with a manifest file: +#' b2 <- board_url(github_raw("rstudio/pins-r/main/tests/testthat/pin-board/")) +#' b2 %>% pin_list() +#' b2 %>% pin_versions("y") +#' +board_url <- function(urls, + cache = NULL, + use_cache_on_failure = is_interactive()) { + + url_format <- get_url_format(urls) + if (url_format == "pins_yaml") { + manifest <- get_manifest(urls) + board <- board_url( + manifest, + cache = cache, + use_cache_on_failure = use_cache_on_failure + ) + return(board) } + versioned <- url_format == "manifest_content" # Share cache across all instances of board_url(); pins are stored in # directories based on the hash of the URL to avoid cache collisions. cache <- cache %||% board_cache_path("url") - new_board_v1("pins_board_url", + new_board_v1( + "pins_board_url", urls = urls, cache = cache, + versioned = versioned, use_cache_on_failure = use_cache_on_failure ) } @@ -69,14 +112,19 @@ pin_exists.pins_board_url <- function(board, name, ...) { pin_meta.pins_board_url <- function(board, name, version = NULL, ...) { check_name(name) check_pin_exists(board, name) - - if (!is.null(version)) { - abort("board_url() doesn't support versions") + if (!is.null(version) && !board$versioned) { + abort_board_not_versioned("board_url") } url <- board$urls[[name]] - is_dir <- grepl("/$", url) + if (board$versioned) { + versions <- pin_versions(board, name)$version + version <- check_pin_version(board, name, version) + url <- board$urls[[name]][[versions == version]] + } + + is_dir <- grepl("/$", url) cache_dir <- fs::path(board$cache, hash(url)) fs::dir_create(cache_dir) @@ -89,7 +137,8 @@ pin_meta.pins_board_url <- function(board, name, version = NULL, ...) { use_cache_on_failure = board$use_cache_on_failure ) meta <- read_meta(cache_dir) - local_meta(meta, + local_meta( + meta, name = name, dir = cache_dir, url = url, @@ -102,7 +151,8 @@ pin_meta.pins_board_url <- function(board, name, version = NULL, ...) { file = fs::path_file(url), api_version = 1 ) - local_meta(meta, + local_meta( + meta, name = name, dir = cache_dir, url = url, @@ -111,6 +161,20 @@ pin_meta.pins_board_url <- function(board, name, version = NULL, ...) { } } +#' @export +pin_versions.pins_board_url <- function(board, name, ...) { + + if (!board$versioned) { + abort_board_not_versioned("board_url") + } + + check_name(name) + check_pin_exists(board, name) + + paths <- board$urls[[name]] + version_from_path(fs::path_file(paths)) +} + #' @export pin_fetch.pins_board_url <- function(board, name, version = NULL, ...) { meta <- pin_meta(board, name, version = version) @@ -141,12 +205,90 @@ pin_store.pins_board_url <- function(board, name, paths, metadata, abort_board_read_only("board_url") } +#' @export +pin_version_delete.pins_board_url <- function(board, name, version, ...) { + abort_board_read_only("board_url") +} + #' @export write_board_manifest_yaml.pins_board_url <- function(board, manifest, ...) { abort_board_read_only("board_url") } # Helpers ------------------------------------------------------------------ + +get_url_format <- function(urls) { + if (is_scalar_character(urls) && !is_named(urls)) { + "pins_yaml" + } else if (is_list(urls) && is_named(urls) && all(map_lgl(urls, is_character))) { + "manifest_content" + } else if (is.character(urls) && is_named(urls)) { + "vector_of_urls" + } else { + cli::cli_abort( + c( + "{.var urls} must resolve to either:", + "*" = "an unnamed character scalar, i.e. a single URL", + "*" = "a named character vector", + "*" = "a named list, where all elements are character scalars or vectors" + ), + class = "pins_error_board_url_argument", + urls = urls + ) + } +} + +get_manifest <- function(url, call = rlang::caller_env()) { + # if ends with "/", look for manifest + if (grepl("/$", url)) { + url <- paste0(url, manifest_pin_yaml_filename) + } + + # if request fails or returns with error code + tryCatch( + { + resp <- httr::GET(url) + httr::stop_for_status(resp) + }, + error = function(e) { + cli::cli_abort( + message = "Failed to access manifest file at {.url {url}}:", + class = "pins_error_board_url_request", + parent = e, + url = url, + call = call + ) + } + ) + + # if file is not parsable + tryCatch( + { + text <- httr::content(resp, as = "text", encoding = "UTF-8") + manifest <- yaml::yaml.load(text) + }, + error = function(e) { + cli::cli_abort( + message = c( + "Failed to parse manifest file at URL {.url {url}}:", + " " = "{e$message}", + "i" = "Manifest file must be text and parsable as YAML." + ), + class = "pins_error_board_url_parse", + parent = e, + resp = resp, + call = call + ) + } + ) + + # url_root is directory containing manifest-file + url_root <- sub("[^/]*$", "", url) + # for each manifest entry, prepend url_root to each path entry + manifest <- map(manifest, ~ paste0(url_root, .x)) + manifest +} + http_download <- function(url, path_dir, path_file, ..., use_cache_on_failure = FALSE, on_failure = NULL) { diff --git a/R/testthat.R b/R/testthat.R index 01d009de2..2b167a87f 100644 --- a/R/testthat.R +++ b/R/testthat.R @@ -215,3 +215,8 @@ abort_pin_versioned <- function() { abort_board_read_only <- function(board) { abort(glue("{board}() is read only"), class = "pins_board_read_only") } + +abort_board_not_versioned <- function(board) { + cli::cli_abort(glue("This {board}() is not versioned"), + class = "pins_board_not_versioned") +} diff --git a/R/utils.R b/R/utils.R index e5199c38c..ee6792c16 100644 --- a/R/utils.R +++ b/R/utils.R @@ -21,7 +21,7 @@ is_url <- function(x) { grepl("^http://|^https://", x) } -append_slash <- function(x) { +end_with_slash <- function(x) { has_slash <- grepl("/$", x) x[!has_slash] <- paste0(x[!has_slash], "/") x diff --git a/man/board_url.Rd b/man/board_url.Rd index a2aa5fea6..cd0ab4331 100644 --- a/man/board_url.Rd +++ b/man/board_url.Rd @@ -7,10 +7,12 @@ board_url(urls, cache = NULL, use_cache_on_failure = is_interactive()) } \arguments{ -\item{urls}{A named character vector of URLs If the URL ends in a \code{/}, -\code{board_url} will look for a \code{data.txt} that provides metadata. The -easiest way to generate this file is to upload a pin directory created by -\code{\link[=board_folder]{board_folder()}}.} +\item{urls}{Identify available pins being served at a URL or set of URLs (see details): +\itemize{ +\item Unnamed character scalar: URL to a \link[=write_board_manifest]{manifest file}. +\item Named character vector: URLs to specific pins (does not support versioning). +\item Named list: URLs to pin version directories (supports versioning). +}} \item{cache}{Cache path. Every board requires a local cache to avoid downloading files multiple times. The default stores in a standard @@ -22,29 +24,60 @@ be robust to poor internet connectivity when exploring interactively, but you'll get clear errors when the code is deployed.} } \description{ -\code{board_url()} lets you build up a board from individual urls. This is -useful because \code{\link[=pin_download]{pin_download()}} and \code{\link[=pin_read]{pin_read()}} will be cached - they'll -only re-download the data if it's changed from the last time you downloaded -it (using the tools of +\code{board_url()} lets you build up a board from individual urls or a \link[=write_board_manifest]{manifest file}. + +\code{board_url()} is read only. +} +\details{ +The way \code{board_url()} works depends on the type of the \code{urls} argument: +\itemize{ +\item Unnamed character scalar, i.e. \strong{a single URL} to a +\link[=write_board_manifest]{manifest file}: If the URL ends in a \code{/}, +\code{board_url()} will look for a \verb{_pins.yaml} manifest. If the manifest +file parses to a named list, versioning is supported. If it parses to a +named character vector, the board will not support versioning. +\item \strong{Named character vector of URLs}: If the URLs end in a \code{/}, +\code{board_url()} will look for a \code{data.txt} that provides metadata for the +associated pin. The easiest way to generate this file is to upload a pin +version directory created by \code{\link[=board_folder]{board_folder()}}. Versioning is not supported. +\item \strong{Named list}, where the values are character vectors of URLs and each +element of the vector refers to a version of the particular pin: If a +URL ends in a \code{/}, \code{board_url()} will look for a \code{data.txt} that +provides metadata. Versioning is supported. +} + +Using a vector of URLs can be useful because \code{\link[=pin_download]{pin_download()}} and +\code{\link[=pin_read]{pin_read()}} will be cached; they'll only re-download the data if it's +changed from the last time you downloaded it (using the tools of \href{https://developer.mozilla.org/en-US/docs/Web/HTTP/Caching}{HTTP caching}). You'll also be protected from the vagaries of the internet; if a fresh download fails, you'll get the previously cached result with a warning. -\code{board_url()} is read only and does not currently support versions. +Using a \link[=write_board_manifest]{manifest file} can be useful because you +can serve a board of pins and allow collaborators to access the board +straight from a URL, without worrying about board-level storage details. } \examples{ -github_raw <- "https://raw.githubusercontent.com/" -board <- board_url(c( - files = paste0(github_raw, "rstudio/pins-r/master/tests/testthat/pin-files/"), - rds = paste0(github_raw, "rstudio/pins-r/master/tests/testthat/pin-rds/"), - raw = paste0(github_raw, "rstudio/pins-r/master/tests/testthat/pin-files/first.txt") +github_raw <- function(x) paste0("https://raw.githubusercontent.com/", x) + +## with a named vector of URLs to specific pins: +b1 <- board_url(c( + files = github_raw("rstudio/pins-r/main/tests/testthat/pin-files/"), + rds = github_raw("rstudio/pins-r/main/tests/testthat/pin-rds/"), + raw = github_raw("rstudio/pins-r/main/tests/testthat/pin-files/first.txt") )) -board \%>\% pin_read("rds") -board \%>\% pin_browse("rds", local = TRUE) +b1 \%>\% pin_read("rds") +b1 \%>\% pin_browse("rds", local = TRUE) + +b1 \%>\% pin_download("files") +b1 \%>\% pin_download("raw") + +## with a manifest file: +b2 <- board_url(github_raw("rstudio/pins-r/main/tests/testthat/pin-board/")) +b2 \%>\% pin_list() +b2 \%>\% pin_versions("y") -board \%>\% pin_download("files") -board \%>\% pin_download("raw") } \seealso{ Other boards: diff --git a/tests/testthat/_snaps/board_url.md b/tests/testthat/_snaps/board_url.md index 3b86f0677..de2a55203 100644 --- a/tests/testthat/_snaps/board_url.md +++ b/tests/testthat/_snaps/board_url.md @@ -24,13 +24,18 @@ Code board %>% pin_meta("x", version = "x") Condition - Error in `pin_meta()`: - ! board_url() doesn't support versions + Error in `abort_board_not_versioned()`: + ! This board_url() is not versioned Code board %>% pin_versions("x") Condition - Error in `pin_versions_modern()`: - ! This board doesn't support versions + Error in `abort_board_not_versioned()`: + ! This board_url() is not versioned + Code + board %>% pin_version_delete("x") + Condition + Error in `abort_board_read_only()`: + ! board_url() is read only Code board %>% board_deparse() Condition @@ -47,3 +52,30 @@ Error in `this_not_that()`: ! Use `pin_read()` with this board, not `pin_get()` +# useful errors for specifying board + + Code + board_url(c("foo", "bar")) + Condition + Error in `get_url_format()`: + ! `urls` must resolve to either: + * an unnamed character scalar, i.e. a single URL + * a named character vector + * a named list, where all elements are character scalars or vectors + Code + board_url(list("a", 1:2)) + Condition + Error in `get_url_format()`: + ! `urls` must resolve to either: + * an unnamed character scalar, i.e. a single URL + * a named character vector + * a named list, where all elements are character scalars or vectors + Code + board_url(1:10) + Condition + Error in `get_url_format()`: + ! `urls` must resolve to either: + * an unnamed character scalar, i.e. a single URL + * a named character vector + * a named list, where all elements are character scalars or vectors + diff --git a/tests/testthat/test-board_folder.R b/tests/testthat/test-board_folder.R index d803c0437..d2fc3b97d 100644 --- a/tests/testthat/test-board_folder.R +++ b/tests/testthat/test-board_folder.R @@ -66,7 +66,7 @@ test_that("contents of manifest match", { manifest, ~ expect_identical( .x, - append_slash(as.character(fs::path(.y, pin_versions(b, .y)$version))) + end_with_slash(as.character(fs::path(.y, pin_versions(b, .y)$version))) ) ) }) diff --git a/tests/testthat/test-board_url.R b/tests/testthat/test-board_url.R index f00d766dd..66bba9fc9 100644 --- a/tests/testthat/test-board_url.R +++ b/tests/testthat/test-board_url.R @@ -55,7 +55,6 @@ test_that("raw pins can only be downloaded", { test_that("can download pin from board_folder version dir", { skip_if_not_installed("webfakes") - b1 <- board_folder(withr::local_tempfile()) b1 %>% pin_write(1:10, "x") b2_path <- fs::path(b1$path, "x", pin_versions(b1, "x")$version) @@ -70,6 +69,53 @@ test_that("can download pin from board_folder version dir", { expect_equal(1:10) }) +test_that("can download pin from versioned board_folder", { + skip_if_not_installed("webfakes") + b1 <- board_folder(withr::local_tempdir(), versioned = TRUE) + b1 %>% pin_write(1:10, "x", type = "json") + b1 %>% pin_write(11:20, "y", type = "json") + b1 %>% pin_write(1:20, "y", type = "csv") + write_board_manifest(b1) + b1_path <- fs::path(b1$path) + + b1_server <- webfakes::new_app() + b1_server$use(webfakes::mw_static(root = b1_path)) + b1_process <- webfakes::new_app_process(b1_server) + + b2 <- board_url(b1_process$url()) + b2 %>% + pin_read("x") %>% + expect_equal(1:10) +}) + +test_that("useful error for missing or unparseable manifest file", { + skip_if_not_installed("webfakes") + b1 <- board_folder(withr::local_tempdir(), versioned = TRUE) + b1 %>% pin_write(1:10, "x", type = "json") + b1 %>% pin_write(1:20, "y", type = "csv") + b1_path <- fs::path(b1$path) + + b1_server <- webfakes::new_app() + b1_server$use(webfakes::mw_static(root = b1_path)) + b2 <- webfakes::new_app_process(b1_server) + + expect_error( + board_url(b2$url()), + "Failed to access manifest file" + ) + + write.csv(mtcars, file = fs::path(b1_path, "_pins.yaml")) + b3_server <- webfakes::new_app() + b3_server$use(webfakes::mw_static(root = b1_path)) + b4 <- webfakes::new_app_process(b3_server) + + expect_error( + board_url(b4$url()), + "Failed to parse manifest file at URL" + ) + +}) + test_that("useful errors for unsupported methods", { board <- board_url(c("x" = "foo")) @@ -79,12 +125,21 @@ test_that("useful errors for unsupported methods", { board %>% pin_meta("froofy", version = "x") board %>% pin_meta("x", version = "x") board %>% pin_versions("x") + board %>% pin_version_delete("x") board %>% board_deparse() pin(1:5, name = "x", board = board) pin_get(name = "x", board = board) }) }) +test_that("useful errors for specifying board", { + expect_snapshot(error = TRUE, { + board_url(c("foo", "bar")) + board_url(list("a", 1:2)) + board_url(1:10) + }) +}) + # http_download ---------------------------------------------------------- test_that("use cache with Last-Modified header", { diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R index 801d54bc9..ddcfdbca6 100644 --- a/tests/testthat/test-utils.R +++ b/tests/testthat/test-utils.R @@ -12,9 +12,9 @@ test_that("write_yaml can write non-UTF8 data", { expect_equal(y, list("fa\u00e7ile" = "fa\u00e7ile")) }) -test_that("can append slash if needed", { - expect_identical(append_slash("foo"), "foo/") - expect_identical(append_slash("foo/"), "foo/") - expect_identical(append_slash(c("foo/", "bar")), c("foo/", "bar/")) - expect_identical(append_slash(c("foo", "bar/")), c("foo/", "bar/")) +test_that("can end with slash if needed", { + expect_identical(end_with_slash("foo"), "foo/") + expect_identical(end_with_slash("foo/"), "foo/") + expect_identical(end_with_slash(c("foo/", "bar")), c("foo/", "bar/")) + expect_identical(end_with_slash(c("foo", "bar/")), c("foo/", "bar/")) })