From 8f56982be02147802238b8091aee0b2d679dcdb9 Mon Sep 17 00:00:00 2001 From: Hadley Wickham Date: Fri, 6 Aug 2021 11:17:13 -0500 Subject: [PATCH] Implement board_azure() Fixes #474 --- .github/workflows/R-CMD-check.yaml | 1 + DESCRIPTION | 1 + NAMESPACE | 9 ++ NEWS.md | 4 + R/board_azure.R | 187 +++++++++++++++++++++++++++ R/board_s3.R | 3 +- R/testthat.R | 13 ++ man/board_azure.Rd | 51 ++++++++ man/board_s3.Rd | 3 +- tests/testthat/_snaps/board_azure.md | 27 ++++ tests/testthat/test-board_azure.R | 63 +++++++++ vignettes/pins-legacy.Rmd | 2 +- 12 files changed, 361 insertions(+), 3 deletions(-) create mode 100644 R/board_azure.R create mode 100644 R/testthat.R create mode 100644 man/board_azure.Rd create mode 100644 tests/testthat/_snaps/board_azure.md create mode 100644 tests/testthat/test-board_azure.R diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index ddbd6ba5e..26eb14e4e 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -39,6 +39,7 @@ jobs: TEST_AWS_REGION: "us-east-2" TEST_AWS_SECRET: ${{ secrets.TEST_AWS_SECRET }} TEST_KAGGLE_API: ${{ secrets.TEST_KAGGLE_API }} + PINS_AZURE_TEST_SAS: ${{ secrets.TEST_AZURE_SAS }} steps: - uses: actions/checkout@v2 diff --git a/DESCRIPTION b/DESCRIPTION index f6ff07cd0..ae1bf48d8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,6 +46,7 @@ Imports: yaml, zip Suggests: + AzureStor, data.table, datasets, knitr, diff --git a/NAMESPACE b/NAMESPACE index 317d05b8c..c6af6215f 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -47,18 +47,22 @@ S3method(pin,default) S3method(pin_browse,pins_board_folder) S3method(pin_browse,pins_board_rsconnect) S3method(pin_browse,pins_board_url) +S3method(pin_delete,pins_board_azure) S3method(pin_delete,pins_board_folder) S3method(pin_delete,pins_board_rsconnect) S3method(pin_delete,pins_board_s3) S3method(pin_delete,pins_board_url) +S3method(pin_exists,pins_board_azure) S3method(pin_exists,pins_board_folder) S3method(pin_exists,pins_board_rsconnect) S3method(pin_exists,pins_board_s3) S3method(pin_exists,pins_board_url) +S3method(pin_fetch,pins_board_azure) S3method(pin_fetch,pins_board_folder) S3method(pin_fetch,pins_board_rsconnect) S3method(pin_fetch,pins_board_s3) S3method(pin_fetch,pins_board_url) +S3method(pin_list,pins_board_azure) S3method(pin_list,pins_board_folder) S3method(pin_list,pins_board_local) S3method(pin_list,pins_board_rsconnect) @@ -68,6 +72,7 @@ S3method(pin_load,default) S3method(pin_load,files) S3method(pin_load,package) S3method(pin_load,table) +S3method(pin_meta,pins_board_azure) S3method(pin_meta,pins_board_folder) S3method(pin_meta,pins_board_rsconnect) S3method(pin_meta,pins_board_s3) @@ -77,15 +82,18 @@ S3method(pin_preview,default) S3method(pin_preview,files) S3method(pin_search,pins_board) S3method(pin_search,pins_board_rsconnect) +S3method(pin_store,pins_board_azure) S3method(pin_store,pins_board_folder) S3method(pin_store,pins_board_rsconnect) S3method(pin_store,pins_board_s3) S3method(pin_store,pins_board_url) S3method(pin_version_delete,pins_board) +S3method(pin_version_delete,pins_board_azure) S3method(pin_version_delete,pins_board_folder) S3method(pin_version_delete,pins_board_rsconnect) S3method(pin_version_delete,pins_board_s3) S3method(pin_versions,pins_board) +S3method(pin_versions,pins_board_azure) S3method(pin_versions,pins_board_folder) S3method(pin_versions,pins_board_rsconnect) S3method(pin_versions,pins_board_s3) @@ -95,6 +103,7 @@ S3method(print,pins_hidden) S3method(print,pins_meta) S3method(str,pins_hidden) export("%>%") +export(board_azure) export(board_browse) export(board_cache_path) export(board_connect) diff --git a/NEWS.md b/NEWS.md index c378715a8..5045ad80c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -63,6 +63,10 @@ This version includes the following modern boards: [paws](https://paws-r.github.io) so supports a wide range of authentication options. +* `board_azure()` stores data in to Azure's blob storage. It is built on top of + [AzureStor](https://github.com/Azure/AzureStor) so supports a wide range of + authentication options (#474). + * `board_url()` lets you create a manual pin board from a vector of urls. This is useful because `pin_donwload()` and `pin_read()` are cached, and will only re-download the data if it's changed since the diff --git a/R/board_azure.R b/R/board_azure.R new file mode 100644 index 000000000..02a4de5a0 --- /dev/null +++ b/R/board_azure.R @@ -0,0 +1,187 @@ +#' Use an Azure Blob Storage Container as a board +#' +#' Pin data to a container on Azure's blog storage using the AzureStor package. +#' +#' @inheritParams new_board +#' @param account,container Account and container name. +#' @param key,token,sas Authentication credentials: either an access `key`, an +#' Azure Active Directory (AAD) `token`, or a `SAS`, in that order of +#' priority. If no omitted, provides read access to a public (anonymous) board. +#' @param url If omitted, `board_azure()` will generate a endpoint url of the +#' form `"https://{account}.blob.core.windows.net/{container}"`. Use this argument +#' to override if needed. +#' @export +#' @examples +#' if (requireNamespace("AzureStor")) { +#' # Public access board +#' board <- board_azure("pins", "public-data") +#' board %>% pin_read("mtcars") +#' } +#' +#' \dontrun{ +#' # To create a board that you can write to, you'll need to supply one +#' # of `key`, `token`, or `sas` +#' board <- board_azure("pins", "private-data", sas = "...") +#' board %>% pin_write(iris) +#' } +board_azure <- function( + account, + container, + versioned = TRUE, + key = NULL, + token = NULL, + sas = NULL, + url = NULL, + cache = NULL) { + + check_installed("AzureStor") + + url <- url %||% paste0("https://", account, ".blob.core.windows.net/", container) + + # TODO: check error message when container doesn't exist + container <- AzureStor::blob_container(url, key = key, token = token, sas = sas) + + cache <- cache %||% board_cache_path(paste0("azure-", hash(url))) + new_board_v1("pins_board_azure", + name = "azure", + container = container, + cache = cache, + versioned = versioned + ) +} + +board_azure_test <- function(...) { + if (!has_envvars("PINS_AZURE_TEST_SAS")) { + testthat::skip("PINS_AZURE_TEST_SAS not set") + } + + board_azure("pins", "test-data", + sas = Sys.getenv("PINS_AZURE_TEST_SAS"), + cache = tempfile(), ... + ) +} + +#' @export +pin_list.pins_board_azure <- function(board, ...) { + azure_ls(board) +} + +#' @export +pin_exists.pins_board_azure <- function(board, name, ...) { + length(azure_ls(board, name)) > 0 +} + +#' @export +pin_delete.pins_board_azure <- function(board, names, ...) { + for (name in names) { + azure_delete_dir(board, name) + } + invisible(board) +} + +#' @export +pin_versions.pins_board_azure <- function(board, name, ...) { + check_pin_exists(board, name) + version_from_path(azure_ls(board, name)) +} + +#' @export +pin_version_delete.pins_board_azure <- function(board, name, version, ...) { + azure_delete_dir(board, fs::path(name, version)) +} + +#' @export +pin_meta.pins_board_azure <- function(board, name, version = NULL, ...) { + check_pin_exists(board, name) + version <- check_pin_version(board, name, version) + + path_version <- fs::path(board$cache, name, version) + fs::dir_create(path_version) + + azure_download(board, fs::path(name, version, "data.txt"), progress = FALSE) + local_meta( + read_meta(fs::path(board$cache, name, version)), + dir = path_version, + version = version + ) +} + +#' @export +pin_fetch.pins_board_azure <- function(board, name, version = NULL, ...) { + meta <- pin_meta(board, name, version = version) + cache_touch(board, meta) + + keys <- map_chr(meta$file, ~ fs::path(name, meta$local$version, .x)) + azure_download(board, keys) + + meta +} + +#' @export +pin_store.pins_board_azure <- function(board, name, paths, metadata, + versioned = NULL, ...) { + check_name(name) + version <- version_setup(board, name, metadata, versioned = versioned) + + version_dir <- fs::path(name, version) + + + # Upload metadata + local_azure_progress(FALSE) + AzureStor::upload_blob(board$container, + src = textConnection(yaml::as.yaml(metadata)), + dest = fs::path(version_dir, "data.txt") + ) + + # Upload files + local_azure_progress() + keys <- fs::path(version_dir, fs::path_file(paths)) + AzureStor::multiupload_blob(board$container, src = paths, dest = keys) + + invisible(board) +} + +# Helpers ----------------------------------------------------------------- + +azure_delete_dir <- function(board, dir) { + ls <- AzureStor::list_blobs(board$container, dir) + for (path in ls$name) { + AzureStor::delete_blob(board$container, path, confirm = FALSE) + } +} + +azure_ls <- function(board, dir = "/") { + ls <- AzureStor::list_blobs( + board$container, + dir = dir, + recursive = FALSE + ) + paths <- ls$name + + if (dir != "/") { + # trim off name/ prefix + paths <- substr(paths, nchar(dir) + 2, nchar(paths)) + } + # trim / suffix off directories + paths <- sub("/$", "", paths) + + paths +} + + + +local_azure_progress <- function(progress = !is_testing(), env = parent.frame()) { + withr::local_options(azure_storage_progress_bar = progress, .local_envir = env) +} + +azure_download <- function(board, keys, progress = !is_testing()) { + local_azure_progress(progress) + + paths <- fs::path(board$cache, keys) + needed <- !fs::file_exists(paths) + if (any(needed)) { + AzureStor::multidownload_blob(board$container, keys[needed], paths[needed]) + } + + invisible() +} diff --git a/R/board_s3.R b/R/board_s3.R index 8baba85a5..b02d1748a 100644 --- a/R/board_s3.R +++ b/R/board_s3.R @@ -1,6 +1,7 @@ #' Use an S3 bucket as a board #' -#' Pin data to a bucket on Amazon's S3 service. +#' Pin data to a bucket on Amazon's S3 service, using the paws.storage +#' package. #' #' # Authentication #' diff --git a/R/testthat.R b/R/testthat.R new file mode 100644 index 000000000..32864951e --- /dev/null +++ b/R/testthat.R @@ -0,0 +1,13 @@ +local_pin <- function(board, value, ..., env = parent.frame()) { + name <- random_pin_name() + + pin_write(board, value, name, ...) + withr::defer(pin_delete(board, name), env) + + name +} + +random_pin_name <- function() { + rand <- sample(c(letters, LETTERS, 0:9), 10, replace = TRUE) + paste0("test-", paste(rand, collapse = "")) +} diff --git a/man/board_azure.Rd b/man/board_azure.Rd new file mode 100644 index 000000000..4b5391176 --- /dev/null +++ b/man/board_azure.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/board_azure.R +\name{board_azure} +\alias{board_azure} +\title{Use an Azure Blob Storage Container as a board} +\usage{ +board_azure( + account, + container, + versioned = TRUE, + key = NULL, + token = NULL, + sas = NULL, + url = NULL, + cache = NULL +) +} +\arguments{ +\item{account, container}{Account and container name.} + +\item{versioned}{Should this board be registered with support for versions?} + +\item{key, token, sas}{Authentication credentials: either an access \code{key}, an +Azure Active Directory (AAD) \code{token}, or a \code{SAS}, in that order of +priority. If no omitted, provides read access to a public (anonymous) board.} + +\item{url}{If omitted, \code{board_azure()} will generate a endpoint url of the +form \code{"https://{account}.blob.core.windows.net/{container}"}. Use this argument +to override if needed.} + +\item{cache}{Cache path. Every board requires a local cache to avoid +downloading files multiple times. The default stores in a standard +cache location for your operating system, but you can override if needed.} +} +\description{ +Pin data to a container on Azure's blog storage using the AzureStor package. +} +\examples{ +if (requireNamespace("AzureStor")) { + # Public access board + board <- board_azure("pins", "public-data") + board \%>\% pin_read("mtcars") +} + +\dontrun{ +# To create a board that you can write to, you'll need to supply one +# of `key`, `token`, or `sas` +board <- board_azure("pins", "private-data", sas = "...") +board \%>\% pin_write(iris) +} +} diff --git a/man/board_s3.Rd b/man/board_s3.Rd index 53307119e..521a57ded 100644 --- a/man/board_s3.Rd +++ b/man/board_s3.Rd @@ -37,7 +37,8 @@ downloading files multiple times. The default stores in a standard cache location for your operating system, but you can override if needed.} } \description{ -Pin data to a bucket on Amazon's S3 service. +Pin data to a bucket on Amazon's S3 service, using the paws.storage +package. } \section{Authentication}{ \code{board_s3()} is powered by the paws package which provides a wide range diff --git a/tests/testthat/_snaps/board_azure.md b/tests/testthat/_snaps/board_azure.md new file mode 100644 index 000000000..f1c711314 --- /dev/null +++ b/tests/testthat/_snaps/board_azure.md @@ -0,0 +1,27 @@ +# generates useful errors for missing pins/versions + + Code + board %>% pin_versions("missing") + Error + Can't find pin called 'missing' + i Use `pin_list()` to see all available pins in this board + Code + board %>% pin_read("missing") + Error + Can't find pin called 'missing' + i Use `pin_list()` to see all available pins in this board + Code + board %>% pin_read(name, version = 1) + Error + `version` must be a string or `NULL` + Code + board %>% pin_read(name, version = "missing") + Error + Not Found (HTTP 404). Failed to complete Storage Services operation. Message: + . + Code + board %>% pin_write(3, name, versioned = FALSE) + Error + Pin is versioned, but you have requested a write without versions + i To un-version a pin, you must delete it + diff --git a/tests/testthat/test-board_azure.R b/tests/testthat/test-board_azure.R new file mode 100644 index 000000000..bd6372a66 --- /dev/null +++ b/tests/testthat/test-board_azure.R @@ -0,0 +1,63 @@ +test_that("can read and write simple pin", { + board <- board_azure_test() + name <- random_pin_name() + + pin_write(board, 1:3, name) + expect_true(name %in% pin_list(board)) + expect_equal(pin_read(board, name), 1:3) + + pin_delete(board, name) + expect_false(name %in% pin_list(board)) +}) + +test_that("absent pins handled consistently", { + board <- board_azure_test() + name <- local_pin(board, 1) + + expect_true(name %in% pin_list(board)) + expect_equal(pin_exists(board, name), TRUE) + expect_equal(pin_exists(board, "this_doesnt_exist"), FALSE) + + expect_error(pin_meta(board, "this_doesnt_exist"), class = "pins_pin_absent") +}) + +test_that("tracks versions as expected", { + board <- board_azure_test() + name <- local_pin(board, 1) + + versions <- pin_versions(board, name) + expect_equal(nrow(versions), 1) + pin_write(board, 2, name) + pin_write(board, 3, name) + expect_equal(nrow(pin_versions(board, name)), 3) + + x <- pin_read(board, name, version = versions$version[[1]]) + expect_equal(x, 1) +}) + +test_that("if versioning off, overwrites existing version", { + board <- board_azure_test(versioned = FALSE) + + name <- local_pin(board, 1) + expect_equal(nrow(pin_versions(board, name)), 1) + + pin_write(board, 2, name) + expect_equal(nrow(pin_versions(board, name)), 1) + expect_equal(pin_read(board, name), 2) +}) + +test_that("generates useful errors for missing pins/versions", { + board <- board_azure_test() + name <- local_pin(board, 1) + pin_write(board, 2, name) + + expect_snapshot(error = TRUE, { + board %>% pin_versions("missing") + + board %>% pin_read("missing") + board %>% pin_read(name, version = 1) + board %>% pin_read(name, version = "missing") + + board %>% pin_write(3, name, versioned = FALSE) + }) +}) diff --git a/vignettes/pins-legacy.Rmd b/vignettes/pins-legacy.Rmd index b1de7f4f3..c28b0db5d 100644 --- a/vignettes/pins-legacy.Rmd +++ b/vignettes/pins-legacy.Rmd @@ -113,7 +113,7 @@ board %>% pin_read("test-data") | Legacy API | Modern API | |------------------------------|---------------------| -| `board_register_azure()` | | +| `board_register_azure()` | `board_azure()` | | `board_register_datatxt()` | | | `board_register_dospace()` | | | `board_register_gcloud()` | |