Skip to content

Commit

Permalink
Implement board_azure()
Browse files Browse the repository at this point in the history
Fixes #474
  • Loading branch information
hadley committed Aug 6, 2021
1 parent cc72877 commit 8f56982
Show file tree
Hide file tree
Showing 12 changed files with 361 additions and 3 deletions.
1 change: 1 addition & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
TEST_AWS_REGION: "us-east-2"
TEST_AWS_SECRET: ${{ secrets.TEST_AWS_SECRET }}
TEST_KAGGLE_API: ${{ secrets.TEST_KAGGLE_API }}
PINS_AZURE_TEST_SAS: ${{ secrets.TEST_AZURE_SAS }}

steps:
- uses: actions/checkout@v2
Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Imports:
yaml,
zip
Suggests:
AzureStor,
data.table,
datasets,
knitr,
Expand Down
9 changes: 9 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,22 @@ S3method(pin,default)
S3method(pin_browse,pins_board_folder)
S3method(pin_browse,pins_board_rsconnect)
S3method(pin_browse,pins_board_url)
S3method(pin_delete,pins_board_azure)
S3method(pin_delete,pins_board_folder)
S3method(pin_delete,pins_board_rsconnect)
S3method(pin_delete,pins_board_s3)
S3method(pin_delete,pins_board_url)
S3method(pin_exists,pins_board_azure)
S3method(pin_exists,pins_board_folder)
S3method(pin_exists,pins_board_rsconnect)
S3method(pin_exists,pins_board_s3)
S3method(pin_exists,pins_board_url)
S3method(pin_fetch,pins_board_azure)
S3method(pin_fetch,pins_board_folder)
S3method(pin_fetch,pins_board_rsconnect)
S3method(pin_fetch,pins_board_s3)
S3method(pin_fetch,pins_board_url)
S3method(pin_list,pins_board_azure)
S3method(pin_list,pins_board_folder)
S3method(pin_list,pins_board_local)
S3method(pin_list,pins_board_rsconnect)
Expand All @@ -68,6 +72,7 @@ S3method(pin_load,default)
S3method(pin_load,files)
S3method(pin_load,package)
S3method(pin_load,table)
S3method(pin_meta,pins_board_azure)
S3method(pin_meta,pins_board_folder)
S3method(pin_meta,pins_board_rsconnect)
S3method(pin_meta,pins_board_s3)
Expand All @@ -77,15 +82,18 @@ S3method(pin_preview,default)
S3method(pin_preview,files)
S3method(pin_search,pins_board)
S3method(pin_search,pins_board_rsconnect)
S3method(pin_store,pins_board_azure)
S3method(pin_store,pins_board_folder)
S3method(pin_store,pins_board_rsconnect)
S3method(pin_store,pins_board_s3)
S3method(pin_store,pins_board_url)
S3method(pin_version_delete,pins_board)
S3method(pin_version_delete,pins_board_azure)
S3method(pin_version_delete,pins_board_folder)
S3method(pin_version_delete,pins_board_rsconnect)
S3method(pin_version_delete,pins_board_s3)
S3method(pin_versions,pins_board)
S3method(pin_versions,pins_board_azure)
S3method(pin_versions,pins_board_folder)
S3method(pin_versions,pins_board_rsconnect)
S3method(pin_versions,pins_board_s3)
Expand All @@ -95,6 +103,7 @@ S3method(print,pins_hidden)
S3method(print,pins_meta)
S3method(str,pins_hidden)
export("%>%")
export(board_azure)
export(board_browse)
export(board_cache_path)
export(board_connect)
Expand Down
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ This version includes the following modern boards:
[paws](https://paws-r.github.io) so supports a wide range of authentication
options.

* `board_azure()` stores data in to Azure's blob storage. It is built on top of
[AzureStor](https://github.com/Azure/AzureStor) so supports a wide range of
authentication options (#474).

* `board_url()` lets you create a manual pin board from a vector of
urls. This is useful because `pin_donwload()` and `pin_read()` are
cached, and will only re-download the data if it's changed since the
Expand Down
187 changes: 187 additions & 0 deletions R/board_azure.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
#' Use an Azure Blob Storage Container as a board
#'
#' Pin data to a container on Azure's blog storage using the AzureStor package.
#'
#' @inheritParams new_board
#' @param account,container Account and container name.
#' @param key,token,sas Authentication credentials: either an access `key`, an
#' Azure Active Directory (AAD) `token`, or a `SAS`, in that order of
#' priority. If no omitted, provides read access to a public (anonymous) board.
#' @param url If omitted, `board_azure()` will generate a endpoint url of the
#' form `"https://{account}.blob.core.windows.net/{container}"`. Use this argument
#' to override if needed.
#' @export
#' @examples
#' if (requireNamespace("AzureStor")) {
#' # Public access board
#' board <- board_azure("pins", "public-data")
#' board %>% pin_read("mtcars")
#' }
#'
#' \dontrun{
#' # To create a board that you can write to, you'll need to supply one
#' # of `key`, `token`, or `sas`
#' board <- board_azure("pins", "private-data", sas = "...")
#' board %>% pin_write(iris)
#' }
board_azure <- function(
account,
container,
versioned = TRUE,
key = NULL,
token = NULL,
sas = NULL,
url = NULL,
cache = NULL) {

check_installed("AzureStor")

url <- url %||% paste0("https://", account, ".blob.core.windows.net/", container)

# TODO: check error message when container doesn't exist
container <- AzureStor::blob_container(url, key = key, token = token, sas = sas)

cache <- cache %||% board_cache_path(paste0("azure-", hash(url)))
new_board_v1("pins_board_azure",
name = "azure",
container = container,
cache = cache,
versioned = versioned
)
}

board_azure_test <- function(...) {
if (!has_envvars("PINS_AZURE_TEST_SAS")) {
testthat::skip("PINS_AZURE_TEST_SAS not set")
}

board_azure("pins", "test-data",
sas = Sys.getenv("PINS_AZURE_TEST_SAS"),
cache = tempfile(), ...
)
}

#' @export
pin_list.pins_board_azure <- function(board, ...) {
azure_ls(board)
}

#' @export
pin_exists.pins_board_azure <- function(board, name, ...) {
length(azure_ls(board, name)) > 0
}

#' @export
pin_delete.pins_board_azure <- function(board, names, ...) {
for (name in names) {
azure_delete_dir(board, name)
}
invisible(board)
}

#' @export
pin_versions.pins_board_azure <- function(board, name, ...) {
check_pin_exists(board, name)
version_from_path(azure_ls(board, name))
}

#' @export
pin_version_delete.pins_board_azure <- function(board, name, version, ...) {
azure_delete_dir(board, fs::path(name, version))
}

#' @export
pin_meta.pins_board_azure <- function(board, name, version = NULL, ...) {
check_pin_exists(board, name)
version <- check_pin_version(board, name, version)

path_version <- fs::path(board$cache, name, version)
fs::dir_create(path_version)

azure_download(board, fs::path(name, version, "data.txt"), progress = FALSE)
local_meta(
read_meta(fs::path(board$cache, name, version)),
dir = path_version,
version = version
)
}

#' @export
pin_fetch.pins_board_azure <- function(board, name, version = NULL, ...) {
meta <- pin_meta(board, name, version = version)
cache_touch(board, meta)

keys <- map_chr(meta$file, ~ fs::path(name, meta$local$version, .x))
azure_download(board, keys)

meta
}

#' @export
pin_store.pins_board_azure <- function(board, name, paths, metadata,
versioned = NULL, ...) {
check_name(name)
version <- version_setup(board, name, metadata, versioned = versioned)

version_dir <- fs::path(name, version)


# Upload metadata
local_azure_progress(FALSE)
AzureStor::upload_blob(board$container,
src = textConnection(yaml::as.yaml(metadata)),
dest = fs::path(version_dir, "data.txt")
)

# Upload files
local_azure_progress()
keys <- fs::path(version_dir, fs::path_file(paths))
AzureStor::multiupload_blob(board$container, src = paths, dest = keys)

invisible(board)
}

# Helpers -----------------------------------------------------------------

azure_delete_dir <- function(board, dir) {
ls <- AzureStor::list_blobs(board$container, dir)
for (path in ls$name) {
AzureStor::delete_blob(board$container, path, confirm = FALSE)
}
}

azure_ls <- function(board, dir = "/") {
ls <- AzureStor::list_blobs(
board$container,
dir = dir,
recursive = FALSE
)
paths <- ls$name

if (dir != "/") {
# trim off name/ prefix
paths <- substr(paths, nchar(dir) + 2, nchar(paths))
}
# trim / suffix off directories
paths <- sub("/$", "", paths)

paths
}



local_azure_progress <- function(progress = !is_testing(), env = parent.frame()) {
withr::local_options(azure_storage_progress_bar = progress, .local_envir = env)
}

azure_download <- function(board, keys, progress = !is_testing()) {
local_azure_progress(progress)

paths <- fs::path(board$cache, keys)
needed <- !fs::file_exists(paths)
if (any(needed)) {
AzureStor::multidownload_blob(board$container, keys[needed], paths[needed])
}

invisible()
}
3 changes: 2 additions & 1 deletion R/board_s3.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' Use an S3 bucket as a board
#'
#' Pin data to a bucket on Amazon's S3 service.
#' Pin data to a bucket on Amazon's S3 service, using the paws.storage
#' package.
#'
#' # Authentication
#'
Expand Down
13 changes: 13 additions & 0 deletions R/testthat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
local_pin <- function(board, value, ..., env = parent.frame()) {
name <- random_pin_name()

pin_write(board, value, name, ...)
withr::defer(pin_delete(board, name), env)

name
}

random_pin_name <- function() {
rand <- sample(c(letters, LETTERS, 0:9), 10, replace = TRUE)
paste0("test-", paste(rand, collapse = ""))
}
51 changes: 51 additions & 0 deletions man/board_azure.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/board_s3.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

27 changes: 27 additions & 0 deletions tests/testthat/_snaps/board_azure.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# generates useful errors for missing pins/versions

Code
board %>% pin_versions("missing")
Error <pins_pin_absent>
Can't find pin called 'missing'
i Use `pin_list()` to see all available pins in this board
Code
board %>% pin_read("missing")
Error <pins_pin_absent>
Can't find pin called 'missing'
i Use `pin_list()` to see all available pins in this board
Code
board %>% pin_read(name, version = 1)
Error <rlang_error>
`version` must be a string or `NULL`
Code
board %>% pin_read(name, version = "missing")
Error <http_404>
Not Found (HTTP 404). Failed to complete Storage Services operation. Message:
.
Code
board %>% pin_write(3, name, versioned = FALSE)
Error <rlang_error>
Pin is versioned, but you have requested a write without versions
i To un-version a pin, you must delete it

Loading

0 comments on commit 8f56982

Please sign in to comment.