From 5274e24d211f70c32db9b859006d146514ee6d75 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 07:25:17 -0400 Subject: [PATCH 01/36] Move methods and upload on dequeue --- R/class_database.R | 5 +++++ R/class_database_local.R | 8 +------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/R/class_database.R b/R/class_database.R index 192f67c32..5a3d66161 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -158,6 +158,7 @@ database_class <- R6::R6Class( if (length(self$queue)) { on.exit(self$queue <- NULL) self$append_lines(self$queue) + self$upload() } }, write_row = function(row) { @@ -294,6 +295,10 @@ database_class <- R6::R6Class( self$overwrite_storage(data) } }, + upload = function() { + }, + download = function() { + } validate_columns = function(header, list_columns) { if (!all(list_columns %in% header)) { tar_throw_validate("all list columns must be in the header") diff --git a/R/class_database_local.R b/R/class_database_local.R index ec045e078..b9497b4a6 100644 --- a/R/class_database_local.R +++ b/R/class_database_local.R @@ -25,11 +25,5 @@ database_local_class <- R6::R6Class( inherit = database_class, class = FALSE, portable = FALSE, - cloneable = FALSE, - public = list( - upload = function() { - }, - download = function() { - } - ) + cloneable = FALSE ) From f64aa5a6f01ae4d19159259b6a6446dee8e53c27 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 07:40:49 -0400 Subject: [PATCH 02/36] repository_meta option --- R/class_database.R | 4 ++-- R/class_options.R | 21 +++++++++++++++++++++ R/tar_option_get.R | 1 + R/tar_option_set.R | 10 ++++++++++ man/tar_option_set.Rd | 6 ++++++ tests/testthat/test-class_options.R | 27 +++++++++++++++++++++++++++ tests/testthat/test-tar_option_set.R | 26 ++++++++++++++++++++++++++ 7 files changed, 93 insertions(+), 2 deletions(-) diff --git a/R/class_database.R b/R/class_database.R index 5a3d66161..73f7610da 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -4,7 +4,7 @@ database_init <- function( header = "name", list_columns = character(0L), list_column_modes = character(0L), - repository = tar_options$get_repository(), + repository = tar_options$get_repository_meta(), resources = tar_options$get_resources() ) { memory <- memory_init() @@ -298,7 +298,7 @@ database_class <- R6::R6Class( upload = function() { }, download = function() { - } + }, validate_columns = function(header, list_columns) { if (!all(list_columns %in% header)) { tar_throw_validate("all list columns must be in the header") diff --git a/R/class_options.R b/R/class_options.R index 9b24646f6..75e7e3069 100644 --- a/R/class_options.R +++ b/R/class_options.R @@ -6,6 +6,7 @@ options_init <- function( envir = NULL, format = NULL, repository = NULL, + repository_meta = NULL, iteration = NULL, error = NULL, memory = NULL, @@ -32,6 +33,7 @@ options_init <- function( envir = envir, format = format, repository = repository, + repository_meta = repository_meta, iteration = iteration, error = error, memory = memory, @@ -60,6 +62,7 @@ options_new <- function( envir = NULL, format = NULL, repository = NULL, + repository_meta = NULL, iteration = NULL, error = NULL, memory = NULL, @@ -86,6 +89,7 @@ options_new <- function( envir = envir, format = format, repository = repository, + repository_meta = repository_meta, iteration = iteration, error = error, memory = memory, @@ -119,6 +123,7 @@ options_class <- R6::R6Class( envir = NULL, format = NULL, repository = NULL, + repository_meta = NULL, iteration = NULL, error = NULL, memory = 
NULL, @@ -144,6 +149,7 @@ options_class <- R6::R6Class( envir = NULL, format = NULL, repository = NULL, + repository_meta = NULL, iteration = NULL, error = NULL, memory = NULL, @@ -169,6 +175,7 @@ options_class <- R6::R6Class( self$envir <- envir self$format <- format self$repository <- repository + self$repository_meta <- repository_meta self$iteration <- iteration self$error <- error self$memory <- memory @@ -195,6 +202,7 @@ options_class <- R6::R6Class( library = self$get_library(), format = self$get_format(), repository = self$get_repository(), + repository_meta = self$get_repository_meta(), iteration = self$get_iteration(), error = self$get_error(), memory = self$get_memory(), @@ -219,6 +227,7 @@ options_class <- R6::R6Class( self$set_library(list$library) self$set_format(list$format) self$set_repository(list$repository) + self$set_repository_meta(list$repository_meta) self$set_iteration(list$iteration) self$set_error(list$error) self$set_memory(list$memory) @@ -243,6 +252,7 @@ options_class <- R6::R6Class( self$envir <- NULL self$format <- NULL self$repository <- NULL + self$repository_meta <- NULL self$iteration <- NULL self$error <- NULL self$memory <- NULL @@ -282,6 +292,9 @@ options_class <- R6::R6Class( get_repository = function() { self$repository %|||% "local" }, + get_repository_meta = function() { + (self$repository_meta %|||% self$repository) %|||% "local" + }, get_iteration = function() { self$iteration %|||% "vector" }, @@ -361,6 +374,10 @@ options_class <- R6::R6Class( self$validate_repository(repository) self$repository <- repository }, + set_repository_meta = function(repository_meta) { + self$validate_repository_meta(repository_meta) + self$repository_meta <- repository_meta + }, set_iteration = function(iteration) { self$validate_iteration(iteration) self$iteration <- iteration @@ -466,6 +483,9 @@ options_class <- R6::R6Class( validate_repository = function(repository) { tar_assert_repository(repository) }, + validate_repository_meta = function(repository_meta) { + tar_assert_repository(repository_meta) + }, validate_iteration = function(iteration) { tar_assert_flag(iteration, c("vector", "list", "group")) }, @@ -547,6 +567,7 @@ options_class <- R6::R6Class( self$validate_envir(self$get_envir()) self$validate_format(self$get_format()) self$validate_repository(self$get_repository()) + self$validate_repository_meta(self$get_repository_meta()) self$validate_iteration(self$get_iteration()) self$validate_error(self$get_error()) self$validate_memory(self$get_memory()) diff --git a/R/tar_option_get.R b/R/tar_option_get.R index 9279bcef6..dd506f690 100644 --- a/R/tar_option_get.R +++ b/R/tar_option_get.R @@ -51,6 +51,7 @@ tar_option_get <- function(name = NULL, option = NULL) { envir = tar_options$get_envir(), format = tar_options$get_format(), repository = tar_options$get_repository(), + repository_meta = tar_options$get_repository_meta(), iteration = tar_options$get_iteration(), error = tar_options$get_error(), memory = tar_options$get_memory(), diff --git a/R/tar_option_set.R b/R/tar_option_set.R index 094811b58..cba392977 100644 --- a/R/tar_option_set.R +++ b/R/tar_option_set.R @@ -11,6 +11,10 @@ #' @return `NULL` (invisibly). #' @inheritSection tar_target Storage formats #' @inheritParams tar_target +#' @param repository_meta Character of length 1 with the same values as +#' `repository` (`"aws"`, `"gcp"`, `"local"`). Cloud repository +#' for the metadata text files in `_targets/meta/`, including target +#' metadata and progress data. 
Defaults to [tar_option_get("repository")]. #' @param imports Character vector of package names. #' For every package listed, `targets` tracks every #' dataset and every object in the package namespace @@ -180,6 +184,7 @@ tar_option_set <- function( envir = NULL, format = NULL, repository = NULL, + repository_meta = NULL, iteration = NULL, error = NULL, memory = NULL, @@ -206,6 +211,11 @@ if_any(is.null(envir), NULL, tar_options$set_envir(envir)) if_any(is.null(format), NULL, tar_options$set_format(format)) if_any(is.null(repository), NULL, tar_options$set_repository(repository)) + if_any( + is.null(repository_meta), + NULL, + tar_options$set_repository_meta(repository_meta) + ) if_any(is.null(iteration), NULL, tar_options$set_iteration(iteration)) if_any(is.null(error), NULL, tar_options$set_error(error)) if_any(is.null(memory), NULL, tar_options$set_memory(memory)) diff --git a/man/tar_option_set.Rd b/man/tar_option_set.Rd index a1aea84d4..3fbad387b 100644 --- a/man/tar_option_set.Rd +++ b/man/tar_option_set.Rd @@ -12,6 +12,7 @@ tar_option_set( envir = NULL, format = NULL, repository = NULL, + repository_meta = NULL, iteration = NULL, error = NULL, memory = NULL, @@ -137,6 +138,11 @@ That output file is uploaded to the cloud and tracked for changes where it exists in the cloud. The local file is deleted after the target runs.} +\item{repository_meta}{Character of length 1 with the same values as +\code{repository} (\code{"aws"}, \code{"gcp"}, \code{"local"}). Cloud repository +for the metadata text files in \verb{_targets/meta/}, including target +metadata and progress data. Defaults to \link{tar_option_get("repository")}.} + \item{iteration}{Character of length 1, name of the iteration mode of the target. Choices: \itemize{ diff --git a/tests/testthat/test-class_options.R b/tests/testthat/test-class_options.R index ad50cf186..30449de85 100644 --- a/tests/testthat/test-class_options.R +++ b/tests/testthat/test-class_options.R @@ -12,6 +12,7 @@ tar_test("validate non-default options", { envir = new.env(), format = "qs", repository = "aws", + repository_meta = "gcp", iteration = "list", error = "continue", memory = "transient", @@ -40,6 +41,7 @@ tar_test("export", { library = "path", format = "qs", repository = "aws", + repository_meta = "gcp", iteration = "list", error = "continue", memory = "transient", @@ -64,6 +66,7 @@ library = "path", format = "qs", repository = "aws", + repository_meta = "gcp", iteration = "list", error = "continue", memory = "transient", @@ -94,6 +97,7 @@ tar_test("import", { library = "path", format = "qs", repository = "aws", + repository_meta = "gcp", iteration = "list", error = "continue", memory = "transient", @@ -120,6 +124,7 @@ expect_equal(x$get_library(), "path") expect_equal(x$get_format(), "qs") expect_equal(x$get_repository(), "aws") + expect_equal(x$get_repository_meta(), "gcp") expect_equal(x$get_iteration(), "list") expect_equal(x$get_error(), "continue") expect_equal(x$get_memory(), "transient") @@ -210,6 +215,28 @@ tar_test("repository", { expect_error(x$set_repository(123), class = "tar_condition_validate") }) +tar_test("repository_meta", { + x <- options_init() + expect_equal(x$get_repository_meta(), "local") + x$set_repository_meta("aws") + expect_equal(x$get_repository_meta(), "aws") + x$reset() + expect_equal(x$get_repository_meta(), "local") + expect_error(x$set_repository_meta(123), class = "tar_condition_validate") +}) + +tar_test("repository_meta defaults to repository", { + x <- 
options_init() + x$set_repository("gcp") + expect_equal(x$get_repository_meta(), "gcp") + x$set_repository("aws") + expect_equal(x$get_repository_meta(), "aws") + x$reset() + x$set_repository("gcp") + expect_equal(x$get_repository_meta(), "gcp") + expect_error(x$set_repository_meta(123), class = "tar_condition_validate") +}) + tar_test("iteration", { x <- options_init() expect_equal(x$get_iteration(), "vector") diff --git a/tests/testthat/test-tar_option_set.R b/tests/testthat/test-tar_option_set.R index 350956054..60119bcd6 100644 --- a/tests/testthat/test-tar_option_set.R +++ b/tests/testthat/test-tar_option_set.R @@ -88,6 +88,32 @@ tar_test("repository", { ) }) +tar_test("repository_meta", { + expect_equal(tar_option_get("repository_meta"), "local") + tar_option_set(repository_meta = "aws") + expect_equal(tar_option_get("repository_meta"), "aws") + tar_option_reset() + expect_equal(tar_option_get("repository_meta"), "local") + expect_error( + tar_option_set(repository_meta = 123), + class = "tar_condition_validate" + ) +}) + +tar_test("repository_meta defaults to repository", { + tar_option_set(repository = "gcp") + expect_equal(tar_option_get("repository_meta"), "gcp") + tar_option_set(repository_meta = "aws") + expect_equal(tar_option_get("repository_meta"), "aws") + tar_option_reset() + tar_option_set(repository = "gcp") + expect_equal(tar_option_get("repository_meta"), "gcp") + expect_error( + tar_option_set(repository_meta = 123), + class = "tar_condition_validate" + ) +}) + tar_test("iteration", { expect_equal(tar_option_get("iteration"), "vector") tar_option_set(iteration = "list") From 15fa54bc43b5e9dc454fe361dd1dc0c9a8c5ddbe Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 07:41:48 -0400 Subject: [PATCH 03/36] news --- NEWS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/NEWS.md b/NEWS.md index 9e736d83c..16778e080 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,11 @@ Because of these changes, upgrading to this version of `targets` will unavoidabl * In the `hash_deps()` method of the metadata class, exclude symbols which are not actually dependencies, rather than just giving them empty strings. This change decouples the dependency hash from the hash of the target's command (#1108). + +## Cloud metadata + +* Add a new `repository_meta` option to select the cloud repository of the metadata files in `_targets/meta/` (#1109). Defaults to `tar_option_get("repository")`. + ## Other changes * Fix solution of #1103 so the copy fallback actually runs (@jds485, #1102, #1103). 
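A minimal sketch of a target script that opts into the option described in the NEWS entry above, assuming an S3 bucket whose name ("my-bucket" here) is a placeholder; `repository_meta` accepts the same values as `repository` ("aws", "gcp", "local") and falls back to `repository` when unset:

library(targets)
tar_option_set(
  repository = "aws", # target objects go to _targets/objects/ in the bucket
  repository_meta = "aws", # metadata text files in _targets/meta/ sync there too
  resources = tar_resources(
    aws = tar_resources_aws(bucket = "my-bucket", prefix = "_targets")
  )
)
list(
  tar_target(a, 1L),
  tar_target(b, a)
)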
From d12275658c16fddbb84dfdeaab9474e973b334bd Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 08:06:14 -0400 Subject: [PATCH 04/36] Redesign db classes and mock sync --- R/class_database.R | 29 +++++++++++++++++++++++++++++ R/class_database_aws.R | 34 ++++++++++++++++++++++++++++------ R/class_database_gcp.R | 32 ++++++++++++++++++++++++++------ R/class_database_local.R | 13 ++++++++++++- 4 files changed, 95 insertions(+), 13 deletions(-) diff --git a/R/class_database.R b/R/class_database.R index 73f7610da..05e0aed05 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -296,8 +296,37 @@ database_class <- R6::R6Class( } }, upload = function() { + "upload" }, download = function() { + "download" + }, + head = function() { + file <- file_init(path = "path_mock") + file_ensure_hash(file) + list( + exists = file.exists("path_mock"), + hash = file$hash, + size = file$size, + time = file$time + ) + }, + sync = function() { + head <- self$head() + file <- file_init(path = path) + file_ensure_hash(file) + exists_file <- all(file.exists(path)) + exists_object <- head$exists + changed <- !all(file$hash == head$hash) + if (exists_file && (!exists_object)) { + self$upload() + } else if ((!exists_file) && exists_object) { + self$download() + } else if (exists_file && exists_object && changed) { + time_file <- file_time_posixct(file$time) + time_head <- file_time_posixct(head$time) + if_any(time_file > time_head, self$upload(), self$download()) + } }, validate_columns = function(header, list_columns) { if (!all(list_columns %in% header)) { diff --git a/R/class_database_aws.R b/R/class_database_aws.R index b2acba944..5fb2d903a 100644 --- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -43,32 +43,54 @@ database_aws_class <- R6::R6Class( }, download = function() { aws <- self$resources$aws - file <- file_init(path = path) - file_ensure_hash(file) aws_s3_download( file = self$path, key = self$key, bucket = aws$bucket, region = aws$region, endpoint = aws$endpoint, - max_tries = aws$max_tries, - args = aws$args + args = aws$args, + max_tries = aws$max_tries %|||% 5L ) invisible() }, upload = function() { aws <- self$resources$aws + file <- file_init(path = path) + file_ensure_hash(file) aws_s3_upload( file = self$path, key = self$key, bucket = aws$bucket, region = aws$region, endpoint = aws$endpoint, + metadata = list( + "targets-database-hash" = file$hash, + "targets-database-size" = file$size, + "targets-database-time" = file$time + ), part_size = aws$part_size, - max_tries = aws$max_tries, - args = aws$args + args = aws$args, + max_tries = aws$max_tries %|||% 5L ) invisible() + }, + head = function() { + aws <- self$resources$aws + head <- aws_s3_head( + key = self$key, + bucket = aws$bucket, + region = aws$region, + endpoint = aws$endpoint, + args = aws$args, + max_tries = aws$max_tries %|||% 5L + ) + list( + exists = !is.null(head), + hash = head$Metadata$`targets-database-hash`, + size = head$Metadata$`targets-database-size`, + time = head$Metadata$`targets-database-time` + ) } ) ) diff --git a/R/class_database_gcp.R b/R/class_database_gcp.R index 29797afc9..0da052346 100644 --- a/R/class_database_gcp.R +++ b/R/class_database_gcp.R @@ -43,30 +43,50 @@ database_gcp_class <- R6::R6Class( }, download = function() { gcp <- self$resources$gcp - network <- self$resources$network file <- file_init(path = path) file_ensure_hash(file) gcp_gcs_download( file = self$path, key = self$key, bucket = gcp$bucket, - max_tries = network$max_tries %|||% 5L, - verbose = network$verbose 
%|||% TRUE + max_tries = gcp$max_tries %|||% 5L, + verbose = gcp$verbose %|||% TRUE ) invisible() }, upload = function() { gcp <- self$resources$gcp - network <- self$resources$network + file <- file_init(path = path) + file_ensure_hash(file) gcp_gcs_upload( file = self$path, key = self$key, bucket = gcp$bucket, + metadata = list( + "targets-database-hash" = file$hash, + "targets-database-size" = file$size, + "targets-database-time" = file$time + ), predefined_acl = gcp$predefined_acl %|||% "private", - max_tries = network$max_tries %|||% 5L, - verbose = network$verbose %|||% TRUE + max_tries = gcp$max_tries %|||% 5L, + verbose = gcp$verbose %|||% TRUE ) invisible() + }, + head = function() { + gcp <- self$resources$gcp + head <- gcp_gcs_head( + key = self$key, + bucket = gcp$bucket, + max_tries = gcp$max_tries %|||% 5L, + verbose = gcp$verbose %|||% TRUE + ) + list( + exists = !is.null(head), + hash = head$metadata$`targets-database-hash`, + size = head$metadata$`targets-database-size`, + time = head$metadata$`targets-database-time` + ) } ) ) diff --git a/R/class_database_local.R b/R/class_database_local.R index b9497b4a6..e80f1e715 100644 --- a/R/class_database_local.R +++ b/R/class_database_local.R @@ -25,5 +25,16 @@ database_local_class <- R6::R6Class( inherit = database_class, class = FALSE, portable = FALSE, - cloneable = FALSE + cloneable = FALSE, + public = list( + upload = function() { + invisible() + }, + download = function() { + invisible() + }, + head = function() { + invisible() + } + ) ) From 93a3d3eefe2a8f2556a095c3ff5bb6f30b83b55b Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 09:03:01 -0400 Subject: [PATCH 05/36] test mock sync() --- R/class_database.R | 6 ++- tests/testthat/test-class_database.R | 70 ++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 2 deletions(-) diff --git a/R/class_database.R b/R/class_database.R index 05e0aed05..a3464a831 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -302,10 +302,10 @@ database_class <- R6::R6Class( "download" }, head = function() { - file <- file_init(path = "path_mock") + file <- file_init(path = "path_cloud") file_ensure_hash(file) list( - exists = file.exists("path_mock"), + exists = file.exists("path_cloud"), hash = file$hash, size = file$size, time = file$time @@ -326,6 +326,8 @@ database_class <- R6::R6Class( time_file <- file_time_posixct(file$time) time_head <- file_time_posixct(head$time) if_any(time_file > time_head, self$upload(), self$download()) + } else { + invisible() } }, validate_columns = function(header, list_columns) { diff --git a/tests/testthat/test-class_database.R b/tests/testthat/test-class_database.R index 8511ec33e..a8c7b623b 100644 --- a/tests/testthat/test-class_database.R +++ b/tests/testthat/test-class_database.R @@ -408,3 +408,73 @@ tar_test("database unknown repository", { class = "tar_condition_validate" ) }) + +tar_test("mock download", { + x <- database_class$new(path = tempfile()) + expect_equal(x$download(), "download") +}) + +tar_test("mock upload", { + x <- database_class$new(path = tempfile()) + expect_equal(x$upload(), "upload") +}) + +tar_test("mock head non-existent file", { + x <- database_class$new(path = tempfile()) + out <- x$head() + expect_false(out$exists) + expect_equal(out$time, file_time(info = list(mtime_numeric = 0))) +}) + +tar_test("mock head", { + x <- database_class$new(path = tempfile()) + file.create("path_cloud") + out <- x$head() + expect_true(out$exists) +}) + +tar_test("mock sync no action", { + x <- 
database_class$new(path = tempfile()) + expect_null(x$sync()) +}) + +tar_test("mock sync only cloud", { + x <- database_class$new(path = tempfile()) + file.create("path_cloud") + expect_equal(x$sync(), "download") +}) + +tar_test("mock sync only local", { + x <- database_class$new(path = tempfile()) + file.create(x$path) + expect_equal(x$sync(), "upload") +}) + +tar_test("mock sync only local", { + x <- database_class$new(path = tempfile()) + file.create(x$path) + expect_equal(x$sync(), "upload") +}) + +tar_test("mock sync no action on agreement", { + x <- database_class$new(path = tempfile()) + writeLines("lines", x$path) + file.copy(x$path, "path_cloud") + expect_null(x$sync()) +}) + +tar_test("mock sync cloud file more recent", { + old <- system.file("CITATION", package = "targets", mustWork = TRUE) + x <- database_class$new(path = old) + writeLines("lines", "path_cloud") + expect_equal(x$sync(), "download") +}) + +tar_test("mock sync local file more recent", { + skip_cran() + x <- database_class$new(path = tempfile()) + writeLines("lines", x$path) + old <- system.file("CITATION", package = "targets", mustWork = TRUE) + file.copy(from = old, to = "path_cloud", copy.date = TRUE) + expect_equal(x$sync(), "upload") +}) From 1d69313818af21d14834e9cad2c2544d158b8f71 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 09:11:44 -0400 Subject: [PATCH 06/36] restore coverage --- tests/testthat/test-class_database.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-class_database.R b/tests/testthat/test-class_database.R index a8c7b623b..4fb8f57a7 100644 --- a/tests/testthat/test-class_database.R +++ b/tests/testthat/test-class_database.R @@ -396,10 +396,11 @@ tar_test("compare_working_directories()", { ) }) -tar_test("local database download_upload methods", { +tar_test("local database cloud methods", { database <- database_init(repository = "local") - expect_silent(database$upload()) - expect_silent(database$download()) + expect_null(database$download()) + expect_null(database$upload()) + expect_null(database$head()) }) tar_test("database unknown repository", { From 99aad6833d2242be1bcdfbfa430e9937351e28fc Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 09:39:58 -0400 Subject: [PATCH 07/36] test aws database methods --- tests/aws/test-class_database_aws.R | 119 ++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 tests/aws/test-class_database_aws.R diff --git a/tests/aws/test-class_database_aws.R b/tests/aws/test-class_database_aws.R new file mode 100644 index 000000000..ae01f6d8c --- /dev/null +++ b/tests/aws/test-class_database_aws.R @@ -0,0 +1,119 @@ +# Use sparingly to minimize AWS costs. +# Verify all `targets` buckets are deleted afterwards. 
+tar_test("aws database basic methods", { + skip_if_no_aws() + s3 <- paws.storage::s3() + bucket <- random_bucket_name() + s3$create_bucket(Bucket = bucket) + on.exit(aws_s3_delete_bucket(bucket)) + x <- database_init( + path = tempfile(), + subkey = "meta/key", + resources = tar_resources( + aws = tar_resources_aws(bucket = bucket, prefix = "custom/prefix") + ), + repository = "aws" + ) + key <- "custom/prefix/meta/key" + writeLines("meta_lines", x$path) + expect_false( + aws_s3_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + x$upload() + expect_true( + aws_s3_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + head <- x$head() + file <- file_init(x$path) + file_ensure_hash(file) + expect_true(head$exists) + expect_equal(head$hash, file$hash) + expect_equal(head$size, file$size) + expect_equal(head$time, file$time) + x$path <- tempfile() + expect_false(file.exists(x$path)) + x$download() + expect_true(file.exists(x$path)) + expect_equal(readLines(x$path), "meta_lines") + file <- file_init(x$path) + file_ensure_hash(file) + expect_equal(head$hash, file$hash) + expect_equal(head$size, file$size) +}) + +tar_test("aws database sync upload", { + skip_if_no_aws() + s3 <- paws.storage::s3() + bucket <- random_bucket_name() + s3$create_bucket(Bucket = bucket) + on.exit(aws_s3_delete_bucket(bucket)) + x <- database_init( + path = tempfile(), + subkey = "meta/key", + resources = tar_resources( + aws = tar_resources_aws(bucket = bucket, prefix = "custom/prefix") + ), + repository = "aws" + ) + key <- "custom/prefix/meta/key" + writeLines("meta_lines", x$path) + expect_false( + aws_s3_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + x$sync() + expect_true( + aws_s3_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + Sys.sleep(2) + writeLines("meta_lines2", x$path) + x$sync() + x$path <- tempfile() + x$download() + expect_equal(readLines(x$path), "meta_lines2") +}) + +tar_test("aws database sync first download", { + skip_if_no_aws() + s3 <- paws.storage::s3() + bucket <- random_bucket_name() + s3$create_bucket(Bucket = bucket) + on.exit(aws_s3_delete_bucket(bucket)) + x <- database_init( + path = tempfile(), + subkey = "meta/key", + resources = tar_resources( + aws = tar_resources_aws(bucket = bucket, prefix = "custom/prefix") + ), + repository = "aws" + ) + key <- "custom/prefix/meta/key" + writeLines("meta_lines", x$path) + x$upload() + x$path <- tempfile() + expect_false(file.exists(x$path)) + x$sync() + expect_true(file.exists(x$path)) + expect_equal(readLines(x$path), "meta_lines") + Sys.sleep(2) + writeLines("meta_lines2", x$path) + x$sync() + x$path <- tempfile() + x$download() + expect_equal(readLines(x$path), "meta_lines2") +}) From 1046766d0f0b0d3438a2b7e817c3351505e8f4c4 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 09:42:59 -0400 Subject: [PATCH 08/36] sketch gcp db tests --- tests/gcp/test-class_database_gcp.R | 122 ++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 tests/gcp/test-class_database_gcp.R diff --git a/tests/gcp/test-class_database_gcp.R b/tests/gcp/test-class_database_gcp.R new file mode 100644 index 000000000..ff4ce88f7 --- /dev/null +++ b/tests/gcp/test-class_database_gcp.R @@ -0,0 +1,122 @@ +# Use sparingly to minimize gcp costs. +# Verify all `targets` buckets are deleted afterwards. 
+tar_test("gcp database basic methods", { + skip_if_no_gcp() + bucket <- random_bucket_name() + gcp_gcs_auth(max_tries = 5) + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + googleCloudStorageR::gcs_create_bucket(bucket, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket)) + x <- database_init( + path = tempfile(), + subkey = "meta/key", + resources = tar_resources( + gcp = tar_resources_gcp(bucket = bucket, prefix = "custom/prefix") + ), + repository = "gcp" + ) + key <- "custom/prefix/meta/key" + writeLines("meta_lines", x$path) + expect_false( + gcp_gcs_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + x$upload() + expect_true( + gcp_gcs_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + head <- x$head() + file <- file_init(x$path) + file_ensure_hash(file) + expect_true(head$exists) + expect_equal(head$hash, file$hash) + expect_equal(head$size, file$size) + expect_equal(head$time, file$time) + x$path <- tempfile() + expect_false(file.exists(x$path)) + x$download() + expect_true(file.exists(x$path)) + expect_equal(readLines(x$path), "meta_lines") + file <- file_init(x$path) + file_ensure_hash(file) + expect_equal(head$hash, file$hash) + expect_equal(head$size, file$size) +}) + +tar_test("gcp database sync upload", { + skip_if_no_gcp() + bucket <- random_bucket_name() + gcp_gcs_auth(max_tries = 5) + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + googleCloudStorageR::gcs_create_bucket(bucket, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket)) + x <- database_init( + path = tempfile(), + subkey = "meta/key", + resources = tar_resources( + gcp = tar_resources_gcp(bucket = bucket, prefix = "custom/prefix") + ), + repository = "gcp" + ) + key <- "custom/prefix/meta/key" + writeLines("meta_lines", x$path) + expect_false( + gcp_gcs_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + x$sync() + expect_true( + gcp_gcs_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + Sys.sleep(2) + writeLines("meta_lines2", x$path) + x$sync() + x$path <- tempfile() + x$download() + expect_equal(readLines(x$path), "meta_lines2") +}) + +tar_test("gcp database sync first download", { + skip_if_no_gcp() + bucket <- random_bucket_name() + gcp_gcs_auth(max_tries = 5) + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + googleCloudStorageR::gcs_create_bucket(bucket, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket)) + x <- database_init( + path = tempfile(), + subkey = "meta/key", + resources = tar_resources( + gcp = tar_resources_gcp(bucket = bucket, prefix = "custom/prefix") + ), + repository = "gcp" + ) + key <- "custom/prefix/meta/key" + writeLines("meta_lines", x$path) + x$upload() + x$path <- tempfile() + expect_false(file.exists(x$path)) + x$sync() + expect_true(file.exists(x$path)) + expect_equal(readLines(x$path), "meta_lines") + Sys.sleep(2) + writeLines("meta_lines2", x$path) + x$sync() + x$path <- tempfile() + x$download() + expect_equal(readLines(x$path), "meta_lines2") +}) From e4b42d167dfa25041e75c318956e888f01f4e500 Mon Sep 17 00:00:00 2001 From: wlandau Date: Sun, 27 Aug 2023 09:48:12 -0400 Subject: [PATCH 09/36] test labels --- tests/aws/test-class_database_aws.R | 2 +- tests/gcp/test-class_database_gcp.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/aws/test-class_database_aws.R b/tests/aws/test-class_database_aws.R index ae01f6d8c..80690bc7b 100644 --- a/tests/aws/test-class_database_aws.R +++ b/tests/aws/test-class_database_aws.R @@ -88,7 +88,7 @@ tar_test("aws database 
sync upload", { expect_equal(readLines(x$path), "meta_lines2") }) -tar_test("aws database sync first download", { +tar_test("aws database sync download", { skip_if_no_aws() s3 <- paws.storage::s3() bucket <- random_bucket_name() diff --git a/tests/gcp/test-class_database_gcp.R b/tests/gcp/test-class_database_gcp.R index ff4ce88f7..5a272860c 100644 --- a/tests/gcp/test-class_database_gcp.R +++ b/tests/gcp/test-class_database_gcp.R @@ -90,7 +90,7 @@ tar_test("gcp database sync upload", { expect_equal(readLines(x$path), "meta_lines2") }) -tar_test("gcp database sync first download", { +tar_test("gcp database sync download", { skip_if_no_gcp() bucket <- random_bucket_name() gcp_gcs_auth(max_tries = 5) From a5a2e008949b153bc382734c8306ef3baf9fdbfe Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 10:18:39 -0400 Subject: [PATCH 10/36] Start aws meta test --- R/class_active.R | 1 + R/class_crew.R | 1 + tests/aws/test-aws_meta.R | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 tests/aws/test-aws_meta.R diff --git a/R/class_active.R b/R/class_active.R index 14bc025f3..27523b70b 100644 --- a/R/class_active.R +++ b/R/class_active.R @@ -102,6 +102,7 @@ active_class <- R6::R6Class( ensure_process = function() { self$process <- process_init(path_store = self$meta$store) self$process$record_process() + self$process$database$upload() }, produce_exports = function(envir, path_store, is_globalenv = NULL) { map(names(envir), ~force(envir[[.x]])) # try to nix high-mem promises diff --git a/R/class_crew.R b/R/class_crew.R index ac478935c..27361cef0 100644 --- a/R/class_crew.R +++ b/R/class_crew.R @@ -268,6 +268,7 @@ crew_class <- R6::R6Class( record_controller_summary = function(summary) { database <- database_crew(self$meta$store) database$overwrite_storage(summary) + database$upload() }, finalize_crew = function() { summary <- crew_summary(self$controller) diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R new file mode 100644 index 000000000..b85840c68 --- /dev/null +++ b/tests/aws/test-aws_meta.R @@ -0,0 +1,35 @@ +# Use sparingly to minimize AWS costs. +# And afterwards, manually verify that all the buckets are gone. 
+tar_test("AWS meta", { + skip_if_no_aws() + skip_if_not_installed("crew") + s3 <- paws.storage::s3() + bucket_name <- random_bucket_name() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "aws", + resources = tar_resources( + aws = tar_resources_aws( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_equal(tar_read(c), 2L) + tar_make() +}) From 0689fb57fc473a556068f06cfb539ab89a56ac44 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 10:58:57 -0400 Subject: [PATCH 11/36] rm link --- R/class_active.R | 18 +++++++++++------- R/class_database.R | 20 ++++++++++++++------ R/tar_option_set.R | 2 +- man/tar_option_set.Rd | 2 +- tests/aws/test-aws_meta.R | 15 +++++++++++++++ 5 files changed, 42 insertions(+), 15 deletions(-) diff --git a/R/class_active.R b/R/class_active.R index 27523b70b..f8944ffa6 100644 --- a/R/class_active.R +++ b/R/class_active.R @@ -63,6 +63,7 @@ active_class <- R6::R6Class( }, ensure_meta = function() { new_store <- !file.exists(self$meta$store) + self$meta$database$sync() self$meta$migrate_database() self$meta$validate() self$meta$database$preprocess(write = TRUE) @@ -74,8 +75,8 @@ active_class <- R6::R6Class( self$meta$restrict_records(self$pipeline) }, dequeue_meta = function() { - self$meta$database$dequeue_rows() - self$scheduler$progress$database$dequeue_rows() + self$meta$database$dequeue_rows(upload = TRUE) + self$scheduler$progress$database$dequeue_rows(upload = TRUE) }, dequeue_meta_time = function() { self$seconds_dequeued <- self$seconds_dequeued %|||% -Inf @@ -174,15 +175,18 @@ active_class <- R6::R6Class( self$scheduler$reporter$report_start() }, end = function() { - self$dequeue_meta() - pipeline_unload_loaded(self$pipeline) - seconds_elapsed <- time_seconds() - self$seconds_start scheduler <- self$scheduler - scheduler$reporter$report_end(scheduler$progress, seconds_elapsed) + pipeline_unload_loaded(self$pipeline) + self$meta$database$dequeue_rows(upload = FALSE) + if (self$meta$database$deduplicate_storage()) { + self$meta$database$upload() + } + self$scheduler$progress$database$dequeue_rows(upload = TRUE) path_scratch_del(path_store = self$meta$store) - self$meta$database$deduplicate_storage() compare_working_directories() tar_assert_objects_files(self$meta$store) + seconds_elapsed <- time_seconds() - self$seconds_start + scheduler$reporter$report_end(scheduler$progress, seconds_elapsed) }, validate = function() { super$validate() diff --git a/R/class_database.R b/R/class_database.R index a3464a831..e4697b893 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -154,11 +154,13 @@ database_class <- R6::R6Class( line <- self$produce_line(self$select_cols(row)) self$queue[length(self$queue) + 1L] <- line }, - dequeue_rows = function() { + dequeue_rows = function(upload = TRUE) { if (length(self$queue)) { on.exit(self$queue <- NULL) self$append_lines(self$queue) - self$upload() + if (upload) { + self$upload() + } } }, write_row = function(row) { @@ -289,11 +291,17 @@ database_class <- R6::R6Class( out }, deduplicate_storage = function() { + overwrite <- FALSE if (file.exists(self$path)) { - data <- 
self$condense_data(self$read_data()) - data <- data[order(data$name),, drop = FALSE] # nolint - self$overwrite_storage(data) + old <- self$read_data() + data <- self$condense_data(old) + overwrite <- (nrow(data) != nrow(old)) + if (overwrite) { + data <- data[order(data$name),, drop = FALSE] # nolint + self$overwrite_storage(data) + } } + overwrite }, upload = function() { "upload" @@ -316,7 +324,7 @@ database_class <- R6::R6Class( file <- file_init(path = path) file_ensure_hash(file) exists_file <- all(file.exists(path)) - exists_object <- head$exists + exists_object <- head$exists %|||% FALSE changed <- !all(file$hash == head$hash) if (exists_file && (!exists_object)) { self$upload() diff --git a/R/tar_option_set.R b/R/tar_option_set.R index cba392977..e2f35b10a 100644 --- a/R/tar_option_set.R +++ b/R/tar_option_set.R @@ -14,7 +14,7 @@ #' @param repository_meta Character of length 1 with the same values as #' `repository` (`"aws"`, `"gcp"`, `"local"`). Cloud repository #' for the metadata text files in `_targets/meta/`, including target -#' metadata and progress data. Defaults to [tar_option_get("repository")]. +#' metadata and progress data. Defaults to `tar_option_get("repository")`. #' @param imports Character vector of package names. #' For every package listed, `targets` tracks every #' dataset and every object in the package namespace diff --git a/man/tar_option_set.Rd b/man/tar_option_set.Rd index 3fbad387b..950c07363 100644 --- a/man/tar_option_set.Rd +++ b/man/tar_option_set.Rd @@ -141,7 +141,7 @@ the target runs.} \item{repository_meta}{Character of length 1 with the same values as \code{repository} (\code{"aws"}, \code{"gcp"}, \code{"local"}). Cloud repository for the metadata text files in \verb{_targets/meta/}, including target -metadata and progress data. Defaults to \link{tar_option_get("repository")}.} +metadata and progress data. Defaults to \code{tar_option_get("repository")}.} \item{iteration}{Character of length 1, name of the iteration mode of the target. 
Choices: diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R index b85840c68..dfaa54a58 100644 --- a/tests/aws/test-aws_meta.R +++ b/tests/aws/test-aws_meta.R @@ -31,5 +31,20 @@ tar_test("AWS meta", { do.call(tar_script, list(code = code)) tar_make() expect_equal(tar_read(c), 2L) + + + + + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + unlink(path_meta(path_store_default())) + expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) tar_make() }) From 91a8b3626b4670958d163c1b9a99aef9df67596c Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 11:28:14 -0400 Subject: [PATCH 12/36] Fix test --- R/class_active.R | 4 +--- R/class_database.R | 22 +++++++++++++--------- tests/aws/test-aws_meta.R | 6 ++---- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/R/class_active.R b/R/class_active.R index f8944ffa6..a5e70aa09 100644 --- a/R/class_active.R +++ b/R/class_active.R @@ -178,9 +178,7 @@ active_class <- R6::R6Class( scheduler <- self$scheduler pipeline_unload_loaded(self$pipeline) self$meta$database$dequeue_rows(upload = FALSE) - if (self$meta$database$deduplicate_storage()) { - self$meta$database$upload() - } + self$meta$database$sync(prefer_local = TRUE) self$scheduler$progress$database$dequeue_rows(upload = TRUE) path_scratch_del(path_store = self$meta$store) compare_working_directories() diff --git a/R/class_database.R b/R/class_database.R index e4697b893..0edea8f7f 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -291,17 +291,18 @@ database_class <- R6::R6Class( out }, deduplicate_storage = function() { - overwrite <- FALSE - if (file.exists(self$path)) { + exists <- file.exists(self$path) + overwrite <- !exists + if (exists) { old <- self$read_data() data <- self$condense_data(old) overwrite <- (nrow(data) != nrow(old)) - if (overwrite) { - data <- data[order(data$name),, drop = FALSE] # nolint - self$overwrite_storage(data) - } } - overwrite + if (overwrite) { + data <- data[order(data$name),, drop = FALSE] # nolint + self$overwrite_storage(data) + } + invisible() }, upload = function() { "upload" @@ -319,7 +320,7 @@ database_class <- R6::R6Class( time = file$time ) }, - sync = function() { + sync = function(prefer_local = FALSE) { head <- self$head() file <- file_init(path = path) file_ensure_hash(file) @@ -333,7 +334,10 @@ database_class <- R6::R6Class( } else if (exists_file && exists_object && changed) { time_file <- file_time_posixct(file$time) time_head <- file_time_posixct(head$time) - if_any(time_file > time_head, self$upload(), self$download()) + file_newer <- time_file > time_head + file_same <- file$time == head$time + do_upload <- file_newer || (prefer_local && file_same) + if_any(do_upload, self$upload(), self$download()) } else { invisible() } diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R index dfaa54a58..c4ffb3b99 100644 --- a/tests/aws/test-aws_meta.R +++ b/tests/aws/test-aws_meta.R @@ -30,11 +30,8 @@ tar_test("AWS meta", { }, env = list(bucket_name = bucket_name)) do.call(tar_script, list(code = code)) tar_make() + expect_true(all(tar_progress()$progress == "built")) expect_equal(tar_read(c), 2L) - - - - for (file in c("meta", "process", "progress", "crew")) { expect_true( aws_s3_exists( @@ -47,4 +44,5 @@ tar_test("AWS meta", { unlink(path_meta(path_store_default())) expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) tar_make() + 
expect_true(all(tar_progress()$progress == "skipped")) }) From d1b61986937a5fb3d026cc30a908527519884100 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 11:29:08 -0400 Subject: [PATCH 13/36] add to test --- tests/aws/test-aws_meta.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R index c4ffb3b99..fa89507ed 100644 --- a/tests/aws/test-aws_meta.R +++ b/tests/aws/test-aws_meta.R @@ -41,6 +41,13 @@ tar_test("AWS meta", { ) ) } + for (object in c("a", "b", "c")) { + aws_s3_exists( + key = file.path("_targets/objects", file), + bucket = bucket_name, + max_tries = 5L + ) + } unlink(path_meta(path_store_default())) expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) tar_make() From c4953c613e8c6837fdf8ce4b9531cbf3c863c02f Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 11:40:58 -0400 Subject: [PATCH 14/36] Bring back deduplicating storage --- R/class_active.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/class_active.R b/R/class_active.R index a5e70aa09..05a364f57 100644 --- a/R/class_active.R +++ b/R/class_active.R @@ -178,6 +178,7 @@ active_class <- R6::R6Class( scheduler <- self$scheduler pipeline_unload_loaded(self$pipeline) self$meta$database$dequeue_rows(upload = FALSE) + self$meta$database$deduplicate_storage() self$meta$database$sync(prefer_local = TRUE) self$scheduler$progress$database$dequeue_rows(upload = TRUE) path_scratch_del(path_store = self$meta$store) From fdb9d132594327482709109bc28de8633eef8c3b Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 11:59:40 -0400 Subject: [PATCH 15/36] add gcp meta test --- tests/gcp/test-gcp_meta.R | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/gcp/test-gcp_meta.R diff --git a/tests/gcp/test-gcp_meta.R b/tests/gcp/test-gcp_meta.R new file mode 100644 index 000000000..268ce7e63 --- /dev/null +++ b/tests/gcp/test-gcp_meta.R @@ -0,0 +1,57 @@ +# Use sparingly to minimize gcp costs. +# And afterwards, manually verify that all the buckets are gone. 
+tar_test("gcp meta", { + skip_if_no_gcp() + skip_if_not_installed("crew") + bucket_name <- random_bucket_name() + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + gcp_gcs_auth(max_tries = 5) + googleCloudStorageR::gcs_create_bucket(bucket_name, projectId = project) + # needs to be a GCP project the tester auth has access to + on.exit(gcp_gcs_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "gcp", + resources = tar_resources( + gcp = tar_resources_gcp( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + for (object in c("a", "b", "c")) { + gcp_gcs_exists( + key = file.path("_targets/objects", file), + bucket = bucket_name, + max_tries = 5L + ) + } + unlink(path_meta(path_store_default())) + expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) + tar_make() + expect_true(all(tar_progress()$progress == "skipped")) +}) From 4d53110e11df71110e64dfb443ea6d3b2a83981f Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Sun, 27 Aug 2023 12:21:04 -0400 Subject: [PATCH 16/36] prefer_local = TRUE in active algos --- R/class_active.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/class_active.R b/R/class_active.R index 05a364f57..797746ebb 100644 --- a/R/class_active.R +++ b/R/class_active.R @@ -63,7 +63,7 @@ active_class <- R6::R6Class( }, ensure_meta = function() { new_store <- !file.exists(self$meta$store) - self$meta$database$sync() + self$meta$database$sync(prefer_local = TRUE) self$meta$migrate_database() self$meta$validate() self$meta$database$preprocess(write = TRUE) From a5633fb18abb93f2e5af463d74f895c9c693c122 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 07:14:23 -0400 Subject: [PATCH 17/36] rename an internal function --- R/tar_delete.R | 4 ++-- R/tar_destroy.R | 2 +- R/tar_prune.R | 6 +++++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/R/tar_delete.R b/R/tar_delete.R index 17be50f41..ffea51c2e 100644 --- a/R/tar_delete.R +++ b/R/tar_delete.R @@ -61,7 +61,7 @@ tar_delete <- function( local_dynamic_files <- meta$name[index_local_dynamic_files] names <- setdiff(names, local_dynamic_files) if (cloud) { - tar_delete_cloud(names = names, meta = meta, path_store = store) + tar_delete_cloud_objects(names = names, meta = meta, path_store = store) } files <- list.files(path_objects_dir(store), all.files = TRUE) discard <- intersect(names, files) @@ -71,7 +71,7 @@ tar_delete <- function( # Tested in tests/aws/test-delete.R # nocov start -tar_delete_cloud <- function(names, meta, path_store) { +tar_delete_cloud_objects <- function(names, meta, path_store) { index_cloud <- !is.na(meta$repository) & meta$repository != "local" meta <- meta[index_cloud,, drop = FALSE] # nolint meta <- meta[meta$name %in% names,, drop = FALSE] # nolint diff --git a/R/tar_destroy.R b/R/tar_destroy.R index 702157ff5..04e9db7b4 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -107,7 +107,7 @@ tar_destroy <- function( ) if (destroy %in% c("all", 
"cloud")) { meta <- tar_meta(store = store) - tar_delete_cloud(names = meta$name, meta = meta, path_store = store) + tar_delete_cloud_objects(names = meta$name, meta = meta, path_store = store) unlink(path_scratch_dir_network(), recursive = TRUE) } if (tar_should_delete(path = path, ask = ask)) { diff --git a/R/tar_prune.R b/R/tar_prune.R index 25f214b7d..1c8914a8f 100644 --- a/R/tar_prune.R +++ b/R/tar_prune.R @@ -76,7 +76,11 @@ tar_prune_inner <- function(pipeline, cloud, path_store) { dynamic_files <- data$name[data$format == "file"] discard <- setdiff(discard, dynamic_files) if (cloud) { - tar_delete_cloud(names = discard, meta = data, path_store = path_store) + tar_delete_cloud_objects( + names = discard, + meta = data, + path_store = path_store + ) } data <- as_data_frame(data)[data$name %in% keep, ] meta$database$overwrite_storage(data) From 43879e7701252b29b13d9153fe0cf76cb547e456 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 07:17:15 -0400 Subject: [PATCH 18/36] lint --- R/tar_destroy.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/tar_destroy.R b/R/tar_destroy.R index 04e9db7b4..571d8dd9c 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -107,7 +107,11 @@ tar_destroy <- function( ) if (destroy %in% c("all", "cloud")) { meta <- tar_meta(store = store) - tar_delete_cloud_objects(names = meta$name, meta = meta, path_store = store) + tar_delete_cloud_objects( + names = meta$name, + meta = meta, + path_store = store + ) unlink(path_scratch_dir_network(), recursive = TRUE) } if (tar_should_delete(path = path, ask = ask)) { From 3f368b476bee6e406782446bb0fa440c2667faad Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 07:25:24 -0400 Subject: [PATCH 19/36] parse file --- R/tar_load_globals.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/tar_load_globals.R b/R/tar_load_globals.R index 7c2c387be..6f107193d 100644 --- a/R/tar_load_globals.R +++ b/R/tar_load_globals.R @@ -43,7 +43,7 @@ tar_load_globals <- function( tar_assert_allow_meta("tar_load_globals") force(envir) tar_assert_script(script) - eval(parse(text = readLines(script)), envir = envir) + eval(parse(file = script), envir = envir) map( x = tar_option_get("packages"), f = library, From bf35d6d728fa8ef85500a906a135eb75fa8d42f1 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 07:43:10 -0400 Subject: [PATCH 20/36] New process to get resources --- NAMESPACE | 1 + R/class_options.R | 21 +++++++++++++++++++++ R/utils_callr.R | 2 +- tests/testthat/test-class_options.R | 22 ++++++++++++++++++++++ 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 59ffc06cf..316c8b485 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -423,6 +423,7 @@ export(tar_newer) export(tar_noninteractive) export(tar_objects) export(tar_older) +export(tar_option_export) export(tar_option_get) export(tar_option_reset) export(tar_option_set) diff --git a/R/class_options.R b/R/class_options.R index 75e7e3069..da10ef9ba 100644 --- a/R/class_options.R +++ b/R/class_options.R @@ -601,3 +601,24 @@ deprecate_error_workspace <- function(error) { ) } } + +#' @title Export options. +#' @export +#' @keywords internal +#' @description Internal function. Not for users. +#' @return A list of options from tar_option_set(). +#' @param script Path to the target script. 
+tar_option_export <- function() { + tar_options$export() +} + +tar_option_script <- function(script) { + tar_assert_script(script) + callr::r( + func = function(script) { + eval(parse(file = script), envir = targets::tar_option_get("envir")) + targets::tar_option_export() + }, + args = list(script = script) + ) +} diff --git a/R/utils_callr.R b/R/utils_callr.R index 68ebcb0a9..7c56c5130 100644 --- a/R/utils_callr.R +++ b/R/utils_callr.R @@ -171,7 +171,7 @@ tar_callr_inner_try <- function( on.exit(tar_runtime$file_info_exist <- NULL, add = TRUE) old <- options(options) on.exit(options(old), add = TRUE) - targets <- eval(parse(text = readLines(script, warn = FALSE)), envir = envir) + targets <- eval(parse(file = script), envir = envir) targets_arguments$pipeline <- targets::tar_as_pipeline(targets) targets::tar_pipeline_validate_lite(targets_arguments$pipeline) do.call(targets_function, targets_arguments) diff --git a/tests/testthat/test-class_options.R b/tests/testthat/test-class_options.R index 30449de85..a52246164 100644 --- a/tests/testthat/test-class_options.R +++ b/tests/testthat/test-class_options.R @@ -455,3 +455,25 @@ tar_test("trust_object_timestamps", { class = "tar_condition_validate" ) }) + +tar_test("tar_option_export", { + skip_cran() + script <- path_script_default() + tar_script(tar_target(x, 1), script = script) + out <- tar_option_script(script = script) + expect_true(is.list(out)) + names <- c( + "packages", + "imports", + "library", + "format", + "repository", + "repository_meta", + "iteration", + "error", + "memory", + "garbage_collection", + "resources" + ) + expect_true(all(names %in% names(out))) +}) From 2a9db148ad828fa0af5af1bce480f1c589a1c45e Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 07:54:21 -0400 Subject: [PATCH 21/36] db methods delete() --- R/class_database_aws.R | 11 +++++++++++ R/class_database_gcp.R | 9 +++++++++ R/tar_destroy.R | 9 ++++++++- man/tar_destroy.Rd | 13 ++++++++++++- tests/aws/test-class_database_aws.R | 15 +++++++++++++++ tests/gcp/test-class_database_gcp.R | 15 +++++++++++++++ 6 files changed, 70 insertions(+), 2 deletions(-) diff --git a/R/class_database_aws.R b/R/class_database_aws.R index 5fb2d903a..d7bb29b40 100644 --- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -91,6 +91,17 @@ database_aws_class <- R6::R6Class( size = head$Metadata$`targets-database-size`, time = head$Metadata$`targets-database-time` ) + }, + delete = function() { + aws <- self$resources$aws + aws_s3_delete( + key = self$key, + bucket = aws$bucket, + region = aws$region, + endpoint = aws$endpoint, + args = aws$args, + max_tries = aws$max_tries %|||% 5L + ) } ) ) diff --git a/R/class_database_gcp.R b/R/class_database_gcp.R index 0da052346..4353aceac 100644 --- a/R/class_database_gcp.R +++ b/R/class_database_gcp.R @@ -87,6 +87,15 @@ database_gcp_class <- R6::R6Class( size = head$metadata$`targets-database-size`, time = head$metadata$`targets-database-time` ) + }, + delete = function() { + gcp <- self$resources$gcp + head <- gcp_gcs_delete( + key = self$key, + bucket = gcp$bucket, + max_tries = gcp$max_tries %|||% 5L, + verbose = gcp$verbose %|||% TRUE + ) } ) ) diff --git a/R/tar_destroy.R b/R/tar_destroy.R index 571d8dd9c..178ade3b1 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -31,7 +31,7 @@ #' @param destroy Character of length 1, what to destroy. Choices: #' * `"all"`: entire data store (default: `_targets/`) #' including cloud data, as well as download/upload scratch files. -#' * `"cloud"`: cloud data, e.g. 
target data +#' * `"cloud"`: cloud data, including metadata as well as target object data #' from targets with `tar_target(..., repository = "aws")`. #' Also deletes temporary staging files in #' `file.path(tempdir(), "targets")` @@ -85,6 +85,7 @@ tar_destroy <- function( "user" ), ask = NULL, + script = targets::tar_config_get("script"), store = targets::tar_config_get("store") ) { tar_assert_allow_meta("tar_destroy") @@ -112,6 +113,7 @@ tar_destroy <- function( meta = meta, path_store = store ) + tar_delete_cloud_meta(script = script) unlink(path_scratch_dir_network(), recursive = TRUE) } if (tar_should_delete(path = path, ask = ask)) { @@ -119,3 +121,8 @@ tar_destroy <- function( } invisible() } + +tar_delete_cloud_meta <- function(script) { + options <- tar_option_script(script = script) + browser() +} diff --git a/man/tar_destroy.Rd b/man/tar_destroy.Rd index 0b0fa102a..b16fc8b83 100644 --- a/man/tar_destroy.Rd +++ b/man/tar_destroy.Rd @@ -8,6 +8,7 @@ tar_destroy( destroy = c("all", "cloud", "local", "meta", "process", "progress", "objects", "scratch", "workspaces", "user"), ask = NULL, + script = targets::tar_config_get("script"), store = targets::tar_config_get("store") ) } @@ -16,7 +17,7 @@ tar_destroy( \itemize{ \item \code{"all"}: entire data store (default: \verb{_targets/}) including cloud data, as well as download/upload scratch files. -\item \code{"cloud"}: cloud data, e.g. target data +\item \code{"cloud"}: cloud data, including metadata as well as target object data from targets with \code{tar_target(..., repository = "aws")}. Also deletes temporary staging files in \code{file.path(tempdir(), "targets")} @@ -50,6 +51,16 @@ before deleting files. To disable this menu, set the \code{TAR_ASK} environment variable to \code{"false"}. \code{usethis::edit_r_environ()} can help set environment variables.} +\item{script}{Character of length 1, path to the +target script file. Defaults to \code{tar_config_get("script")}, +which in turn defaults to \verb{_targets.R}. When you set +this argument, the value of \code{tar_config_get("script")} +is temporarily changed for the current function call. +See \code{\link[=tar_script]{tar_script()}}, +\code{\link[=tar_config_get]{tar_config_get()}}, and \code{\link[=tar_config_set]{tar_config_set()}} for details +about the target script file and how to set it +persistently for a project.} + \item{store}{Character of length 1, path to the \code{targets} data store. Defaults to \code{tar_config_get("store")}, which in turn defaults to \verb{_targets/}. 
diff --git a/tests/aws/test-class_database_aws.R b/tests/aws/test-class_database_aws.R index 80690bc7b..4eb1debf7 100644 --- a/tests/aws/test-class_database_aws.R +++ b/tests/aws/test-class_database_aws.R @@ -47,6 +47,21 @@ tar_test("aws database basic methods", { file_ensure_hash(file) expect_equal(head$hash, file$hash) expect_equal(head$size, file$size) + expect_true( + aws_s3_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + x$delete() + expect_false( + aws_s3_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) }) tar_test("aws database sync upload", { diff --git a/tests/gcp/test-class_database_gcp.R b/tests/gcp/test-class_database_gcp.R index 5a272860c..83f1d850d 100644 --- a/tests/gcp/test-class_database_gcp.R +++ b/tests/gcp/test-class_database_gcp.R @@ -48,6 +48,21 @@ tar_test("gcp database basic methods", { file_ensure_hash(file) expect_equal(head$hash, file$hash) expect_equal(head$size, file$size) + expect_true( + gcp_gcs_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) + x$delete() + expect_false( + gcp_gcs_exists( + key = key, + bucket = bucket, + max_tries = 20 + ) + ) }) tar_test("gcp database sync upload", { From 4eb72dbc035e159071c1bc309c5a7f118b346527 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 08:04:21 -0400 Subject: [PATCH 22/36] tar_destroy() cloud meta --- R/class_database_aws.R | 2 +- R/class_database_gcp.R | 2 +- R/class_database_local.R | 3 +++ R/tar_destroy.R | 18 ++++++++++++++++- man/tar_option_export.Rd | 18 +++++++++++++++++ tests/aws/test-aws_meta.R | 29 ++++++++++++++++++++++++---- tests/aws/test-class_database_aws.R | 2 +- tests/gcp/test-class_database_gcp.R | 2 +- tests/testthat/test-class_database.R | 1 + 9 files changed, 68 insertions(+), 9 deletions(-) create mode 100644 man/tar_option_export.Rd diff --git a/R/class_database_aws.R b/R/class_database_aws.R index d7bb29b40..f3b49cd3d 100644 --- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -92,7 +92,7 @@ database_aws_class <- R6::R6Class( time = head$Metadata$`targets-database-time` ) }, - delete = function() { + delete_cloud = function() { aws <- self$resources$aws aws_s3_delete( key = self$key, diff --git a/R/class_database_gcp.R b/R/class_database_gcp.R index 4353aceac..d5b9b9fbe 100644 --- a/R/class_database_gcp.R +++ b/R/class_database_gcp.R @@ -88,7 +88,7 @@ database_gcp_class <- R6::R6Class( time = head$metadata$`targets-database-time` ) }, - delete = function() { + delete_cloud = function() { gcp <- self$resources$gcp head <- gcp_gcs_delete( key = self$key, diff --git a/R/class_database_local.R b/R/class_database_local.R index e80f1e715..bf1c7cfee 100644 --- a/R/class_database_local.R +++ b/R/class_database_local.R @@ -35,6 +35,9 @@ database_local_class <- R6::R6Class( }, head = function() { invisible() + }, + delete_cloud = function() { + invisible() } ) ) diff --git a/R/tar_destroy.R b/R/tar_destroy.R index 178ade3b1..616cc8fc3 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -124,5 +124,21 @@ tar_destroy <- function( tar_delete_cloud_meta <- function(script) { options <- tar_option_script(script = script) - browser() + old_repository_meta <- tar_options$get_repository_meta() + old_resources <- tar_options$get_resources() + on.exit({ + tar_options$set_repository_meta(old_repository_meta) + tar_options$set_resources(old_resources) + }) + tar_option_set(repository_meta = options$repository_meta) + tar_option_set(resources = options$resources) + meta <- database_meta(path_store = tempfile()) + progress <- 
database_progress(path_store = tempfile()) + process <- database_process(path_store = tempfile()) + crew <- database_crew(path_store = tempfile()) + meta$delete_cloud() + progress$delete_cloud() + process$delete_cloud() + crew$delete_cloud() + invisible() } diff --git a/man/tar_option_export.Rd b/man/tar_option_export.Rd new file mode 100644 index 000000000..d94b1961c --- /dev/null +++ b/man/tar_option_export.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/class_options.R +\name{tar_option_export} +\alias{tar_option_export} +\title{Export options.} +\usage{ +tar_option_export() +} +\arguments{ +\item{script}{Path to the target script.} +} +\value{ +A list of options from tar_option_set(). +} +\description{ +Internal function. Not for users. +} +\keyword{internal} diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R index fa89507ed..88d217d70 100644 --- a/tests/aws/test-aws_meta.R +++ b/tests/aws/test-aws_meta.R @@ -42,14 +42,35 @@ tar_test("AWS meta", { ) } for (object in c("a", "b", "c")) { - aws_s3_exists( - key = file.path("_targets/objects", file), - bucket = bucket_name, - max_tries = 5L + expect_true( + aws_s3_exists( + key = file.path("_targets/objects", file), + bucket = bucket_name, + max_tries = 5L + ) ) } unlink(path_meta(path_store_default())) expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) tar_make() expect_true(all(tar_progress()$progress == "skipped")) + tar_destroy() + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + for (object in c("a", "b", "c")) { + expect_false( + aws_s3_exists( + key = file.path("_targets/objects", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } }) diff --git a/tests/aws/test-class_database_aws.R b/tests/aws/test-class_database_aws.R index 4eb1debf7..fdfa19af5 100644 --- a/tests/aws/test-class_database_aws.R +++ b/tests/aws/test-class_database_aws.R @@ -54,7 +54,7 @@ tar_test("aws database basic methods", { max_tries = 20 ) ) - x$delete() + x$delete_cloud() expect_false( aws_s3_exists( key = key, diff --git a/tests/gcp/test-class_database_gcp.R b/tests/gcp/test-class_database_gcp.R index 83f1d850d..78dfc2ee3 100644 --- a/tests/gcp/test-class_database_gcp.R +++ b/tests/gcp/test-class_database_gcp.R @@ -55,7 +55,7 @@ tar_test("gcp database basic methods", { max_tries = 20 ) ) - x$delete() + x$delete_cloud() expect_false( gcp_gcs_exists( key = key, diff --git a/tests/testthat/test-class_database.R b/tests/testthat/test-class_database.R index 4fb8f57a7..d29130836 100644 --- a/tests/testthat/test-class_database.R +++ b/tests/testthat/test-class_database.R @@ -401,6 +401,7 @@ tar_test("local database cloud methods", { expect_null(database$download()) expect_null(database$upload()) expect_null(database$head()) + expect_null(database$delete_cloud()) }) tar_test("database unknown repository", { From 770d56edc3f76386420f248a5abfc5ce0a010b27 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 08:05:55 -0400 Subject: [PATCH 23/36] tar_destroy() gcp meta --- tests/gcp/test-gcp_meta.R | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/tests/gcp/test-gcp_meta.R b/tests/gcp/test-gcp_meta.R index 268ce7e63..4ae635be8 100644 --- a/tests/gcp/test-gcp_meta.R +++ b/tests/gcp/test-gcp_meta.R @@ -44,14 +44,35 @@ tar_test("gcp meta", { ) } for (object in c("a", "b", "c")) { - gcp_gcs_exists( 
- key = file.path("_targets/objects", file), - bucket = bucket_name, - max_tries = 5L + expect_true( + gcp_gcs_exists( + key = file.path("_targets/objects", file), + bucket = bucket_name, + max_tries = 5L + ) ) } unlink(path_meta(path_store_default())) expect_equal(sort(tar_outdated()), sort(c("a", "b", "c"))) tar_make() expect_true(all(tar_progress()$progress == "skipped")) + tar_destroy() + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + for (object in c("a", "b", "c")) { + expect_false( + gcp_gcs_exists( + key = file.path("_targets/objects", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } }) From 4ae13dd4d6e729c0f5b0cb16e1c0123f6774e095 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 08:15:54 -0400 Subject: [PATCH 24/36] relax tar_destroy() script requirement --- R/tar_destroy.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/tar_destroy.R b/R/tar_destroy.R index 616cc8fc3..96ff6eee3 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -122,7 +122,12 @@ tar_destroy <- function( invisible() } +# Covered in AWS and GCP tests. +# nocov start tar_delete_cloud_meta <- function(script) { + if (!file.exists(script)) { + return() + } options <- tar_option_script(script = script) old_repository_meta <- tar_options$get_repository_meta() old_resources <- tar_options$get_resources() @@ -142,3 +147,4 @@ tar_delete_cloud_meta <- function(script) { crew$delete_cloud() invisible() } +# nocov end From 4fc80ca5851bd81c401ee5d110ab028ab8b57bce Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 08:16:56 -0400 Subject: [PATCH 25/36] docs --- R/tar_destroy.R | 4 ++++ man/tar_destroy.Rd | 9 ++------- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/R/tar_destroy.R b/R/tar_destroy.R index 96ff6eee3..c81ca62d6 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -62,6 +62,10 @@ #' before deleting files. To disable this menu, set the `TAR_ASK` #' environment variable to `"false"`. `usethis::edit_r_environ()` #' can help set environment variables. +#' @param script Character of length 1, path to the +#' target script file. Defaults to `tar_config_get("script")`, +#' which in turn defaults to `_targets.R`. If the script does not exist, +#' then cloud metadata will not be deleted. #' @examples #' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN #' tar_dir({ # tar_dir() runs code from a temp dir for CRAN. diff --git a/man/tar_destroy.Rd b/man/tar_destroy.Rd index b16fc8b83..3e7a81a44 100644 --- a/man/tar_destroy.Rd +++ b/man/tar_destroy.Rd @@ -53,13 +53,8 @@ can help set environment variables.} \item{script}{Character of length 1, path to the target script file. Defaults to \code{tar_config_get("script")}, -which in turn defaults to \verb{_targets.R}. When you set -this argument, the value of \code{tar_config_get("script")} -is temporarily changed for the current function call. -See \code{\link[=tar_script]{tar_script()}}, -\code{\link[=tar_config_get]{tar_config_get()}}, and \code{\link[=tar_config_set]{tar_config_set()}} for details -about the target script file and how to set it -persistently for a project.} +which in turn defaults to \verb{_targets.R}. If the script does not exist, +then cloud metadata will not be deleted.} \item{store}{Character of length 1, path to the \code{targets} data store. 
Defaults to \code{tar_config_get("store")}, From ce3caea2e19a4712014f149dcc8319ca4b3dfd11 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 08:22:32 -0400 Subject: [PATCH 26/36] fix tests --- tests/aws/test-aws_meta.R | 4 ++-- tests/gcp/test-gcp_meta.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R index 88d217d70..6ba830c6a 100644 --- a/tests/aws/test-aws_meta.R +++ b/tests/aws/test-aws_meta.R @@ -44,7 +44,7 @@ tar_test("AWS meta", { for (object in c("a", "b", "c")) { expect_true( aws_s3_exists( - key = file.path("_targets/objects", file), + key = file.path("_targets/objects", object), bucket = bucket_name, max_tries = 5L ) @@ -67,7 +67,7 @@ tar_test("AWS meta", { for (object in c("a", "b", "c")) { expect_false( aws_s3_exists( - key = file.path("_targets/objects", file), + key = file.path("_targets/objects", object), bucket = bucket_name, max_tries = 5L ) diff --git a/tests/gcp/test-gcp_meta.R b/tests/gcp/test-gcp_meta.R index 4ae635be8..6158f3402 100644 --- a/tests/gcp/test-gcp_meta.R +++ b/tests/gcp/test-gcp_meta.R @@ -46,7 +46,7 @@ tar_test("gcp meta", { for (object in c("a", "b", "c")) { expect_true( gcp_gcs_exists( - key = file.path("_targets/objects", file), + key = file.path("_targets/objects", object), bucket = bucket_name, max_tries = 5L ) @@ -69,7 +69,7 @@ tar_test("gcp meta", { for (object in c("a", "b", "c")) { expect_false( gcp_gcs_exists( - key = file.path("_targets/objects", file), + key = file.path("_targets/objects", object), bucket = bucket_name, max_tries = 5L ) From d6e5f6b7f78b6530739835b8ca30841d51d0179a Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 08:23:56 -0400 Subject: [PATCH 27/36] exempt lines --- R/class_options.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/class_options.R b/R/class_options.R index da10ef9ba..c3040c6c1 100644 --- a/R/class_options.R +++ b/R/class_options.R @@ -615,10 +615,13 @@ tar_option_export <- function() { tar_option_script <- function(script) { tar_assert_script(script) callr::r( + # Covered in unit tests but runs in a different R process. 
+ # nocov start func = function(script) { eval(parse(file = script), envir = targets::tar_option_get("envir")) targets::tar_option_export() }, + # nocov end args = list(script = script) ) } From e04b002a63d564fdeb20a79a744848d67e4167e5 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 09:58:53 -0400 Subject: [PATCH 28/36] Add tar_meta_upload(), *_download(), *_sync(), and *_delete() --- NAMESPACE | 5 ++ NEWS.md | 5 +- R/class_active.R | 4 +- R/class_database.R | 27 ++++++++-- R/class_database_aws.R | 1 + R/class_database_gcp.R | 3 +- R/tar_meta.R | 34 +++++++++++- R/tar_meta_delete.R | 69 ++++++++++++++++++++++++ R/tar_meta_download.R | 52 ++++++++++++++++++ R/tar_meta_sync.R | 79 ++++++++++++++++++++++++++++ R/tar_meta_upload.R | 49 +++++++++++++++++ R/utils_condition.R | 9 ++++ R/utils_path.R | 4 ++ _pkgdown.yml | 7 ++- man/tar_condition.Rd | 3 ++ man/tar_crew.Rd | 1 - man/tar_load.Rd | 1 - man/tar_load_everything.Rd | 1 - man/tar_load_raw.Rd | 1 - man/tar_meta.Rd | 54 +++++++++++++++---- man/tar_meta_delete.Rd | 59 +++++++++++++++++++++ man/tar_meta_download.Rd | 54 +++++++++++++++++++ man/tar_meta_sync.Rd | 75 ++++++++++++++++++++++++++ man/tar_meta_upload.Rd | 54 +++++++++++++++++++ man/tar_objects.Rd | 1 - man/tar_pid.Rd | 1 - man/tar_process.Rd | 1 - man/tar_read.Rd | 1 - man/tar_read_raw.Rd | 1 - tests/testthat/test-class_database.R | 33 +++++++----- 30 files changed, 642 insertions(+), 47 deletions(-) create mode 100644 R/tar_meta_delete.R create mode 100644 R/tar_meta_download.R create mode 100644 R/tar_meta_sync.R create mode 100644 R/tar_meta_upload.R create mode 100644 man/tar_meta_delete.Rd create mode 100644 man/tar_meta_download.Rd create mode 100644 man/tar_meta_sync.Rd create mode 100644 man/tar_meta_upload.Rd diff --git a/NAMESPACE b/NAMESPACE index 316c8b485..efd390c47 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -417,6 +417,10 @@ export(tar_mermaid) export(tar_message) export(tar_message_run) export(tar_meta) +export(tar_meta_delete) +export(tar_meta_download) +export(tar_meta_sync) +export(tar_meta_upload) export(tar_name) export(tar_network) export(tar_newer) @@ -440,6 +444,7 @@ export(tar_pid) export(tar_pipeline) export(tar_pipeline_validate_lite) export(tar_poll) +export(tar_print) export(tar_process) export(tar_progress) export(tar_progress_branches) diff --git a/NEWS.md b/NEWS.md index 16778e080..d430d1904 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,16 +1,15 @@ # targets 1.2.2.9001 (development) - ## Invalidating changes Because of these changes, upgrading to this version of `targets` will unavoidably invalidate previously built targets in existing pipelines. Your pipeline code should still work, but any targets you ran before will most likely need to rerun after the upgrade. * In the `hash_deps()` method of the metadata class, exclude symbols which are not actually dependencies, rather than just giving them empty strings. This change decouples the dependency hash from the hash of the target's command (#1108). - ## Cloud metadata -* Add a new `repository_meta` option to select the cloud repository of the metadata files in `_targets/meta/` (#1109). Defaults to `tar_option_get("repository")`. +* Continuously upload metadata files to the cloud during `tar_make()`, `tar_make_clustermq()`, and `tar_make_future()` (#1109). Upload them to the repository specified in the `repository_meta` `tar_option_set()` option, and use the bucket and prefix set in the `resources` `tar_option_set()` option. 
`repository_meta` defaults to the existing `repository` `tar_option_set()` option. +* Add new functions `tar_meta_download()`, `tar_meta_upload()`, `tar_meta_sync()`, and `tar_meta_delete()` to directly manage cloud metadata outside the pipeline (#1109). ## Other changes diff --git a/R/class_active.R b/R/class_active.R index 797746ebb..de2b95d0a 100644 --- a/R/class_active.R +++ b/R/class_active.R @@ -63,7 +63,7 @@ active_class <- R6::R6Class( }, ensure_meta = function() { new_store <- !file.exists(self$meta$store) - self$meta$database$sync(prefer_local = TRUE) + self$meta$database$sync(prefer_local = TRUE, verbose = FALSE) self$meta$migrate_database() self$meta$validate() self$meta$database$preprocess(write = TRUE) @@ -179,7 +179,7 @@ active_class <- R6::R6Class( pipeline_unload_loaded(self$pipeline) self$meta$database$dequeue_rows(upload = FALSE) self$meta$database$deduplicate_storage() - self$meta$database$sync(prefer_local = TRUE) + self$meta$database$sync(prefer_local = TRUE, verbose = FALSE) self$scheduler$progress$database$dequeue_rows(upload = TRUE) path_scratch_del(path_store = self$meta$store) compare_working_directories() diff --git a/R/class_database.R b/R/class_database.R index 0edea8f7f..bf1c2686a 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -320,16 +320,22 @@ database_class <- R6::R6Class( time = file$time ) }, - sync = function(prefer_local = FALSE) { + sync = function(prefer_local = TRUE, verbose = TRUE) { head <- self$head() - file <- file_init(path = path) + file <- file_init(path = self$path) file_ensure_hash(file) - exists_file <- all(file.exists(path)) + exists_file <- all(file.exists(self$path)) exists_object <- head$exists %|||% FALSE changed <- !all(file$hash == head$hash) if (exists_file && (!exists_object)) { + if (verbose) { + tar_print("Uploading ", self$path, " to ", self$key, ".") + } self$upload() } else if ((!exists_file) && exists_object) { + if (verbose) { + tar_print("Downloading ", self$key, " to ", self$path, ".") + } self$download() } else if (exists_file && exists_object && changed) { time_file <- file_time_posixct(file$time) @@ -337,8 +343,21 @@ database_class <- R6::R6Class( file_newer <- time_file > time_head file_same <- file$time == head$time do_upload <- file_newer || (prefer_local && file_same) - if_any(do_upload, self$upload(), self$download()) + if (do_upload) { + if (verbose) { + tar_print("Uploading ", self$path, " to ", self$key, ".") + } + self$upload() + } else { + if (verbose) { + tar_print("Downloading ", self$key, " to ", self$path, ".") + } + self$download() + } } else { + if (verbose) { + tar_print("Skipped syncing ", self$path, " and ", self$key, ".") + } invisible() } }, diff --git a/R/class_database_aws.R b/R/class_database_aws.R index f3b49cd3d..9c4641290 100644 --- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -43,6 +43,7 @@ database_aws_class <- R6::R6Class( }, download = function() { aws <- self$resources$aws + dir_create(dirname(self$path)) aws_s3_download( file = self$path, key = self$key, diff --git a/R/class_database_gcp.R b/R/class_database_gcp.R index d5b9b9fbe..4117597e3 100644 --- a/R/class_database_gcp.R +++ b/R/class_database_gcp.R @@ -43,8 +43,7 @@ database_gcp_class <- R6::R6Class( }, download = function() { gcp <- self$resources$gcp - file <- file_init(path = path) - file_ensure_hash(file) + dir_create(dirname(self$path)) gcp_gcs_download( file = self$path, key = self$key, diff --git a/R/tar_meta.R b/R/tar_meta.R index d932aeb71..84f2721c4 100644 --- a/R/tar_meta.R +++ b/R/tar_meta.R @@ 
-1,6 +1,6 @@ #' @title Read a project's metadata. #' @export -#' @family data +#' @family metadata #' @description Read the metadata of all recorded targets and global objects. #' @details A metadata row only updates when the target is built. #' [tar_progress()] shows information on targets that are running. @@ -18,6 +18,38 @@ #' pipeline. The only exception is literate programming #' target factories in the `tarchetypes` package such as `tar_render()` #' and `tar_quarto()`. +#' @section Cloud metadata: +#' Metadata files help `targets` +#' read data objects and decide if the pipeline is up to date. +#' Usually, these metadata files live in files in the local +#' `_targets/meta/` folder in your project, e.g. `_targets/meta/meta`. +#' But in addition, if you set `repository` to anything other than +#' `"local"` in [tar_option_set()] in `_targets.R`, then [tar_make()] +#' continuously uploads the metadata files to the bucket you specify +#' in `resources`. [tar_meta_delete()] will delete those files from the +#' cloud, and so will [tar_destroy()] if `destroy` is +#' set to either `"all"` or `"cloud"`. +#' +#' Other functions in `targets`, such as [tar_meta()], +#' [tar_visnetwork()], [tar_outdated()], and [tar_invalidate()], +#' use the local metadata only and ignore the copies on the cloud. +#' So if you are working on a different computer than the +#' one running the pipeline, you will need to download the cloud metadata +#' to your current machine using [tar_meta_download()]. Other functions +#' [tar_meta_upload()], [tar_meta_sync()], and [tar_meta_delete()] +#' also manage metadata across the cloud and the local file system. +#' +#' Remarks: +#' * The `repository_meta` option in [tar_option_set()] is actually +#' what controls where the metadata lives in the cloud, but it defaults +#' to `repository`. +#' * Like [tar_make()], [tar_make_future()] and [tar_make_clustermq()] +#' also continuously upload metadata files to the cloud bucket +#' specified in `resources`. +#' * [`tar_meta_download()`] and related functions need to run `_targets.R` +#' to detect [tar_option_set()] options `repository_meta` and `resources`, +#' so please be aware of side effects that may happen running your +#' custom `_targets.R` file. #' @return A data frame with one row per target/object and the selected fields. #' @inheritParams tar_validate #' @param names Optional, names of the targets. If supplied, `tar_meta()` diff --git a/R/tar_meta_delete.R b/R/tar_meta_delete.R new file mode 100644 index 000000000..0393138af --- /dev/null +++ b/R/tar_meta_delete.R @@ -0,0 +1,69 @@ +#' @title Delete metadata. +#' @export +#' @family metadata +#' @description Delete the project metadata files from the local file system, +#' the cloud, or both. +#' @inheritParams tar_validate +#' @param which Character of length 1, which metadata files to delete. +#' Choose `"local"` for local files, `"cloud"` for files on the cloud, +#' or `"all"` to delete metadata files from both the local file system +#' and the cloud. +#' @examples +#' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN +#' tar_dir({ # tar_dir() runs code from a temp dir for CRAN. 
+#' tar_script({
+#'   tar_option_set(
+#'     resources = tar_resources(
+#'       aws = tar_resources_aws(
+#'         bucket = "YOUR_BUCKET_NAME",
+#'         prefix = "YOUR_PROJECT_NAME"
+#'       )
+#'     ),
+#'     repository = "aws"
+#'   )
+#'   list(
+#'     tar_target(x, data.frame(x = seq_len(2), y = seq_len(2)))
+#'   )
+#' }, ask = FALSE)
+#' tar_make()
+#' tar_meta_delete()
+#' })
+#' }
+tar_meta_delete <- function(
+  which = "all",
+  script = targets::tar_config_get("script"),
+  store = targets::tar_config_get("store")
+) {
+  tar_assert_chr(which)
+  tar_assert_scalar(which)
+  tar_assert_none_na(which)
+  tar_assert_nzchar(which)
+  tar_assert_in(which, c("all", "local", "cloud"))
+  if (which %in% c("all", "local")) {
+    tar_assert_scalar(store)
+    tar_assert_chr(store)
+    tar_assert_none_na(store)
+    tar_assert_nzchar(store)
+    unlink(path_meta(store))
+    unlink(path_progress(store))
+    unlink(path_process(store))
+    unlink(path_crew(store))
+  }
+  if (which %in% c("all", "cloud")) {
+    tar_assert_script(script)
+    options <- tar_option_script(script = script)
+    old_repository_meta <- tar_options$get_repository_meta()
+    old_resources <- tar_options$get_resources()
+    on.exit({
+      tar_options$set_repository_meta(old_repository_meta)
+      tar_options$set_resources(old_resources)
+    })
+    tar_option_set(repository_meta = options$repository_meta)
+    tar_option_set(resources = options$resources)
+    database_meta(path_store = tempfile())$delete_cloud()
+    database_progress(path_store = tempfile())$delete_cloud()
+    database_process(path_store = tempfile())$delete_cloud()
+    database_crew(path_store = tempfile())$delete_cloud()
+  }
+  invisible()
+}
diff --git a/R/tar_meta_download.R b/R/tar_meta_download.R
new file mode 100644
index 000000000..2d8a28182
--- /dev/null
+++ b/R/tar_meta_download.R
@@ -0,0 +1,52 @@
+#' @title Download cloud metadata to the local data store.
+#' @export
+#' @family metadata
+#' @description Download metadata files from the cloud location
+#' (repository, bucket, and prefix) you set in
+#' [tar_option_set()] in `_targets.R`.
+#' @inheritParams tar_validate
+#' @examples
+#' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN
+#' tar_dir({ # tar_dir() runs code from a temp dir for CRAN.
+#' tar_script({
+#'   tar_option_set(
+#'     resources = tar_resources(
+#'       aws = tar_resources_aws(
+#'         bucket = "YOUR_BUCKET_NAME",
+#'         prefix = "YOUR_PROJECT_NAME"
+#'       )
+#'     ),
+#'     repository = "aws"
+#'   )
+#'   list(
+#'     tar_target(x, data.frame(x = seq_len(2), y = seq_len(2)))
+#'   )
+#' }, ask = FALSE)
+#' tar_make()
+#' tar_meta_download()
+#' })
+#' }
+tar_meta_download <- function(
+  script = targets::tar_config_get("script"),
+  store = targets::tar_config_get("store")
+) {
+  tar_assert_script(script)
+  tar_assert_scalar(store)
+  tar_assert_chr(store)
+  tar_assert_none_na(store)
+  tar_assert_nzchar(store)
+  options <- tar_option_script(script = script)
+  old_repository_meta <- tar_options$get_repository_meta()
+  old_resources <- tar_options$get_resources()
+  on.exit({
+    tar_options$set_repository_meta(old_repository_meta)
+    tar_options$set_resources(old_resources)
+  })
+  tar_option_set(repository_meta = options$repository_meta)
+  tar_option_set(resources = options$resources)
+  database_meta(path_store = store)$download()
+  database_progress(path_store = store)$download()
+  database_process(path_store = store)$download()
+  database_crew(path_store = store)$download()
+  invisible()
+}
diff --git a/R/tar_meta_sync.R b/R/tar_meta_sync.R
new file mode 100644
index 000000000..961891c12
--- /dev/null
+++ b/R/tar_meta_sync.R
@@ -0,0 +1,79 @@
+#' @title Synchronize cloud metadata.
+#' @export
+#' @family metadata
+#' @description Synchronize metadata in a cloud bucket with metadata in the
+#' local data store.
+#' @details [tar_meta_sync()] synchronizes the local and cloud copies
+#' of all the metadata files of the pipeline so that both have the
+#' most recent copy. For each metadata file,
+#' if the local file does not exist or is older than the cloud file,
+#' then the cloud file is downloaded to the local file path.
+#' Conversely, if the cloud file is older or does not exist, then the local
+#' file is uploaded to the cloud. If the time stamps of these files are
+#' equal, use the `prefer_local` argument to determine
+#' which copy takes precedence.
+#' @inheritParams tar_validate
+#' @param prefer_local Logical of length 1 to control which copy of each
+#' metadata file takes precedence if the local hash and cloud hash
+#' are different but the time stamps are the same. Set to `TRUE`
+#' to upload the local data file in that scenario, `FALSE` to download
+#' the cloud file.
+#' @param verbose Logical of length 1, whether to print informative
+#' console messages.
+#' @examples
+#' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN
+#' tar_dir({ # tar_dir() runs code from a temp dir for CRAN.
+#' tar_script({
+#'   tar_option_set(
+#'     resources = tar_resources(
+#'       aws = tar_resources_aws(
+#'         bucket = "YOUR_BUCKET_NAME",
+#'         prefix = "YOUR_PROJECT_NAME"
+#'       )
+#'     ),
+#'     repository = "aws"
+#'   )
+#'   list(
+#'     tar_target(x, data.frame(x = seq_len(2), y = seq_len(2)))
+#'   )
+#' }, ask = FALSE)
+#' tar_make()
+#' tar_meta_sync()
+#' })
+#' }
+tar_meta_sync <- function(
+  prefer_local = TRUE,
+  verbose = TRUE,
+  script = targets::tar_config_get("script"),
+  store = targets::tar_config_get("store")
+) {
+  tar_assert_lgl(prefer_local)
+  tar_assert_none_na(prefer_local)
+  tar_assert_scalar(prefer_local)
+  tar_assert_lgl(verbose)
+  tar_assert_none_na(verbose)
+  tar_assert_scalar(verbose)
+  tar_assert_script(script)
+  tar_assert_scalar(store)
+  tar_assert_chr(store)
+  tar_assert_none_na(store)
+  tar_assert_nzchar(store)
+  options <- tar_option_script(script = script)
+  old_repository_meta <- tar_options$get_repository_meta()
+  old_resources <- tar_options$get_resources()
+  on.exit({
+    tar_options$set_repository_meta(old_repository_meta)
+    tar_options$set_resources(old_resources)
+  })
+  tar_option_set(repository_meta = options$repository_meta)
+  tar_option_set(resources = options$resources)
+  meta <- database_meta(path_store = store)
+  progress <- database_progress(path_store = store)
+  process <- database_process(path_store = store)
+  crew <- database_crew(path_store = store)
+  meta$sync(prefer_local = prefer_local, verbose = verbose)
+  progress$sync(prefer_local = prefer_local, verbose = verbose)
+  process$sync(prefer_local = prefer_local, verbose = verbose)
+  crew$sync(prefer_local = prefer_local, verbose = verbose)
+  invisible()
+}
diff --git a/R/tar_meta_upload.R b/R/tar_meta_upload.R
new file mode 100644
index 000000000..3413ca2fa
--- /dev/null
+++ b/R/tar_meta_upload.R
@@ -0,0 +1,49 @@
+#' @title Upload local metadata to the cloud.
+#' @export
+#' @family metadata
+#' @description Upload local metadata files to the cloud location
+#' (repository, bucket, and prefix) you set in
+#' [tar_option_set()] in `_targets.R`.
+#' @inheritParams tar_validate
+#' @examples
+#' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN
+#' tar_dir({ # tar_dir() runs code from a temp dir for CRAN.
+#' tar_script({
+#'   tar_option_set(
+#'     resources = tar_resources(
+#'       aws = tar_resources_aws(
+#'         bucket = "YOUR_BUCKET_NAME",
+#'         prefix = "YOUR_PROJECT_NAME"
+#'       )
+#'     ),
+#'     repository = "aws"
+#'   )
+#'   list(
+#'     tar_target(x, data.frame(x = seq_len(2), y = seq_len(2)))
+#'   )
+#' }, ask = FALSE)
+#' tar_make()
+#' tar_meta_upload()
+#' })
+#' }
+tar_meta_upload <- function(
+  script = targets::tar_config_get("script"),
+  store = targets::tar_config_get("store")
+) {
+  tar_assert_script(script)
+  tar_assert_store(store)
+  options <- tar_option_script(script = script)
+  old_repository_meta <- tar_options$get_repository_meta()
+  old_resources <- tar_options$get_resources()
+  on.exit({
+    tar_options$set_repository_meta(old_repository_meta)
+    tar_options$set_resources(old_resources)
+  })
+  tar_option_set(repository_meta = options$repository_meta)
+  tar_option_set(resources = options$resources)
+  database_meta(path_store = store)$upload()
+  database_progress(path_store = store)$upload()
+  database_process(path_store = store)$upload()
+  database_crew(path_store = store)$upload()
+  invisible()
+}
diff --git a/R/utils_condition.R b/R/utils_condition.R
index 2a153ace0..f58f1737e 100644
--- a/R/utils_condition.R
+++ b/R/utils_condition.R
@@ -98,6 +98,15 @@ tar_warn_validate <- function(...)
{ ) } +#' @export +#' @rdname tar_condition +tar_print <- function(...) { + tar_message( + message = paste0(...), + class = c("tar_condition_run", "tar_condition_targets") + ) +} + #' @export #' @rdname tar_condition tar_error <- function(message, class) { diff --git a/R/utils_path.R b/R/utils_path.R index 036e5b1c3..c3b4ca86b 100644 --- a/R/utils_path.R +++ b/R/utils_path.R @@ -77,6 +77,10 @@ path_process <- function(path_store) { file.path(path_meta_dir(path_store), "process") } +path_crew <- function(path_store) { + file.path(path_meta_dir(path_store), "crew") +} + path_scratch <- function(path_store, pattern = "tmp") { file.path(path_scratch_dir(path_store), pattern) } diff --git a/_pkgdown.yml b/_pkgdown.yml index 82b74f7f1..a0e2f132c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -65,12 +65,17 @@ reference: - '`tar_load`' - '`tar_load_raw`' - '`tar_load_everything`' - - '`tar_meta`' - '`tar_objects`' - '`tar_pid`' - '`tar_process`' - '`tar_read`' - '`tar_read_raw`' +- title: Metadata + - '`tar_meta`' + - '`tar_meta_delete`' + - '`tar_meta_download`' + - '`tar_meta_sync`' + - '`tar_meta_upload`' - title: Inspect contents: - '`tar_deps`' diff --git a/man/tar_condition.Rd b/man/tar_condition.Rd index 1ef70b122..c6fd76b0c 100644 --- a/man/tar_condition.Rd +++ b/man/tar_condition.Rd @@ -9,6 +9,7 @@ \alias{tar_warn_deprecate} \alias{tar_warn_run} \alias{tar_warn_validate} +\alias{tar_print} \alias{tar_error} \alias{tar_warning} \alias{tar_message} @@ -28,6 +29,8 @@ tar_warn_run(...) tar_warn_validate(...) +tar_print(...) + tar_error(message, class) tar_warning(message, class) diff --git a/man/tar_crew.Rd b/man/tar_crew.Rd index 6e60a76c9..4b27e9125 100644 --- a/man/tar_crew.Rd +++ b/man/tar_crew.Rd @@ -79,7 +79,6 @@ Other data: \code{\link{tar_load_everything}()}, \code{\link{tar_load_raw}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_pid}()}, \code{\link{tar_process}()}, diff --git a/man/tar_load.Rd b/man/tar_load.Rd index 6dee1a402..ca0c16965 100644 --- a/man/tar_load.Rd +++ b/man/tar_load.Rd @@ -114,7 +114,6 @@ Other data: \code{\link{tar_crew}()}, \code{\link{tar_load_everything}()}, \code{\link{tar_load_raw}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_pid}()}, \code{\link{tar_process}()}, diff --git a/man/tar_load_everything.Rd b/man/tar_load_everything.Rd index 6745b781e..5cc6f27a1 100644 --- a/man/tar_load_everything.Rd +++ b/man/tar_load_everything.Rd @@ -76,7 +76,6 @@ Other data: \code{\link{tar_crew}()}, \code{\link{tar_load_raw}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_pid}()}, \code{\link{tar_process}()}, diff --git a/man/tar_load_raw.Rd b/man/tar_load_raw.Rd index c18ea63e0..123d9b42e 100644 --- a/man/tar_load_raw.Rd +++ b/man/tar_load_raw.Rd @@ -108,7 +108,6 @@ Other data: \code{\link{tar_crew}()}, \code{\link{tar_load_everything}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_pid}()}, \code{\link{tar_process}()}, diff --git a/man/tar_meta.Rd b/man/tar_meta.Rd index 81fb51e84..f419ffd62 100644 --- a/man/tar_meta.Rd +++ b/man/tar_meta.Rd @@ -126,6 +126,43 @@ target factories in the \code{tarchetypes} package such as \code{tar_render()} and \code{tar_quarto()}. } +\section{Cloud metadata}{ + +Metadata files help \code{targets} +read data objects and decide if the pipeline is up to date. 
+Usually, these metadata files live in files in the local +\verb{_targets/meta/} folder in your project, e.g. \verb{_targets/meta/meta}. +But in addition, if you set \code{repository} to anything other than +\code{"local"} in \code{\link[=tar_option_set]{tar_option_set()}} in \verb{_targets.R}, then \code{\link[=tar_make]{tar_make()}} +continuously uploads the metadata files to the bucket you specify +in \code{resources}. \code{\link[=tar_meta_delete]{tar_meta_delete()}} will delete those files from the +cloud, and so will \code{\link[=tar_destroy]{tar_destroy()}} if \code{destroy} is +set to either \code{"all"} or \code{"cloud"}. + +Other functions in \code{targets}, such as \code{\link[=tar_meta]{tar_meta()}}, +\code{\link[=tar_visnetwork]{tar_visnetwork()}}, \code{\link[=tar_outdated]{tar_outdated()}}, and \code{\link[=tar_invalidate]{tar_invalidate()}}, +use the local metadata only and ignore the copies on the cloud. +So if you are working on a different computer than the +one running the pipeline, you will need to download the cloud metadata +to your current machine using \code{\link[=tar_meta_download]{tar_meta_download()}}. Other functions +\code{\link[=tar_meta_upload]{tar_meta_upload()}}, \code{\link[=tar_meta_sync]{tar_meta_sync()}}, and \code{\link[=tar_meta_delete]{tar_meta_delete()}} +also manage metadata across the cloud and the local file system. + +Remarks: +\itemize{ +\item The \code{repository_meta} option in \code{\link[=tar_option_set]{tar_option_set()}} is actually +what controls where the metadata lives in the cloud, but it defaults +to \code{repository}. +\item Like \code{\link[=tar_make]{tar_make()}}, \code{\link[=tar_make_future]{tar_make_future()}} and \code{\link[=tar_make_clustermq]{tar_make_clustermq()}} +also continuously upload metadata files to the cloud bucket +specified in \code{resources}. +\item \code{\link[=tar_meta_download]{tar_meta_download()}} and related functions need to run \verb{_targets.R} +to detect \code{\link[=tar_option_set]{tar_option_set()}} options \code{repository_meta} and \code{resources}, +so please be aware of side effects that may happen running your +custom \verb{_targets.R} file. +} +} + \examples{ if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN tar_dir({ # tar_dir() runs code from a temp dir for CRAN. @@ -142,15 +179,10 @@ tar_meta(starts_with("y_")) # see also any_of() } } \seealso{ -Other data: -\code{\link{tar_crew}()}, -\code{\link{tar_load_everything}()}, -\code{\link{tar_load_raw}()}, -\code{\link{tar_load}()}, -\code{\link{tar_objects}()}, -\code{\link{tar_pid}()}, -\code{\link{tar_process}()}, -\code{\link{tar_read_raw}()}, -\code{\link{tar_read}()} +Other metadata: +\code{\link{tar_meta_delete}()}, +\code{\link{tar_meta_download}()}, +\code{\link{tar_meta_sync}()}, +\code{\link{tar_meta_upload}()} } -\concept{data} +\concept{metadata} diff --git a/man/tar_meta_delete.Rd b/man/tar_meta_delete.Rd new file mode 100644 index 000000000..c495c9d63 --- /dev/null +++ b/man/tar_meta_delete.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tar_meta_delete.R +\name{tar_meta_delete} +\alias{tar_meta_delete} +\title{Delete metadata.} +\usage{ +tar_meta_delete( + which = "all", + script = targets::tar_config_get("script"), + store = targets::tar_config_get("store") +) +} +\arguments{ +\item{which}{Character of length 1, which metadata files to delete. 
+Choose \code{"local"} for local files, \code{"cloud"} for files on the cloud,
+or \code{"all"} to delete metadata files from both the local file system
+and the cloud.}
+
+\item{script}{Character of length 1, path to the
+target script file. Defaults to \code{tar_config_get("script")},
+which in turn defaults to \verb{_targets.R}. When you set
+this argument, the value of \code{tar_config_get("script")}
+is temporarily changed for the current function call.
+See \code{\link[=tar_script]{tar_script()}},
+\code{\link[=tar_config_get]{tar_config_get()}}, and \code{\link[=tar_config_set]{tar_config_set()}} for details
+about the target script file and how to set it
+persistently for a project.}
+
+\item{store}{Character of length 1, path to the
+\code{targets} data store. Defaults to \code{tar_config_get("store")},
+which in turn defaults to \verb{_targets/}.
+When you set this argument, the value of \code{tar_config_get("store")}
+is temporarily changed for the current function call.
+See \code{\link[=tar_config_get]{tar_config_get()}} and \code{\link[=tar_config_set]{tar_config_set()}} for details
+about how to set the data store path persistently
+for a project.}
+}
+\description{
+Delete the project metadata files from the local file system,
+the cloud, or both.
+}
+\examples{
+if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN
+tar_dir({ # tar_dir() runs code from a temp dir for CRAN.
+tar_script({
+}, ask = FALSE)
+tar_make()
+tar_meta_delete()
+})
+}
+}
+\seealso{
+Other metadata:
+\code{\link{tar_meta_download}()},
+\code{\link{tar_meta_sync}()},
+\code{\link{tar_meta_upload}()},
+\code{\link{tar_meta}()}
+}
+\concept{metadata}
diff --git a/man/tar_meta_download.Rd b/man/tar_meta_download.Rd
new file mode 100644
index 000000000..5ec8b1032
--- /dev/null
+++ b/man/tar_meta_download.Rd
@@ -0,0 +1,54 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/tar_meta_download.R
+\name{tar_meta_download}
+\alias{tar_meta_download}
+\title{Download cloud metadata to the local data store.}
+\usage{
+tar_meta_download(
+  script = targets::tar_config_get("script"),
+  store = targets::tar_config_get("store")
+)
+}
+\arguments{
+\item{script}{Character of length 1, path to the
+target script file. Defaults to \code{tar_config_get("script")},
+which in turn defaults to \verb{_targets.R}. When you set
+this argument, the value of \code{tar_config_get("script")}
+is temporarily changed for the current function call.
+See \code{\link[=tar_script]{tar_script()}},
+\code{\link[=tar_config_get]{tar_config_get()}}, and \code{\link[=tar_config_set]{tar_config_set()}} for details
+about the target script file and how to set it
+persistently for a project.}
+
+\item{store}{Character of length 1, path to the
+\code{targets} data store. Defaults to \code{tar_config_get("store")},
+which in turn defaults to \verb{_targets/}.
+When you set this argument, the value of \code{tar_config_get("store")}
+is temporarily changed for the current function call.
+See \code{\link[=tar_config_get]{tar_config_get()}} and \code{\link[=tar_config_set]{tar_config_set()}} for details
+about how to set the data store path persistently
+for a project.}
+}
+\description{
+Download metadata files from the cloud location
+(repository, bucket, and prefix) you set in
+\code{\link[=tar_option_set]{tar_option_set()}} in \verb{_targets.R}.
+}
+\examples{
+if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN
+tar_dir({ # tar_dir() runs code from a temp dir for CRAN.
+tar_script({ +}, ask = FALSE) +tar_make() +tar_meta_download() +}) +} +} +\seealso{ +Other metadata: +\code{\link{tar_meta_delete}()}, +\code{\link{tar_meta_sync}()}, +\code{\link{tar_meta_upload}()}, +\code{\link{tar_meta}()} +} +\concept{metadata} diff --git a/man/tar_meta_sync.Rd b/man/tar_meta_sync.Rd new file mode 100644 index 000000000..dfaa8aa76 --- /dev/null +++ b/man/tar_meta_sync.Rd @@ -0,0 +1,75 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tar_meta_sync.R +\name{tar_meta_sync} +\alias{tar_meta_sync} +\title{Synchronize cloud metadata.} +\usage{ +tar_meta_sync( + prefer_local = TRUE, + verbose = TRUE, + script = targets::tar_config_get("script"), + store = targets::tar_config_get("store") +) +} +\arguments{ +\item{prefer_local}{Logical of length 1 to control which copy of each +metadata file takes precedence if the local hash and cloud hash +are different but the time stamps are the same. Set to \code{TRUE} +to upload the local data file in that scenario, \code{FALSE} to download +the cloud file.} + +\item{verbose}{Logical of length 1, whether to print informative +console messages.} + +\item{script}{Character of length 1, path to the +target script file. Defaults to \code{tar_config_get("script")}, +which in turn defaults to \verb{_targets.R}. When you set +this argument, the value of \code{tar_config_get("script")} +is temporarily changed for the current function call. +See \code{\link[=tar_script]{tar_script()}}, +\code{\link[=tar_config_get]{tar_config_get()}}, and \code{\link[=tar_config_set]{tar_config_set()}} for details +about the target script file and how to set it +persistently for a project.} + +\item{store}{Character of length 1, path to the +\code{targets} data store. Defaults to \code{tar_config_get("store")}, +which in turn defaults to \verb{_targets/}. +When you set this argument, the value of \code{tar_config_get("store")} +is temporarily changed for the current function call. +See \code{\link[=tar_config_get]{tar_config_get()}} and \code{\link[=tar_config_set]{tar_config_set()}} for details +about how to set the data store path persistently +for a project.} +} +\description{ +Synchronize metadata in a cloud bucket with metadata in the +local data store. +} +\details{ +\code{\link[=tar_meta_sync]{tar_meta_sync()}} synchronizes the local and cloud copies +of all the metadata files of the pipeline so that both have the +most recent copy. For each metadata file, +if the local file does not exist or is older than the cloud file, +then the cloud file is downloaded to the local file path. +Conversely, if the cloud file is older or does not exist, then the local +file is uploaded to the cloud. If the time stamps of these files are +equal, use the \code{prefer_local} argument to determine +which copy takes precedence. +} +\examples{ +if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN +tar_dir({ # tar_dir() runs code from a temp dir for CRAN. 
+tar_script({ +}, ask = FALSE) +tar_make() +tar_meta_sync() +}) +} +} +\seealso{ +Other metadata: +\code{\link{tar_meta_delete}()}, +\code{\link{tar_meta_download}()}, +\code{\link{tar_meta_upload}()}, +\code{\link{tar_meta}()} +} +\concept{metadata} diff --git a/man/tar_meta_upload.Rd b/man/tar_meta_upload.Rd new file mode 100644 index 000000000..feb917fd7 --- /dev/null +++ b/man/tar_meta_upload.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tar_meta_upload.R +\name{tar_meta_upload} +\alias{tar_meta_upload} +\title{Upload local metadata to the cloud.} +\usage{ +tar_meta_upload( + script = targets::tar_config_get("script"), + store = targets::tar_config_get("store") +) +} +\arguments{ +\item{script}{Character of length 1, path to the +target script file. Defaults to \code{tar_config_get("script")}, +which in turn defaults to \verb{_targets.R}. When you set +this argument, the value of \code{tar_config_get("script")} +is temporarily changed for the current function call. +See \code{\link[=tar_script]{tar_script()}}, +\code{\link[=tar_config_get]{tar_config_get()}}, and \code{\link[=tar_config_set]{tar_config_set()}} for details +about the target script file and how to set it +persistently for a project.} + +\item{store}{Character of length 1, path to the +\code{targets} data store. Defaults to \code{tar_config_get("store")}, +which in turn defaults to \verb{_targets/}. +When you set this argument, the value of \code{tar_config_get("store")} +is temporarily changed for the current function call. +See \code{\link[=tar_config_get]{tar_config_get()}} and \code{\link[=tar_config_set]{tar_config_set()}} for details +about how to set the data store path persistently +for a project.} +} +\description{ +Upload local metadata files to the cloud location +(repository, bucket, and prefix) you set in +\code{\link[=tar_option_set]{tar_option_set()}} in \verb{_targets.R}. +} +\examples{ +if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN +tar_dir({ # tar_dir() runs code from a temp dir for CRAN. 
+tar_script({ +}, ask = FALSE) +tar_make() +tar_meta_upload() +}) +} +} +\seealso{ +Other metadata: +\code{\link{tar_meta_delete}()}, +\code{\link{tar_meta_download}()}, +\code{\link{tar_meta_sync}()}, +\code{\link{tar_meta}()} +} +\concept{metadata} diff --git a/man/tar_objects.Rd b/man/tar_objects.Rd index 7e8c636e6..72fc72e31 100644 --- a/man/tar_objects.Rd +++ b/man/tar_objects.Rd @@ -81,7 +81,6 @@ Other data: \code{\link{tar_load_everything}()}, \code{\link{tar_load_raw}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_pid}()}, \code{\link{tar_process}()}, \code{\link{tar_read_raw}()}, diff --git a/man/tar_pid.Rd b/man/tar_pid.Rd index e081e6a07..caad58b06 100644 --- a/man/tar_pid.Rd +++ b/man/tar_pid.Rd @@ -55,7 +55,6 @@ Other data: \code{\link{tar_load_everything}()}, \code{\link{tar_load_raw}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_process}()}, \code{\link{tar_read_raw}()}, diff --git a/man/tar_process.Rd b/man/tar_process.Rd index 85731870c..512682672 100644 --- a/man/tar_process.Rd +++ b/man/tar_process.Rd @@ -89,7 +89,6 @@ Other data: \code{\link{tar_load_everything}()}, \code{\link{tar_load_raw}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_pid}()}, \code{\link{tar_read_raw}()}, diff --git a/man/tar_read.Rd b/man/tar_read.Rd index 4799727c1..151bd881e 100644 --- a/man/tar_read.Rd +++ b/man/tar_read.Rd @@ -83,7 +83,6 @@ Other data: \code{\link{tar_load_everything}()}, \code{\link{tar_load_raw}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_pid}()}, \code{\link{tar_process}()}, diff --git a/man/tar_read_raw.Rd b/man/tar_read_raw.Rd index b6e324fb5..0e392299c 100644 --- a/man/tar_read_raw.Rd +++ b/man/tar_read_raw.Rd @@ -83,7 +83,6 @@ Other data: \code{\link{tar_load_everything}()}, \code{\link{tar_load_raw}()}, \code{\link{tar_load}()}, -\code{\link{tar_meta}()}, \code{\link{tar_objects}()}, \code{\link{tar_pid}()}, \code{\link{tar_process}()}, diff --git a/tests/testthat/test-class_database.R b/tests/testthat/test-class_database.R index d29130836..39257e5b4 100644 --- a/tests/testthat/test-class_database.R +++ b/tests/testthat/test-class_database.R @@ -436,47 +436,52 @@ tar_test("mock head", { }) tar_test("mock sync no action", { - x <- database_class$new(path = tempfile()) - expect_null(x$sync()) + x <- database_class$new(path = tempfile(), key = "x") + expect_null(x$sync(verbose = TRUE)) }) tar_test("mock sync only cloud", { - x <- database_class$new(path = tempfile()) + x <- database_class$new(path = tempfile(), key = "x") file.create("path_cloud") - expect_equal(x$sync(), "download") + expect_equal(x$sync(verbose = TRUE), "download") }) tar_test("mock sync only local", { - x <- database_class$new(path = tempfile()) + x <- database_class$new(path = tempfile(), key = "x") file.create(x$path) - expect_equal(x$sync(), "upload") + expect_equal(x$sync(verbose = TRUE), "upload") }) tar_test("mock sync only local", { - x <- database_class$new(path = tempfile()) + x <- database_class$new(path = tempfile(), key = "x") file.create(x$path) - expect_equal(x$sync(), "upload") + expect_equal(x$sync(verbose = TRUE), "upload") }) tar_test("mock sync no action on agreement", { x <- database_class$new(path = tempfile()) writeLines("lines", x$path) file.copy(x$path, "path_cloud") - expect_null(x$sync()) + expect_null(x$sync(verbose = TRUE)) }) tar_test("mock sync cloud file more recent", { old <- 
system.file("CITATION", package = "targets", mustWork = TRUE) - x <- database_class$new(path = old) + x <- database_class$new(path = old, key = "x") writeLines("lines", "path_cloud") - expect_equal(x$sync(), "download") + expect_equal(x$sync(verbose = TRUE), "download") }) tar_test("mock sync local file more recent", { skip_cran() - x <- database_class$new(path = tempfile()) + x <- database_class$new(path = tempfile(), key = "x") writeLines("lines", x$path) old <- system.file("CITATION", package = "targets", mustWork = TRUE) - file.copy(from = old, to = "path_cloud", copy.date = TRUE) - expect_equal(x$sync(), "upload") + file.copy( + from = old, + to = "path_cloud", + copy.date = TRUE, + overwrite = TRUE + ) + expect_equal(x$sync(verbose = TRUE), "upload") }) From 177bb9dc86494bfbff3bd97e2234c4d01c10e77e Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 10:17:12 -0400 Subject: [PATCH 29/36] Add unit tests for new tar_meta_*() functions --- R/class_database.R | 31 ++++++++++--------------- R/class_database_aws.R | 25 +++++++++++++++++--- R/class_database_local.R | 15 +++++++++--- R/tar_destroy.R | 8 +++---- R/tar_meta_delete.R | 11 +++++---- R/tar_meta_download.R | 11 +++++---- R/tar_meta_upload.R | 11 +++++---- man/tar_meta_delete.Rd | 4 ++++ man/tar_meta_download.Rd | 4 ++++ man/tar_meta_upload.Rd | 4 ++++ tests/testthat/test-tar_meta_delete.R | 7 ++++++ tests/testthat/test-tar_meta_download.R | 7 ++++++ tests/testthat/test-tar_meta_sync.R | 7 ++++++ tests/testthat/test-tar_meta_upload.R | 6 +++++ 14 files changed, 107 insertions(+), 44 deletions(-) create mode 100644 tests/testthat/test-tar_meta_delete.R create mode 100644 tests/testthat/test-tar_meta_download.R create mode 100644 tests/testthat/test-tar_meta_sync.R create mode 100644 tests/testthat/test-tar_meta_upload.R diff --git a/R/class_database.R b/R/class_database.R index bf1c2686a..30050a9f5 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -304,10 +304,10 @@ database_class <- R6::R6Class( } invisible() }, - upload = function() { + upload = function(verbose = TRUE) { "upload" }, - download = function() { + download = function(verbose = TRUE) { "download" }, head = function() { @@ -328,15 +328,9 @@ database_class <- R6::R6Class( exists_object <- head$exists %|||% FALSE changed <- !all(file$hash == head$hash) if (exists_file && (!exists_object)) { - if (verbose) { - tar_print("Uploading ", self$path, " to ", self$key, ".") - } - self$upload() + self$upload(verbose = verbose) } else if ((!exists_file) && exists_object) { - if (verbose) { - tar_print("Downloading ", self$key, " to ", self$path, ".") - } - self$download() + self$download(verbose = verbose) } else if (exists_file && exists_object && changed) { time_file <- file_time_posixct(file$time) time_head <- file_time_posixct(head$time) @@ -344,19 +338,18 @@ database_class <- R6::R6Class( file_same <- file$time == head$time do_upload <- file_newer || (prefer_local && file_same) if (do_upload) { - if (verbose) { - tar_print("Uploading ", self$path, " to ", self$key, ".") - } - self$upload() + self$upload(verbose = verbose) } else { - if (verbose) { - tar_print("Downloading ", self$key, " to ", self$path, ".") - } - self$download() + self$download(verbose = verbose) } } else { if (verbose) { - tar_print("Skipped syncing ", self$path, " and ", self$key, ".") + tar_print( + "Skipped syncing ", + self$path, + " with cloud object ", + self$key + ) } invisible() } diff --git a/R/class_database_aws.R b/R/class_database_aws.R index 9c4641290..1a867a70c 100644 
--- a/R/class_database_aws.R +++ b/R/class_database_aws.R @@ -41,7 +41,15 @@ database_aws_class <- R6::R6Class( ) resources_validate(self$resources$aws) }, - download = function() { + download = function(verbose = TRUE) { + if (verbose) { + tar_print( + "Downloading AWS cloud object ", + self$key, + " to local file ", + self$path + ) + } aws <- self$resources$aws dir_create(dirname(self$path)) aws_s3_download( @@ -55,7 +63,15 @@ database_aws_class <- R6::R6Class( ) invisible() }, - upload = function() { + upload = function(verbose = TRUE) { + if (verbose) { + tar_print( + "Uploading local file ", + self$path, + " to AWS cloud object ", + self$key + ) + } aws <- self$resources$aws file <- file_init(path = path) file_ensure_hash(file) @@ -93,7 +109,10 @@ database_aws_class <- R6::R6Class( time = head$Metadata$`targets-database-time` ) }, - delete_cloud = function() { + delete_cloud = function(verbose = TRUE) { + if (verbose) { + tar_print("Deleting AWS cloud object ", self$key) + } aws <- self$resources$aws aws_s3_delete( key = self$key, diff --git a/R/class_database_local.R b/R/class_database_local.R index bf1c7cfee..3c081c132 100644 --- a/R/class_database_local.R +++ b/R/class_database_local.R @@ -27,16 +27,25 @@ database_local_class <- R6::R6Class( portable = FALSE, cloneable = FALSE, public = list( - upload = function() { + upload = function(verbose = TRUE) { + if (verbose) { + tar_print(self$path, " not configured to upload to the cloud.") + } invisible() }, - download = function() { + download = function(verbose = TRUE) { + if (verbose) { + tar_print(self$path, " not configured to download from the cloud.") + } invisible() }, head = function() { invisible() }, - delete_cloud = function() { + delete_cloud = function(verbose = TRUE) { + if (verbose) { + tar_print("Not configured to delete cloud object ", self$key) + } invisible() } ) diff --git a/R/tar_destroy.R b/R/tar_destroy.R index c81ca62d6..a6c47e9ca 100644 --- a/R/tar_destroy.R +++ b/R/tar_destroy.R @@ -145,10 +145,10 @@ tar_delete_cloud_meta <- function(script) { progress <- database_progress(path_store = tempfile()) process <- database_process(path_store = tempfile()) crew <- database_crew(path_store = tempfile()) - meta$delete_cloud() - progress$delete_cloud() - process$delete_cloud() - crew$delete_cloud() + meta$delete_cloud(verbose = FALSE) + progress$delete_cloud(verbose = FALSE) + process$delete_cloud(verbose = FALSE) + crew$delete_cloud(verbose = FALSE) invisible() } # nocov end diff --git a/R/tar_meta_delete.R b/R/tar_meta_delete.R index 0393138af..8355985d2 100644 --- a/R/tar_meta_delete.R +++ b/R/tar_meta_delete.R @@ -3,7 +3,7 @@ #' @family metadata #' @description Delete the project metadata files from the local file system, #' the cloud, or both. -#' @inheritParams tar_validate +#' @inheritParams tar_meta_sync #' @param which Character of length 1, which metadata files to delete. 
#' Choose `"local"` for local files, `"cloud"` for files on the cloud, #' or `"all"` to delete metadata files from both the local file system @@ -31,6 +31,7 @@ #' } tar_meta_delete <- function( which = "all", + verbose = TRUE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") ) { @@ -60,10 +61,10 @@ tar_meta_delete <- function( }) tar_option_set(repository_meta = options$repository_meta) tar_option_set(resources = options$resources) - database_meta(path_store = tempfile())$delete_cloud() - database_progress(path_store = tempfile())$delete_cloud() - database_process(path_store = tempfile())$delete_cloud() - database_crew(path_store = tempfile())$delete_cloud() + database_meta(path_store = tempfile())$delete_cloud(verbose = verbose) + database_progress(path_store = tempfile())$delete_cloud(verbose = verbose) + database_process(path_store = tempfile())$delete_cloud(verbose = verbose) + database_crew(path_store = tempfile())$delete_cloud(verbose = verbose) } invisible() } diff --git a/R/tar_meta_download.R b/R/tar_meta_download.R index 2d8a28182..c56c5a9b7 100644 --- a/R/tar_meta_download.R +++ b/R/tar_meta_download.R @@ -4,7 +4,7 @@ #' @description download local metadata files to the cloud location #' (repository, bucket, and prefix) you set in #' [tar_option_set()] in `_targets.R`. -#' @inheritParams tar_validate +#' @inheritParams tar_meta_sync #' @examples #' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN #' tar_dir({ # tar_dir() runs code from a temp dir for CRAN. @@ -27,6 +27,7 @@ #' }) #' } tar_meta_download <- function( + verbose = TRUE, script = targets::tar_config_get("script"), store = targets::tar_config_get("store") ) { @@ -44,9 +45,9 @@ tar_meta_download <- function( }) tar_option_set(repository_meta = options$repository_meta) tar_option_set(resources = options$resources) - database_meta(path_store = store)$download() - database_progress(path_store = store)$download() - database_process(path_store = store)$download() - database_crew(path_store = store)$download() + database_meta(path_store = store)$download(verbose = verbose) + database_progress(path_store = store)$download(verbose = verbose) + database_process(path_store = store)$download(verbose = verbose) + database_crew(path_store = store)$download(verbose = verbose) invisible() } diff --git a/R/tar_meta_upload.R b/R/tar_meta_upload.R index 3413ca2fa..970c1c834 100644 --- a/R/tar_meta_upload.R +++ b/R/tar_meta_upload.R @@ -4,7 +4,7 @@ #' @description Upload local metadata files to the cloud location #' (repository, bucket, and prefix) you set in #' [tar_option_set()] in `_targets.R`. -#' @inheritParams tar_validate +#' @inheritParams tar_meta_sync #' @examples #' if (identical(Sys.getenv("TAR_EXAMPLES"), "true")) { # for CRAN #' tar_dir({ # tar_dir() runs code from a temp dir for CRAN. 
@@ -27,6 +27,7 @@
 #' })
 #' }
 tar_meta_upload <- function(
+  verbose = TRUE,
   script = targets::tar_config_get("script"),
   store = targets::tar_config_get("store")
 ) {
@@ -41,9 +42,9 @@ tar_meta_upload <- function(
   })
   tar_option_set(repository_meta = options$repository_meta)
   tar_option_set(resources = options$resources)
-  database_meta(path_store = store)$upload()
-  database_progress(path_store = store)$upload()
-  database_process(path_store = store)$upload()
-  database_crew(path_store = store)$upload()
+  database_meta(path_store = store)$upload(verbose = verbose)
+  database_progress(path_store = store)$upload(verbose = verbose)
+  database_process(path_store = store)$upload(verbose = verbose)
+  database_crew(path_store = store)$upload(verbose = verbose)
   invisible()
 }
diff --git a/man/tar_meta_delete.Rd b/man/tar_meta_delete.Rd
index c495c9d63..76de0a80f 100644
--- a/man/tar_meta_delete.Rd
+++ b/man/tar_meta_delete.Rd
@@ -6,6 +6,7 @@
 \usage{
 tar_meta_delete(
   which = "all",
+  verbose = TRUE,
   script = targets::tar_config_get("script"),
   store = targets::tar_config_get("store")
 )
@@ -16,6 +17,9 @@
 Choose \code{"local"} for local files, \code{"cloud"} for files on the cloud,
 or \code{"all"} to delete metadata files from both the local file system
 and the cloud.}
 
+\item{verbose}{Logical of length 1, whether to print informative
+console messages.}
+
 \item{script}{Character of length 1, path to the
 target script file. Defaults to \code{tar_config_get("script")},
 which in turn defaults to \verb{_targets.R}. When you set
diff --git a/man/tar_meta_download.Rd b/man/tar_meta_download.Rd
index 5ec8b1032..f06dceb2a 100644
--- a/man/tar_meta_download.Rd
+++ b/man/tar_meta_download.Rd
@@ -5,11 +5,15 @@
 \title{Download cloud metadata to the local data store.}
 \usage{
 tar_meta_download(
+  verbose = TRUE,
   script = targets::tar_config_get("script"),
   store = targets::tar_config_get("store")
 )
 }
 \arguments{
+\item{verbose}{Logical of length 1, whether to print informative
+console messages.}
+
 \item{script}{Character of length 1, path to the
 target script file. Defaults to \code{tar_config_get("script")},
 which in turn defaults to \verb{_targets.R}. When you set
diff --git a/man/tar_meta_upload.Rd b/man/tar_meta_upload.Rd
index feb917fd7..0ca76a720 100644
--- a/man/tar_meta_upload.Rd
+++ b/man/tar_meta_upload.Rd
@@ -5,11 +5,15 @@
 \title{Upload local metadata to the cloud.}
 \usage{
 tar_meta_upload(
+  verbose = TRUE,
   script = targets::tar_config_get("script"),
   store = targets::tar_config_get("store")
 )
 }
 \arguments{
+\item{verbose}{Logical of length 1, whether to print informative
+console messages.}
+
 \item{script}{Character of length 1, path to the
 target script file. Defaults to \code{tar_config_get("script")},
 which in turn defaults to \verb{_targets.R}.
When you set diff --git a/tests/testthat/test-tar_meta_delete.R b/tests/testthat/test-tar_meta_delete.R new file mode 100644 index 000000000..4271c2dfd --- /dev/null +++ b/tests/testthat/test-tar_meta_delete.R @@ -0,0 +1,7 @@ +tar_test("tar_meta_delete() works on a local pipeline", { + skip_cran() + tar_script(tar_target(x, 1)) + expect_null(tar_meta_delete()) + tar_make(callr_function = NULL) + expect_null(tar_meta_delete()) +}) diff --git a/tests/testthat/test-tar_meta_download.R b/tests/testthat/test-tar_meta_download.R new file mode 100644 index 000000000..36578c8bb --- /dev/null +++ b/tests/testthat/test-tar_meta_download.R @@ -0,0 +1,7 @@ +tar_test("tar_meta_download() works on a local pipeline", { + skip_cran() + tar_script(tar_target(x, 1)) + expect_null(tar_meta_download()) + tar_make(callr_function = NULL) + expect_null(tar_meta_download()) +}) diff --git a/tests/testthat/test-tar_meta_sync.R b/tests/testthat/test-tar_meta_sync.R new file mode 100644 index 000000000..4f249aeee --- /dev/null +++ b/tests/testthat/test-tar_meta_sync.R @@ -0,0 +1,7 @@ +tar_test("tar_meta_sync() works on a local pipeline", { + skip_cran() + tar_script(tar_target(x, 1)) + expect_null(tar_meta_sync()) + tar_make(callr_function = NULL) + expect_null(tar_meta_sync()) +}) diff --git a/tests/testthat/test-tar_meta_upload.R b/tests/testthat/test-tar_meta_upload.R new file mode 100644 index 000000000..4f2f2827c --- /dev/null +++ b/tests/testthat/test-tar_meta_upload.R @@ -0,0 +1,6 @@ +tar_test("tar_meta_upload() works on a local pipeline", { + skip_cran() + tar_script(tar_target(x, 1)) + tar_make(callr_function = NULL) + expect_null(tar_meta_upload()) +}) From 0f29a253fa17a75d59b406f613e3e6729fef1796 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 10:49:37 -0400 Subject: [PATCH 30/36] Fix tests --- R/class_active.R | 2 +- R/class_database.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/class_active.R b/R/class_active.R index de2b95d0a..d37e5d226 100644 --- a/R/class_active.R +++ b/R/class_active.R @@ -103,7 +103,7 @@ active_class <- R6::R6Class( ensure_process = function() { self$process <- process_init(path_store = self$meta$store) self$process$record_process() - self$process$database$upload() + self$process$database$upload(verbose = FALSE) }, produce_exports = function(envir, path_store, is_globalenv = NULL) { map(names(envir), ~force(envir[[.x]])) # try to nix high-mem promises diff --git a/R/class_database.R b/R/class_database.R index 30050a9f5..e28f4662b 100644 --- a/R/class_database.R +++ b/R/class_database.R @@ -159,7 +159,7 @@ database_class <- R6::R6Class( on.exit(self$queue <- NULL) self$append_lines(self$queue) if (upload) { - self$upload() + self$upload(verbose = FALSE) } } }, From 55f99556625035cb8ab180d7d7422053bb891180 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 11:10:12 -0400 Subject: [PATCH 31/36] try to fix tests --- R/class_crew.R | 2 +- R/tar_meta_delete.R | 4 +- R/tar_meta_download.R | 4 +- R/tar_meta_sync.R | 4 +- R/tar_meta_upload.R | 4 +- R/tar_option_set.R | 2 +- tests/aws/test-aws_meta.R | 309 ++++++++++++++++++++++++++++++++++++++ 7 files changed, 319 insertions(+), 10 deletions(-) diff --git a/R/class_crew.R b/R/class_crew.R index 27361cef0..f5381f3c3 100644 --- a/R/class_crew.R +++ b/R/class_crew.R @@ -268,7 +268,7 @@ crew_class <- R6::R6Class( record_controller_summary = function(summary) { database <- database_crew(self$meta$store) database$overwrite_storage(summary) - database$upload() + 
database$upload(verbose = FALSE) }, finalize_crew = function() { summary <- crew_summary(self$controller) diff --git a/R/tar_meta_delete.R b/R/tar_meta_delete.R index 8355985d2..72e45f1ba 100644 --- a/R/tar_meta_delete.R +++ b/R/tar_meta_delete.R @@ -59,8 +59,8 @@ tar_meta_delete <- function( tar_options$set_repository_meta(old_repository_meta) tar_options$set_resources(old_resources) }) - tar_option_set(repository_meta = options$repository_meta) - tar_option_set(resources = options$resources) + tar_options$set_repository_meta(options$repository_meta) + tar_options$set_resources(options$resources) database_meta(path_store = tempfile())$delete_cloud(verbose = verbose) database_progress(path_store = tempfile())$delete_cloud(verbose = verbose) database_process(path_store = tempfile())$delete_cloud(verbose = verbose) diff --git a/R/tar_meta_download.R b/R/tar_meta_download.R index c56c5a9b7..3d485eeff 100644 --- a/R/tar_meta_download.R +++ b/R/tar_meta_download.R @@ -43,8 +43,8 @@ tar_meta_download <- function( tar_options$set_repository_meta(old_repository_meta) tar_options$set_resources(old_resources) }) - tar_option_set(repository_meta = options$repository_meta) - tar_option_set(resources = options$resources) + tar_options$set_repository_meta(options$repository_meta) + tar_options$set_resources(options$resources) database_meta(path_store = store)$download(verbose = verbose) database_progress(path_store = store)$download(verbose = verbose) database_process(path_store = store)$download(verbose = verbose) diff --git a/R/tar_meta_sync.R b/R/tar_meta_sync.R index 961891c12..53672c85d 100644 --- a/R/tar_meta_sync.R +++ b/R/tar_meta_sync.R @@ -65,8 +65,8 @@ tar_meta_sync <- function( tar_options$set_repository_meta(old_repository_meta) tar_options$set_resources(old_resources) }) - tar_option_set(repository_meta = options$repository_meta) - tar_option_set(resources = options$resources) + tar_options$set_repository_meta(options$repository_meta) + tar_options$set_resources(options$resources) meta <- database_meta(path_store = store) progress <- database_progress(path_store = store) process <- database_process(path_store = store) diff --git a/R/tar_meta_upload.R b/R/tar_meta_upload.R index 970c1c834..d45813fab 100644 --- a/R/tar_meta_upload.R +++ b/R/tar_meta_upload.R @@ -40,8 +40,8 @@ tar_meta_upload <- function( tar_options$set_repository_meta(old_repository_meta) tar_options$set_resources(old_resources) }) - tar_option_set(repository_meta = options$repository_meta) - tar_option_set(resources = options$resources) + tar_options$set_repository_meta(options$repository_meta) + tar_options$set_resources(options$resources) database_meta(path_store = store)$upload(verbose = verbose) database_progress(path_store = store)$upload(verbose = verbose) database_process(path_store = store)$upload(verbose = verbose) diff --git a/R/tar_option_set.R b/R/tar_option_set.R index e2f35b10a..2f253d0ea 100644 --- a/R/tar_option_set.R +++ b/R/tar_option_set.R @@ -214,7 +214,7 @@ tar_option_set <- function( if_any( is.null(repository_meta), NULL, - tar_options$set_repository(repository_meta) + tar_options$set_repository_meta(repository_meta) ) if_any(is.null(iteration), NULL, tar_options$set_iteration(iteration)) if_any(is.null(error), NULL, tar_options$set_error(error)) diff --git a/tests/aws/test-aws_meta.R b/tests/aws/test-aws_meta.R index 6ba830c6a..c86e7dcf5 100644 --- a/tests/aws/test-aws_meta.R +++ b/tests/aws/test-aws_meta.R @@ -74,3 +74,312 @@ tar_test("AWS meta", { ) } }) + +# Use sparingly to minimize AWS costs. 
+# And afterwards, manually verify that all the buckets are gone. +tar_test("AWS tar_meta_delete()", { + skip_if_no_aws() + skip_if_not_installed("crew") + s3 <- paws.storage::s3() + bucket_name <- random_bucket_name() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "aws", + resources = tar_resources( + aws = tar_resources_aws( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + for (object in c("a", "b", "c")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/objects", object), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_delete() + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + for (object in c("a", "b", "c")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/objects", object), + bucket = bucket_name, + max_tries = 5L + ) + ) + } +}) + +# Use sparingly to minimize AWS costs. +# And afterwards, manually verify that all the buckets are gone. +tar_test("AWS tar_meta_upload()", { + skip_if_no_aws() + skip_if_not_installed("crew") + s3 <- paws.storage::s3() + bucket_name <- random_bucket_name() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "aws", + resources = tar_resources( + aws = tar_resources_aws( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_delete(which = "cloud") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_upload() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } +}) + +# Use sparingly to minimize AWS costs. +# And afterwards, manually verify that all the buckets are gone. 
+tar_test("AWS tar_meta_download()", { + skip_if_no_aws() + skip_if_not_installed("crew") + s3 <- paws.storage::s3() + bucket_name <- random_bucket_name() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "aws", + resources = tar_resources( + aws = tar_resources_aws( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_delete(which = "local") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_download() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } +}) + +# Use sparingly to minimize AWS costs. +# And afterwards, manually verify that all the buckets are gone. +tar_test("AWS tar_meta_sync() upload", { + skip_if_no_aws() + skip_if_not_installed("crew") + s3 <- paws.storage::s3() + bucket_name <- random_bucket_name() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "aws", + resources = tar_resources( + aws = tar_resources_aws( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_delete(which = "cloud") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_sync() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + aws_s3_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } +}) + +# Use sparingly to minimize AWS costs. +# And afterwards, manually verify that all the buckets are gone. 
+tar_test("AWS tar_meta_sync() download", { + skip_if_no_aws() + skip_if_not_installed("crew") + s3 <- paws.storage::s3() + bucket_name <- random_bucket_name() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "aws", + resources = tar_resources( + aws = tar_resources_aws( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_delete(which = "local") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_sync() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } +}) From 97b6d229c8bf1df8ad83cf60095ff2e735e69968 Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 11:33:33 -0400 Subject: [PATCH 32/36] Fix #1109 --- tests/gcp/test-gcp_meta.R | 314 +++++++++++++++++++++++++++++++++++++- 1 file changed, 313 insertions(+), 1 deletion(-) diff --git a/tests/gcp/test-gcp_meta.R b/tests/gcp/test-gcp_meta.R index 6158f3402..0b8ee8523 100644 --- a/tests/gcp/test-gcp_meta.R +++ b/tests/gcp/test-gcp_meta.R @@ -7,7 +7,6 @@ tar_test("gcp meta", { project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") gcp_gcs_auth(max_tries = 5) googleCloudStorageR::gcs_create_bucket(bucket_name, projectId = project) - # needs to be a GCP project the tester auth has access to on.exit(gcp_gcs_delete_bucket(bucket_name)) code <- substitute({ library(targets) @@ -76,3 +75,316 @@ tar_test("gcp meta", { ) } }) + +# Use sparingly to minimize gcp costs. +# And afterwards, manually verify that all the buckets are gone. 
+tar_test("gcp tar_meta_delete()", { + skip_if_no_gcp() + skip_if_not_installed("crew") + bucket_name <- random_bucket_name() + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + gcp_gcs_auth(max_tries = 5) + googleCloudStorageR::gcs_create_bucket(bucket_name, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "gcp", + resources = tar_resources( + gcp = tar_resources_gcp( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + for (object in c("a", "b", "c")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/objects", object), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_delete() + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + for (object in c("a", "b", "c")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/objects", object), + bucket = bucket_name, + max_tries = 5L + ) + ) + } +}) + +# Use sparingly to minimize gcp costs. +# And afterwards, manually verify that all the buckets are gone. +tar_test("gcp tar_meta_upload()", { + skip_if_no_gcp() + skip_if_not_installed("crew") + bucket_name <- random_bucket_name() + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + gcp_gcs_auth(max_tries = 5) + googleCloudStorageR::gcs_create_bucket(bucket_name, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "gcp", + resources = tar_resources( + gcp = tar_resources_gcp( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_delete(which = "cloud") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_upload() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } +}) + +# Use sparingly to minimize gcp costs. +# And afterwards, manually verify that all the buckets are gone. 
+tar_test("gcp tar_meta_download()", { + skip_if_no_gcp() + skip_if_not_installed("crew") + bucket_name <- random_bucket_name() + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + gcp_gcs_auth(max_tries = 5) + googleCloudStorageR::gcs_create_bucket(bucket_name, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "gcp", + resources = tar_resources( + gcp = tar_resources_gcp( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_delete(which = "local") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_download() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } +}) + +# Use sparingly to minimize gcp costs. +# And afterwards, manually verify that all the buckets are gone. +tar_test("gcp tar_meta_sync() upload", { + skip_if_no_gcp() + skip_if_not_installed("crew") + bucket_name <- random_bucket_name() + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + gcp_gcs_auth(max_tries = 5) + googleCloudStorageR::gcs_create_bucket(bucket_name, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "gcp", + resources = tar_resources( + gcp = tar_resources_gcp( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_delete(which = "cloud") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } + tar_meta_sync() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + gcp_gcs_exists( + key = file.path("_targets/meta", file), + bucket = bucket_name, + max_tries = 5L + ) + ) + } +}) + +# Use sparingly to minimize gcp costs. +# And afterwards, manually verify that all the buckets are gone. 
+tar_test("gcp tar_meta_sync() download", { + skip_if_no_gcp() + skip_if_not_installed("crew") + gcs <- pgcp.storage::gcs() + bucket_name <- random_bucket_name() + gcs$create_bucket(Bucket = bucket_name) + on.exit(gcp_gcs_delete_bucket(bucket_name)) + code <- substitute({ + library(targets) + tar_option_set( + format = "rds", + repository = "gcp", + resources = tar_resources( + gcp = tar_resources_gcp( + bucket = bucket_name, + prefix = "_targets" + ) + ), + controller = crew::crew_controller_local(), + storage = "worker", + retrieval = "worker" + ) + list( + tar_target(a, 1L), + tar_target(b, a), + tar_target(c, a + b) + ) + }, env = list(bucket_name = bucket_name)) + do.call(tar_script, list(code = code)) + tar_make() + expect_true(all(tar_progress()$progress == "built")) + expect_equal(tar_read(c), 2L) + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_delete(which = "local") + for (file in c("meta", "process", "progress", "crew")) { + expect_false( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } + tar_meta_sync() + for (file in c("meta", "process", "progress", "crew")) { + expect_true( + file.exists(file.path(path_meta_dir(path_store_default()), file)) + ) + } +}) From 1c366d9b090c30e8abf02a2ced353f83e41c357b Mon Sep 17 00:00:00 2001 From: wlandau Date: Mon, 28 Aug 2023 11:37:38 -0400 Subject: [PATCH 33/36] Fix gcp db --- R/class_database_gcp.R | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/R/class_database_gcp.R b/R/class_database_gcp.R index 4117597e3..993702daf 100644 --- a/R/class_database_gcp.R +++ b/R/class_database_gcp.R @@ -41,7 +41,15 @@ database_gcp_class <- R6::R6Class( ) resources_validate(self$resources$gcp) }, - download = function() { + download = function(verbose = TRUE) { + if (verbose) { + tar_print( + "Downloading GCP cloud object ", + self$key, + " to local file ", + self$path + ) + } gcp <- self$resources$gcp dir_create(dirname(self$path)) gcp_gcs_download( @@ -53,7 +61,15 @@ database_gcp_class <- R6::R6Class( ) invisible() }, - upload = function() { + upload = function(verbose = TRUE) { + if (verbose) { + tar_print( + "Uploading local file ", + self$path, + " to GCP cloud object ", + self$key + ) + } gcp <- self$resources$gcp file <- file_init(path = path) file_ensure_hash(file) @@ -87,7 +103,10 @@ database_gcp_class <- R6::R6Class( time = head$metadata$`targets-database-time` ) }, - delete_cloud = function() { + delete_cloud = function(verbose = TRUE) { + if (verbose) { + tar_print("Deleting GCP cloud object ", self$key) + } gcp <- self$resources$gcp head <- gcp_gcs_delete( key = self$key, From 96bf4cdb97fcc32482d7b6917f284d9e3ee2c179 Mon Sep 17 00:00:00 2001 From: wlandau Date: Mon, 28 Aug 2023 11:40:54 -0400 Subject: [PATCH 34/36] fix a test --- tests/gcp/test-gcp_meta.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/gcp/test-gcp_meta.R b/tests/gcp/test-gcp_meta.R index 0b8ee8523..d0dd8827a 100644 --- a/tests/gcp/test-gcp_meta.R +++ b/tests/gcp/test-gcp_meta.R @@ -341,9 +341,10 @@ tar_test("gcp tar_meta_sync() upload", { tar_test("gcp tar_meta_sync() download", { skip_if_no_gcp() skip_if_not_installed("crew") - gcs <- pgcp.storage::gcs() bucket_name <- random_bucket_name() - gcs$create_bucket(Bucket = bucket_name) + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + gcp_gcs_auth(max_tries = 5) + googleCloudStorageR::gcs_create_bucket(bucket_name, 
projectId = project) on.exit(gcp_gcs_delete_bucket(bucket_name)) code <- substitute({ library(targets) From 75039efae22bad2666a2561613f33e897b9ce057 Mon Sep 17 00:00:00 2001 From: wlandau Date: Mon, 28 Aug 2023 12:00:38 -0400 Subject: [PATCH 35/36] test continuous metadata updates on gcp --- R/utils_aws.R | 1 + R/utils_gcp.R | 1 + tests/gcp/test-gcp_long.R | 59 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 tests/gcp/test-gcp_long.R diff --git a/R/utils_aws.R b/R/utils_aws.R index 8cb3eb256..a0046fd4b 100644 --- a/R/utils_aws.R +++ b/R/utils_aws.R @@ -76,6 +76,7 @@ aws_s3_download <- function( args$VersionId <- version } args <- supported_args(fun = client$get_object, args = args) + dir_create(dirname(file)) out <- retry_until_success( fun = function(client, args) { do.call(what = client$get_object, args = args) diff --git a/R/utils_gcp.R b/R/utils_gcp.R index 071ab0556..c5d0a37fb 100644 --- a/R/utils_gcp.R +++ b/R/utils_gcp.R @@ -84,6 +84,7 @@ gcp_gcs_download <- function( options(googleAuthR.tryAttempts = max_tries %|||% 5L) ) gcp_gcs_auth(verbose = verbose, max_tries = max_tries) + dir_create(dirname(file)) if_any(verbose, identity, suppressMessages) ( googleCloudStorageR::gcs_get_object( key, diff --git a/tests/gcp/test-gcp_long.R b/tests/gcp/test-gcp_long.R new file mode 100644 index 000000000..10ff2ef4e --- /dev/null +++ b/tests/gcp/test-gcp_long.R @@ -0,0 +1,59 @@ +# Use sparingly to minimize GCP costs. +# Verify all `targets` buckets are deleted afterwards. +tar_test("pipeline continuously uploads metadata", { + skip_if_no_gcp() + skip_if_not_installed("arrow") + bucket_name <- random_bucket_name() + gcp_gcs_auth(max_tries = 5) + project <- Sys.getenv("GCE_DEFAULT_PROJECT_ID") + googleCloudStorageR::gcs_create_bucket(bucket_name, projectId = project) + on.exit(gcp_gcs_delete_bucket(bucket_name)) + expr <- quote({ + tar_option_set( + resources = tar_resources( + gcp = tar_resources_gcp(bucket = !!bucket_name, prefix = "_targets"), + network = tar_resources_network(max_tries = 10L) + ), + repository = "gcp" + ) + list( + tar_target(a, 1), + tar_target( + b, { + Sys.sleep(2) + a + } + ), + tar_target( + c, { + Sys.sleep(2) + b + } + ), + tar_target( + d, { + Sys.sleep(200) + c + } + ) + ) + }) + expr <- tar_tidy_eval(expr, environment(), TRUE) + eval(as.call(list(`tar_script`, expr, ask = FALSE))) + R.utils::withTimeout( + expr = tar_make(seconds_meta = 1), + timeout = 30, + onTimeout = "silent" + ) + tar_destroy(destroy = "local") + temp <- tempfile() + meta <- path_meta(temp) + gcp_gcs_download( + file = meta, + bucket = bucket_name, + key = "_targets/meta/meta", + max_tries = 3 + ) + out <- tar_meta(store = temp, targets_only = TRUE) + expect_equal(sort(out$name), sort(c("a", "b", "c"))) +}) From c16f945014895f891f6eea79e6fd87e80efa315d Mon Sep 17 00:00:00 2001 From: wlandau-lilly Date: Mon, 28 Aug 2023 12:02:40 -0400 Subject: [PATCH 36/36] Test continuous metadata uploads on AWS --- tests/aws/test-aws_long.R | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/aws/test-aws_long.R diff --git a/tests/aws/test-aws_long.R b/tests/aws/test-aws_long.R new file mode 100644 index 000000000..fca19e6fd --- /dev/null +++ b/tests/aws/test-aws_long.R @@ -0,0 +1,57 @@ +# Use sparingly to minimize aws costs. +# Verify all `targets` buckets are deleted afterwards. 
+tar_test("pipeline continuously uploads metadata", { + skip_if_no_aws() + bucket_name <- random_bucket_name() + s3 <- paws.storage::s3() + s3$create_bucket(Bucket = bucket_name) + on.exit(aws_s3_delete_bucket(bucket_name)) + expr <- quote({ + tar_option_set( + resources = tar_resources( + aws = tar_resources_aws(bucket = !!bucket_name, prefix = "_targets"), + network = tar_resources_network(max_tries = 10L) + ), + repository = "aws" + ) + list( + tar_target(a, 1), + tar_target( + b, { + Sys.sleep(2) + a + } + ), + tar_target( + c, { + Sys.sleep(2) + b + } + ), + tar_target( + d, { + Sys.sleep(200) + c + } + ) + ) + }) + expr <- tar_tidy_eval(expr, environment(), TRUE) + eval(as.call(list(`tar_script`, expr, ask = FALSE))) + R.utils::withTimeout( + expr = tar_make(seconds_meta = 1), + timeout = 30, + onTimeout = "silent" + ) + tar_destroy(destroy = "local") + temp <- tempfile() + meta <- path_meta(temp) + aws_s3_download( + file = meta, + bucket = bucket_name, + key = "_targets/meta/meta", + max_tries = 3 + ) + out <- tar_meta(store = temp, targets_only = TRUE) + expect_equal(sort(out$name), sort(c("a", "b", "c"))) +})