diff --git a/DESCRIPTION b/DESCRIPTION index c3c3b1b0..78351cfb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -12,7 +12,7 @@ Description: Pipeline tools coordinate the pieces of computationally The methodology in this package borrows from GNU 'Make' (2015, ISBN:978-9881443519) and 'drake' (2018, <doi:10.21105/joss.00550>). -Version: 1.9.1.9008 +Version: 1.9.1.9009 License: MIT + file LICENSE URL: https://docs.ropensci.org/targets/, https://github.com/ropensci/targets BugReports: https://github.com/ropensci/targets/issues diff --git a/NEWS.md b/NEWS.md index 5ec63ae9..2f3af698 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,10 +1,11 @@ -# targets 1.9.1.9008 +# targets 1.9.1.9009 ## Invalidating changes These changes invalidate certain targets in a pipeline and cause them to rerun on the next `tar_make()`. * Exclude function signatures from `tar_repository_cas()` output strings to reduce the size of pipeline metadata (#1390). +* Exclude function signatures from `tar_format()` output strings to reduce the size of pipeline metadata (#1390).
## Summary of performance gains diff --git a/R/class_pipeline.R b/R/class_pipeline.R index 65352524..da1fad30 100644 --- a/R/class_pipeline.R +++ b/R/class_pipeline.R @@ -339,6 +339,7 @@ pipeline_validate <- function(pipeline) { pipeline_validate_lite <- function(pipeline) { tar_assert_inherits(pipeline, "tar_pipeline", msg = "invalid pipeline.") tar_assert_correct_fields(pipeline, pipeline_new) + tar_assert_target_name_case(pipeline_get_names(pipeline)) pipeline_validate_conflicts(pipeline) } diff --git a/R/class_store_format_custom.R b/R/class_store_format_custom.R index 3ffc4aa4..3d1ef25b 100644 --- a/R/class_store_format_custom.R +++ b/R/class_store_format_custom.R @@ -9,12 +9,13 @@ store_class_format_format_custom <- c( "tar_store" ) -store_format_custom_field <- function(format, pattern, default) { +store_format_custom_field <- function(format, pattern, default, prefix) { out <- base64url::base64_urldecode(keyvalue_field(format, pattern)) if ((length(out) < 1L) || !any(nzchar(out))) { - out <- default + return(default) + } else { + return(paste0(prefix, out)) } - out } #' @export diff --git a/R/class_store_format_custom_methods.R b/R/class_store_format_custom_methods.R index 8eba52f4..9b327e3f 100644 --- a/R/class_store_format_custom_methods.R +++ b/R/class_store_format_custom_methods.R @@ -4,32 +4,38 @@ store_format_custom_methods_init <- function(format) { read = store_format_custom_field( format = format, pattern = "^read=", - default = store_format_custom_default_read() + default = store_format_custom_default_read(), + prefix = "function(path) " ), write = store_format_custom_field( format = format, pattern = "^write=", - default = store_format_custom_default_write() + default = store_format_custom_default_write(), + prefix = "function(object, path) " ), marshal = store_format_custom_field( format = format, pattern = "^marshal=", - default = store_format_custom_default_marshal() + default = store_format_custom_default_marshal(), + prefix = 
"function(object) " ), unmarshal = store_format_custom_field( format = format, pattern = "^unmarshal=", - default = store_format_custom_default_unmarshal() + default = store_format_custom_default_unmarshal(), + prefix = "function(object) " ), convert = store_format_custom_field( format = format, pattern = "^convert=", - default = store_format_custom_default_convert() + default = store_format_custom_default_convert(), + prefix = "function(object) " ), copy = store_format_custom_field( format = format, pattern = "^copy=", - default = store_format_custom_default_copy() + default = store_format_custom_default_copy(), + prefix = "function(object) " ) ) } diff --git a/R/tar_format.R b/R/tar_format.R index 9d57a047..101ac7a2 100644 --- a/R/tar_format.R +++ b/R/tar_format.R @@ -76,8 +76,9 @@ #' See the "Format functions" section for specific requirements. #' If `NULL`, the `unmarshal` argument defaults to just #' returning the original object without any modifications. -#' @param convert The `convert` argument is a function -#' that accepts the object returned by the command of the target +#' @param convert The `convert` argument is a function with a single argument +#' named `object`. +#' It accepts the object returned by the command of the target #' and changes it into an acceptable format (e.g. can be #' saved with the `read` function). The `convert` #' ensures the in-memory copy @@ -88,8 +89,9 @@ #' `error = "null"` in [tar_target()] or [tar_option_set()]). #' If `NULL`, the `convert` argument defaults to just #' returning the original object without any modifications. -#' @param copy The `copy` argument is a function -#' that accepts the object returned by the command of the target +#' @param copy The `copy` argument is a function with a single argument +#' named `object`. +#' It accepts the object returned by the command of the target #' and makes a deep copy in memory.
This method does is relevant #' to objects like `data.table`s that support in-place modification #' which could cause unpredictable side effects from target @@ -231,10 +233,11 @@ tar_format <- function( } tar_format_field <- function(key, value) { - encoded <- if_any( - is.null(value), - "", - base64url::base64_urlencode(tar_deparse_safe(value)) - ) - paste0(key, "=", encoded) + if (is.null(value)) { + return(paste0(key, "=")) + } + if (is.function(value)) { + value <- body(value) + } + paste0(key, "=", base64url::base64_urlencode(tar_deparse_safe(value))) } diff --git a/R/tar_target.R b/R/tar_target.R index e08bb751..b36e3f08 100644 --- a/R/tar_target.R +++ b/R/tar_target.R @@ -198,7 +198,15 @@ #' can refer to this name symbolically to induce a dependency relationship: #' e.g. `tar_target(downstream_target, f(upstream_target))` is a #' target named `downstream_target` which depends on a target -#' `upstream_target` and a function `f()`. In addition, a target's +#' `upstream_target` and a function `f()`. +#' +#' In most cases, the target name is the name of its local data file +#' in storage. Some file systems are not case sensitive, which means +#' converting a name to a different case may overwrite a different target. +#' Please ensure all target names are unique when converted to +#' lower case. +#' +#' In addition, a target's #' name determines its random number generator seed.
In this way, #' each target runs with a reproducible seed so someone else #' running the same pipeline should get the same results, diff --git a/R/utils_assert.R b/R/utils_assert.R index 98c204a9..1895fc09 100644 --- a/R/utils_assert.R +++ b/R/utils_assert.R @@ -871,3 +871,22 @@ tar_assert_meta <- function(store) { tar_throw_validate(message = message) } } + +tar_assert_target_name_case <- function(names) { + index <- duplicated(tolower(names)) + if (!any(index)) { + return() + } + problems <- paste(names[index], collapse = ", ") + message <- paste0( + "In most pipelines, a target name is the name of its data file in ", + "storage. Some file systems are not case sensitive, so targets ", + "should not have duplicate names when converting to lower case. ", + "Found problematic names: ", + problems + ) + tar_warning( + message = message, + class = c("tar_condition_validate", "tar_condition_targets") + ) +} diff --git a/man/tar_format.Rd b/man/tar_format.Rd index 32bb3f17..d56920d7 100644 --- a/man/tar_format.Rd +++ b/man/tar_format.Rd @@ -49,8 +49,9 @@ See the "Format functions" section for specific requirements. If \code{NULL}, the \code{unmarshal} argument defaults to just returning the original object without any modifications.} -\item{convert}{The \code{convert} argument is a function -that accepts the object returned by the command of the target +\item{convert}{The \code{convert} argument is a function with a single argument +named \code{object}. +It accepts the object returned by the command of the target and changes it into an acceptable format (e.g. can be saved with the \code{read} function). 
The \code{convert} ensures the in-memory copy @@ -62,8 +63,9 @@ handle edge cases like \code{NULL} values (especially for If \code{NULL}, the \code{convert} argument defaults to just returning the original object without any modifications.} -\item{copy}{The \code{copy} argument is a function -that accepts the object returned by the command of the target +\item{copy}{The \code{copy} argument is a function with a single argument +named \code{object}. +It accepts the object returned by the command of the target and makes a deep copy in memory. This method does is relevant to objects like \code{data.table}s that support in-place modification which could cause unpredictable side effects from target diff --git a/man/tar_target.Rd b/man/tar_target.Rd index 2a695513..4d88ab93 100644 --- a/man/tar_target.Rd +++ b/man/tar_target.Rd @@ -62,7 +62,15 @@ must not start with a dot. Subsequent targets can refer to this name symbolically to induce a dependency relationship: e.g. \code{tar_target(downstream_target, f(upstream_target))} is a target named \code{downstream_target} which depends on a target -\code{upstream_target} and a function \code{f()}. In addition, a target's +\code{upstream_target} and a function \code{f()}. + +In most cases, the target name is the name of its local data file +in storage. Some file systems are not case sensitive, which means +converting a name to a different case may overwrite a different target. +Please ensure all target names are unique when converted to +lower case. + +In addition, a target's name determines its random number generator seed.
In this way, each target runs with a reproducible seed so someone else running the same pipeline should get the same results, diff --git a/tests/testthat/test-utils_assert.R b/tests/testthat/test-utils_assert.R index e35f0fde..ab517273 100644 --- a/tests/testthat/test-utils_assert.R +++ b/tests/testthat/test-utils_assert.R @@ -471,3 +471,11 @@ tar_test("tar_assert_identical()", { class = "tar_condition_validate" ) }) + +tar_test("tar_assert_target_name_case()", { + expect_silent(tar_assert_target_name_case(letters)) + expect_warning( + tar_assert_target_name_case(c(letters, LETTERS)), + class = "tar_condition_validate" + ) +})