-
Notifications
You must be signed in to change notification settings - Fork 26
/
Copy pathbuild_targets.R
259 lines (220 loc) · 9.63 KB
/
build_targets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
#' Set up Google Cloud Build to run a targets pipeline
#' @family Cloud Build functions
#' @export
#' @description Creates a Google Cloud Build yaml file so as to execute \link[targets]{tar_make} pipelines
#'
#' Historical runs accumulate in the
#' configured Google Cloud Storage bucket, and the latest output is downloaded before
#' \link[targets]{tar_make} executes so up-to-date steps do not rerun.
#'
#' @details Steps to set up your target task in Cloud Build:
#'
#' \itemize{
#' \item Create your `targets` workflow.
#' \item Create a Dockerfile that holds the R and system dependencies for your workflow. You can test the image using \link{cr_deploy_docker}. Include \code{library(targets)} dependencies - a Docker image with \code{targets} installed is available at \code{gcr.io/gcer-public/targets}.
#' \item Run \code{cr_build_targets} to create the cloudbuild yaml file.
#' \item Run the build via \link{cr_build} or similar. Each build should only recompute outdated targets.
#' \item Optionally create a build trigger via \link{cr_buildtrigger}.
#' \item Trigger a build. The first trigger will run the targets pipeline, subsequent runs will only recompute the outdated targets.
#' }
#'
#' @section DAGs:
#'
#' If your target workflow has parallel processing steps then leaving \code{buildsteps} as the default \code{cr_buildstep_targets_multi()} will create a build that uses waitFor and build ids to create a DAG. Setting it to \code{cr_buildstep_targets_single()} will run single-threaded, but you can then customise the \code{targets::tar_make} script. Or add your own custom target buildsteps using \link{cr_buildstep_targets} - for example you could create the docker environment targets runs within before the main pipeline.
#'
#' @return A Yaml object as generated by \link{cr_build_yaml} if \code{execute="trigger"} or the built object if \code{execute="now"}
#' @param path File path to write the Google Cloud Build yaml workflow file. Set to NULL to write no file and just return the \code{Yaml} object.
#' @param task_image An existing Docker image that will be used to run your targets workflow after the targets meta has been downloaded from Google Cloud Storage
#' @param target_folder Where target metadata will sit within the Google Cloud Storage bucket as a folder. If NULL defaults to RStudio project name or "targets_cloudbuild" if no RStudio project found.
#' @param bucket The Google Cloud Storage bucket the target metadata will be saved to in folder `target_folder`
#' @param predefinedAcl The ACL rules for the object uploaded. Set to "bucketLevel" for buckets with bucket level access enabled
#' @param ... Other arguments passed to \link{cr_build_yaml}
#' @inheritDotParams cr_build_yaml
#' @param task_args A named list of additional arguments to send to \link{cr_buildstep_r} when it is executing the \link[targets]{tar_make} command (such as environment arguments)
#' @param tar_make The R script that will run in the tar_make() step. Modify to include custom settings such as "script"
#' @param buildsteps Generated buildsteps that create the targets build
#' @param execute Whether to run the Cloud Build now or to write to a file for use within triggers or otherwise
#' @param local If executing now, the local folder that will be uploaded as the context for the target build
#' @inheritParams cr_buildstep_targets
#' @inheritParams cr_buildstep_targets_setup
#' @inheritParams cr_buildstep_targets_teardown
#' @inheritParams cr_build_targets_artifacts
#' @inheritParams cr_build_upload_gcs
#' @seealso \link{cr_buildstep_targets} if you want to customise the build
#' @examples
#'
#' csv_file = tempfile(fileext = ".csv")
#' write.csv(mtcars, file = csv_file, row.names = FALSE)
#'
#' targets::tar_script(
#' list(
#' targets::tar_target(file1,
#' csv_file, format = "file"),
#' targets::tar_target(input1,
#' read.csv(file1)),
#' targets::tar_target(result1,
#' sum(input1$mpg)),
#' targets::tar_target(result2,
#' mean(input1$mpg)),
#' targets::tar_target(result3,
#' max(input1$mpg)),
#' targets::tar_target(result4,
#' min(input1$mpg)),
#' targets::tar_target(merge1,
#' paste(result1, result2, result3, result4))
#' ),
#' ask = FALSE)
#' have_bucket = tryCatch({cr_bucket_get(); TRUE},
#' error = function(err) FALSE)
#' have_project = tryCatch({cr_project_get(); TRUE},
#' error = function(err) FALSE)
#' if (have_project && have_bucket) {
#' bs <- cr_buildstep_targets_multi()
#' # only create the yaml
#' par_build <- cr_build_targets(bs, path = NULL)
#' par_build
#' # clean up example
#' unlink("_targets.R")
#' }
#'
#' \dontrun{
#' # run it immediately in cloud
#' cr_build_targets(bs, execute="now")
#'
#' # create a yaml file for use in build triggers
#' cr_build_targets(bs)
#' }
#'
cr_build_targets <- function(
    buildsteps = cr_buildstep_targets_multi(),
    execute = c("trigger", "now"),
    path = "cloudbuild_targets.yaml",
    local = ".",
    predefinedAcl = "bucketLevel",
    bucket = cr_bucket_get(),
    download_folder = getwd(),
    ...) {
  execute <- match.arg(execute)

  # ---- "trigger" mode: only build (and optionally write) the yaml ----------
  if (execute == "trigger") {
    yaml_args <- list(buildsteps, ...)
    # `footer` is consumed by cr_build_write(), so it is removed from the
    # arguments before they are handed to cr_build_yaml(). `[[` is used to
    # avoid `$` partial matching against other argument names.
    footer <- yaml_args[["footer"]]
    if (is.null(footer)) {
      footer <- TRUE
    }
    yaml_args[["footer"]] <- NULL

    yaml <- do.call(cr_build_yaml, args = yaml_args)
    if (!is.null(path)) {
      cr_build_write(yaml, file = path, footer = footer)
    }
    return(yaml)
  }

  # ---- "now" mode: upload the local folder and run the build ---------------
  # A random folder name keeps each upload separate inside the bucket.
  target_folder <- basename(tempfile())
  store <- cr_build_upload_gcs(
    local,
    bucket = bucket,
    predefinedAcl = predefinedAcl,
    deploy_folder = target_folder
  )

  # Prepend the step that moves the uploaded source into place.
  move_it <- cr_buildstep_source_move(target_folder)
  buildsteps <- c(move_it, buildsteps)

  # Drop `footer` here too so both modes pass the same arguments on to
  # cr_build_yaml(); it only has meaning when writing a file.
  yaml_dots <- list(...)
  yaml_dots[["footer"]] <- NULL
  yaml <- do.call(cr_build_yaml, args = c(list(buildsteps), yaml_dots))

  myMessage(
    paste(
      "Running Cloud Build for targets workflow in",
      normalizePath(local)
    ),
    level = 3
  )

  # Guard against the option being unset: comparing NULL with `<` yields a
  # zero-length logical, which `if` rejects.
  if (isTRUE(getOption("googleAuthR.verbose", default = 3) < 3)) {
    print(yaml)
  }

  build <- cr_build(yaml, launch_browser = FALSE, source = store)
  built <- cr_build_wait(build)

  # NOTE(review): assumes the final buildstep's second argument is a
  # space-separated command whose last token is the upload destination --
  # confirm against the teardown buildstep.
  last_step <- buildsteps[[length(buildsteps)]]
  extract_upload <- strsplit(last_step$args[[2]], " ")[[1]]
  uploaded <- extract_upload[[length(extract_upload)]]

  # Forward the caller's `download_folder`; it was previously ignored in
  # favour of a hard-coded NULL.
  artifact_download <- cr_build_targets_artifacts(
    built,
    bucket = bucket,
    target_folder = basename(uploaded),
    download_folder = download_folder
  )

  myMessage(
    sprintf("# Built targets on Cloud Build with status: %s", built$status),
    level = 3
  )

  if (!is.null(artifact_download)) {
    myMessage(
      sprintf("Build artifacts downloaded to %s", artifact_download),
      level = 3
    )
  }

  built
}
# Build the "gs://<bucket>/<folder>" location used for targets metadata.
#
# When `target_folder` is NULL the folder name falls back to the basename of
# the active RStudio project, and to "targets_cloudbuild" when that lookup
# fails. The resolved location is logged and returned as a string.
resolve_bucket_folder <- function(target_folder, bucket) {
  if (is.null(target_folder)) {
    # Any error from the rstudioapi lookup is turned into NULL.
    project_name <- tryCatch(
      basename(rstudioapi::getActiveProject()),
      error = function(err) NULL
    )

    target_folder <- if (is.null(project_name)) {
      "targets_cloudbuild"
    } else {
      project_name
    }
  }

  location_msg <- sprintf(
    "targets cloud location: gs://%s/%s",
    bucket, target_folder
  )
  myMessage(location_msg, level = 3)

  # gs://bucket-name/target-folder
  sprintf("gs://%s/%s", bucket, target_folder)
}
#' @rdname cr_build_targets
#' @export
#' @details
#' Use \code{cr_build_targets_artifacts} to download the return values of a
#' target Cloud Build, then \link[targets]{tar_read} to read the results. You can set the downloaded files as the target store via \code{targets::tar_config_set(store="_targets_cloudbuild")}. Set \code{download_folder = "_targets"} to overwrite your local targets store.
#' @inheritParams cr_build_artifacts
#' @param download_folder Set to NULL to overwrite the local targets folder (\code{_targets/*}); otherwise files are written to \code{download_folder/_targets/*}
#' @param target_subfolder If you only want to download a specific folder from the _targets/ folder on Cloud Build then specify it here.
#' @return \code{cr_build_targets_artifacts} returns the file path to where the download occurred.
cr_build_targets_artifacts <- function(
    build,
    bucket = cr_bucket_get(),
    target_folder = NULL,
    download_folder = NULL,
    target_subfolder = c("all", "meta", "objects", "user"),
    overwrite = TRUE) {
  target_subfolder <- match.arg(target_subfolder)

  target_bucket <- resolve_bucket_folder(target_folder, bucket)
  build_folder <- basename(target_bucket)

  # Restrict the listing to one subfolder unless "all" was requested.
  prefix <- build_folder
  if (target_subfolder != "all") {
    prefix <- paste0(build_folder, "/", target_subfolder)
  }

  arts <- googleCloudStorageR::gcs_list_objects(
    bucket = bucket,
    prefix = prefix
  )

  # Treat a NULL listing the same as an empty one; nrow(NULL) is NULL and
  # would make the comparison fail inside `if`.
  if (is.null(arts) || nrow(arts) == 0) {
    myMessage("No build artifacts found in", target_bucket, level = 3)
    return(NULL)
  }

  if (is.null(download_folder)) {
    download_folder <- "."
  } else {
    # recursive = TRUE so a nested folder can be created in one go, which
    # also keeps normalizePath() below from warning about a missing path.
    dir.create(download_folder, showWarnings = FALSE, recursive = TRUE)
  }
  df_bf <- normalizePath(download_folder)

  tar_store <- targets::tar_config_get("store")
  store_path <- file.path(df_bf, tar_store)
  myMessage("Downloading to download_folder:", store_path, level = 3)

  # create targets folder structure
  for (store_dir in c("meta", "objects", "user")) {
    dir.create(
      file.path(store_path, store_dir),
      showWarnings = FALSE,
      recursive = TRUE
    )
  }

  # Download each object beneath `download_folder`. The destination used to
  # be the bare relative path from only_target_dir(), which always landed in
  # the working directory regardless of `download_folder`.
  for (object_name in arts$name) {
    destination <- file.path(df_bf, only_target_dir(object_name))
    # Make sure the parent folder exists for objects nested deeper than the
    # skeleton created above.
    dir.create(dirname(destination), showWarnings = FALSE, recursive = TRUE)
    googleCloudStorageR::gcs_get_object(
      object_name,
      bucket = bucket,
      saveToDisk = destination,
      overwrite = overwrite
    )
  }

  store_path
}
# Strip any leading folders in front of the "_targets/" component of an
# object path, e.g. "build/_targets/meta/meta" becomes "_targets/meta/meta".
# Paths that do not match the pattern are returned unchanged.
only_target_dir <- function(path) {
  leading_folders_then_store <- "(.+)/(_targets/.+)"
  gsub(
    pattern = leading_folders_then_store,
    replacement = "\\2",
    x = path
  )
}