Skip to content

Commit

Permalink
Implement download function that preseves the remote's last-modified.
Browse files Browse the repository at this point in the history
This ensures that the timestamps are accurate so that the cache can be
accurately updated. For consistency, we replace all other instances of
download.file; our implementation also has friendlier errors.
  • Loading branch information
LTLA committed Aug 22, 2024
1 parent bbd6ca0 commit 6f24668
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 31 deletions.
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@ export(stopSewerRat)
import(httr2)
import(methods)
importFrom(utils,URLencode)
importFrom(utils,download.file)
importFrom(utils,head)
33 changes: 7 additions & 26 deletions R/retrieveDirectory.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,33 +82,12 @@ retrieveDirectory <- function(path, url, cache=NULL, forceRemote=FALSE, overwrit
final
}

#' @importFrom utils URLencode
full_file_url <- function(url, path) {
paste0(url, "/retrieve/file?path=", URLencode(path, reserved=TRUE))
}

get_remote_last_modified <- function(url, path) {
req <- request(full_file_url(url, path))
req <- req_method(req, "HEAD")
req <- handle_error(req)
res <- req_perform(req)
remote_mod <- resp_header(res, "last-modified")

if (is.null(remote_mod)) {
warning("failed to find 'last-modified' header from the SewerRat API")
return(NULL)
}

remote_mod <- as.POSIXct(remote_mod, format="%a, %d %b %Y %H:%M:%S", tz="GMT")
if (is.na(remote_mod)) {
warning("invalid 'last-modified' header from the SewerRat API")
return(NULL)
}

return(remote_mod)
}

#' @import httr2
#' @importFrom utils download.file
acquire_file_raw <- function(cache, path, url, overwrite, updateDelay) {
target <- file.path(cache, "LOCAL", path)

Expand All @@ -117,7 +96,11 @@ acquire_file_raw <- function(cache, path, url, overwrite, updateDelay) {
} else if (!overwrite) {
last_mod <- file.info(target)$mtime
if (last_mod + updateDelay < Sys.time()) { # only check older files for updates, to avoid excessive queries.
remote_mod <- get_remote_last_modified(url, path)
req <- request(full_file_url(url, path))
req <- req_method(req, "HEAD")
req <- handle_error(req)
res <- req_perform(req)
remote_mod <- parse_remote_last_modified(res)
if (!is.null(remote_mod) && remote_mod > last_mod) {
overwrite <- TRUE
}
Expand All @@ -130,9 +113,7 @@ acquire_file_raw <- function(cache, path, url, overwrite, updateDelay) {
tempf <- tempfile(tmpdir=tempdir)
on.exit(unlink(tempf), add=TRUE, after=FALSE)

if (download.file(full_file_url(url, path), tempf)) {
stop("failed to download '", path, "'")
}
download_file(full_file_url(url, path), tempf)
dir.create(dirname(target), recursive=TRUE, showWarnings=FALSE)
file.rename(tempf, target) # this should be more or less atomic, so no need for locks.
}
Expand Down
5 changes: 1 addition & 4 deletions R/startSewerRat.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
#' startSewerRat() # initialize a new instance.
#'
#' @export
#' @importFrom utils download.file
startSewerRat <- function(db=tempfile(fileext=".sqlite3"), port=NULL, wait = 1, version = "1.0.6", overwrite = FALSE) {
if (!is.null(running$active)) {
return(list(new=FALSE, port=running$port, url=assemble_url(running$port)))
Expand Down Expand Up @@ -59,9 +58,7 @@ startSewerRat <- function(db=tempfile(fileext=".sqlite3"), port=NULL, wait = 1,
if (!file.exists(exe) || overwrite) {
url <- paste0("https://github.com/ArtifactDB/SewerRat/releases/download/", version, "/", desired)
tmp <- tempfile()
if (download.file(url, tmp)) {
stop("failed to download the SewerRat binary")
}
download_file(url, tmp)
Sys.chmod(tmp, "0755")

# Using a write-and-rename paradigm to provide some atomicity. Note
Expand Down
32 changes: 32 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,35 @@ clean_path <- function(path) {
keep <- c("", keep) # add back the root.
paste(keep, collapse="/")
}

#' @import httr2
parse_remote_last_modified <- function(res) {
remote_mod <- resp_header(res, "last-modified")

if (is.null(remote_mod)) {
warning("failed to find 'last-modified' header from the SewerRat API")
return(NULL)
}

remote_mod <- as.POSIXct(remote_mod, format="%a, %d %b %Y %H:%M:%S", tz="GMT")
if (is.na(remote_mod)) {
warning("invalid 'last-modified' header from the SewerRat API")
return(NULL)
}

return(remote_mod)
}

#' @import httr2
download_file <- function(url, path) {
req <- request(url)
req <- handle_error(req)
res <- req_perform(req, path=path)

# The key part here is to set the modification time correctly,
# so that any updating mechanisms work correctly.
mod <- parse_remote_last_modified(res)
if (!is.null(mod)) {
Sys.setFileTime(path, mod)
}
}

0 comments on commit 6f24668

Please sign in to comment.