Skip to content

Commit

Permalink
Support relative symbolic links when saving a ReloadedArray.
Browse files Browse the repository at this point in the history
This allows us to create an on-disk representation that is more robust
to movement of the original directory from which the ReloadedArray is
sourced, provided that both the new and original paths are moved together.
  • Loading branch information
LTLA committed Sep 3, 2024
1 parent bc979a4 commit 1c1c603
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 26 deletions.
29 changes: 7 additions & 22 deletions R/ReloadedArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@
#' If this fails, we silently fall back to a copy.
#' This mode is the default approach.
#' \item \code{"symlink"}: create a symbolic link from the files in the original directory to the new \code{path}.
#' Each symbolic link refers to an absolute path in the original directory, which is useful when the contents of \code{path} might be moved (but the original directory will not).
#' \item \code{"relsymlink"}: create a symbolic link from the files in the original directory to the new \code{path}.
#' Each symbolic link refers to an relative path to its corresponding file in the original directory,
#' which is useful when both \code{path} and the original directory are moved together, e.g., as they are part of the same parent object like a SummarizedExperiment.
#' \item \code{"none"}: ignore existing files and just save the contents by calling \code{"\link{saveObject,DelayedArray-method}"}.
#' }
#'
Expand Down Expand Up @@ -90,32 +94,13 @@ setMethod("path", "ReloadedArraySeed", function(object, ...) object@path)

#' @export
setMethod("saveObject", "ReloadedArray", function(x, path, ReloadedArray.reuse.files="link", ...) {
ReloadedArray.reuse.files <- match.arg(ReloadedArray.reuse.files, c("none", "copy", "link", "symlink"))
ReloadedArray.reuse.files <- match.arg(ReloadedArray.reuse.files, c("none", "copy", "link", "symlink", "relsymlink"))
s <- x@seed
if (ReloadedArray.reuse.files == "none") {
x <- DelayedArray(s@seed)
return(callNextMethod())
return(saveObject(x, path, ReloadedArray.reuse.files=ReloadedArray.reuse.files, ...))
}

manifest <- list.files(s@path, recursive=TRUE)
dir.create(path)

if (ReloadedArray.reuse.files == "symlink") {
fun <- file.symlink
msg <- "link"
} else if (ReloadedArray.reuse.files == "link") {
fun <- function(from, to) file.link(from, to) || file.copy(from, to)
msg <- "copy or link"
} else {
fun <- file.copy
msg <- "copy"
}

for (y in manifest) {
if (!fun(file.path(s@path, y), file.path(path, y))) {
stop("failed to ", msg, " '", y, "' from '", s@path, "' to '", path, "'")
}
}

clone_duplicate(s@path, path, action=ReloadedArray.reuse.files)
invisible(NULL)
})
67 changes: 67 additions & 0 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,70 @@ load_names <- function(handle, ndim, group = "names") {

names
}

path_components <- function(path) {
output <- character()
while (TRUE) {
base <- basename(path)
dpath <- dirname(path)
output <- c(base, output)
if (dpath == path) {
break
}
path <- dpath
}
output
}

clone_duplicate <- function(src, dest, action) {
dir.create(dest)
manifest <- list.files(src, recursive=TRUE)

if (action == "relsymlink") {
# Find relative path from one to the other.
src <- normalizePath(src, mustWork=TRUE)
src.comp <- path_components(src)
src.len <- length(src.comp)

dest <- normalizePath(dest, mustWork=TRUE)
dest.comp <- path_components(dest)
dest.len <- length(dest.comp)

counter <- 0L
for (i in seq_len(min(src.len, dest.len))) {
if (src.comp[i] != dest.comp[i]) {
counter <- i - 1L
break
}
}

base2base <- do.call(file.path, as.list(c(rep("..", dest.len - counter), src.comp[(counter+1):src.len])))
pwd <- getwd()
on.exit(setwd(pwd), add=TRUE, after=FALSE)
setwd(dest)

for (y in manifest) {
if (!file.symlink(file.path(base2base, y), y)) {
stop("failed to link '", y, "' from '", src, "' to '", dest, "'")
}
}
return(NULL)
}

if (action == "link") {
fun <- function(from, to) file.link(from, to) || file.copy(from, to)
msg <- "copy or link"
} else if (action == "copy") {
fun <- file.copy
msg <- "copy"
} else if (action == "symlink") {
fun <- file.symlink
msg <- "link"
}

for (y in manifest) {
if (!fun(file.path(src, y), file.path(dest, y))) {
stop("failed to ", msg, " '", y, "' from '", src, "' to '", dest, "'")
}
}
}
31 changes: 27 additions & 4 deletions tests/testthat/test-ReloadedArray.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,40 @@ test_that("ReloadedArrays store the absolute path", {
})

test_that("ReloadedArrays save correctly", {
tmp <- tempfile()
saveObject(obj, tmp, ReloadedArray.reuse.files="none")
expect_identical(as.array(readObject(tmp)), arr)
original <- tempfile()
saveObject(obj, original, ReloadedArray.reuse.files="none")
expect_identical(as.array(readObject(original)), arr)

if (.Platform$OS.type=="unix") {
# This test just doesn't seem to work on Windows. Either the symlink
# doesn't form properly, or the symlink path isn't what we expect.
tmp <- tempfile()
saveObject(obj, tmp, ReloadedArray.reuse.files="symlink")
expect_identical(as.array(readObject(tmp)), arr)
expect_identical(normalizePath(Sys.readlink(file.path(tmp, "array.h5"))), normalizePath(file.path(dir, "array.h5")))
link.dest <- Sys.readlink(file.path(tmp, "array.h5"))
expect_true(startsWith(link.dest, "/"))
expect_identical(normalizePath(link.dest), normalizePath(file.path(dir, "array.h5")))
expect_identical(as.array(readObject(tmp)), arr)

# Relative symlinks also work as expected.
tmp <- tempfile()
saveObject(obj, tmp, ReloadedArray.reuse.files="relsymlink")
expect_identical(as.array(readObject(tmp)), arr)
link.dest <- Sys.readlink(file.path(tmp, "array.h5"))
expect_true(startsWith(link.dest, ".."))
expect_identical(normalizePath(file.path(tmp, link.dest)), normalizePath(file.path(dir, "array.h5")))
expect_identical(as.array(readObject(tmp)), arr)

# Trying in a more deeply nested target directory.
tmp0 <- tempfile()
dir.create(tmp0)
tmp <- tempfile(tmpdir=tmp0)
saveObject(obj, tmp, ReloadedArray.reuse.files="relsymlink")
expect_identical(as.array(readObject(tmp)), arr)
link.dest <- Sys.readlink(file.path(tmp, "array.h5"))
expect_true(startsWith(link.dest, "../.."))
expect_identical(normalizePath(file.path(tmp, link.dest)), normalizePath(file.path(dir, "array.h5")))
expect_identical(as.array(readObject(tmp)), arr)
}

# file.info() doesn't report the inode number so we don't have an easy way
Expand Down

0 comments on commit 1c1c603

Please sign in to comment.