Skip to content

Commit

Permalink
Don't recalculate checksum on package download (new default case) (#261)
Browse files Browse the repository at this point in the history
  • Loading branch information
gothub committed Nov 18, 2020
1 parent 4323543 commit e1617c5
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 23 deletions.
42 changes: 26 additions & 16 deletions R/D1Client.R
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,9 @@ setMethod("getD1Object", "D1Client", function(x, identifier) {
#' @param limit A \code{character} value specifying maximum package member size to download. Specified with "KB", "MB", "GB" or "TB",
#' for example: "100KB", "10MB", "20GB", "1TB". The default is "1MB". Only takes effect if 'lazyLoad=FALSE'.
#' @param quiet A \code{'logical'}. If TRUE (the default) then informational messages will not be printed.
#' @param checksumAlgorithm A \code{character} value specifying the algorithm to use to calculate the system metadata check
#' for the object's data bytes for example: "SHA-256"
#' @param checksumAlgorithm A \code{character} value specifying the algorithm to use to re-calculate (after download) the system metadata checksum
#' for the object's data bytes, for example: "SHA-256". The default is \code{NA} (a missing value, not the string "NA"), which specifies that this
#' re-calculation will not be performed.
#' @param ... (not yet used)
#' @rdname getDataObject
#' @aliases getDataObject
Expand All @@ -274,7 +275,7 @@ setGeneric("getDataObject", function(x, identifier, ...) {
#' @rdname getDataObject
#' @export
setMethod("getDataObject", "D1Client", function(x, identifier, lazyLoad=FALSE, limit="1MB", quiet=TRUE,
checksumAlgorithm="SHA-256") {
checksumAlgorithm=as.character(NA)) {

# Resolve the object location
# This service is too chatty if any of the locations aren't available
Expand Down Expand Up @@ -373,20 +374,29 @@ setMethod("getDataObject", "D1Client", function(x, identifier, lazyLoad=FALSE, l

# If the checksum for current object does not match the requested checksum algorithm, then calculate
# it if the object's data is local, otherwise ask DataONE to calculate it.
if(tolower(sysmeta@checksumAlgorithm) != tolower(checksumAlgorithm)) {
# Bytes were not downloaded into the DataObject
if (deferredDownload) {
checksum = getChecksum(currentMN, pid=identifier, checksumAlgorithm=checksumAlgorithm)
sysmeta@checksum = checksum
sysmeta@checksumAlgorithm <- checksumAlgorithm
cat(sprintf("got new %s checksum from dataone for id %s: %s\n", checksumAlgorithm, identifier, checksum))
}
}
if(!is.na(checksumAlgorithm)) {
if(tolower(sysmeta@checksumAlgorithm) != tolower(checksumAlgorithm)) {
# Bytes were not downloaded into the DataObject
if (deferredDownload) {
checksum = getChecksum(currentMN, pid=identifier, checksumAlgorithm=checksumAlgorithm)
sysmeta@checksum = checksum
sysmeta@checksumAlgorithm <- checksumAlgorithm
}
if(!quiet) {
cat(sprintf("Fetched recalculated checksum from DataONE using %s for id %s: %s\n", checksumAlgorithm, identifier, checksum))
}
}
} else {
# The checksum algorithm was set to NA, meaning don't re-calculate the checksum after download. Since the
# DataObject constructor requires an algorithm to be specified, use the algorithm specified in the
# system metadata that was just downloaded for this object.
checksumAlgorithm <- sysmeta@checksumAlgorithm
}

# Construct and return a DataObject
# Notice that we are passing the existing sysmeta for this object via the 'id' parameter,
# which will cause the DataObject to use this sysmeta and not generate a new one. However, if the
# sysmeta checksum algorithm is different thatn the pecified algorithm, then the checksum will
# sysmeta checksum algorithm is different than the specified algorithm, then the checksum will
# be recalculated.
do <- new("DataObject", id=sysmeta, dataobj=bytes, dataURL=dataURL, checksumAlgorithm=checksumAlgorithm)

Expand Down Expand Up @@ -437,12 +447,12 @@ setGeneric("getDataPackage", function(x, identifier, ...) {
#' @param limit A \code{character} value specifying maximum package member size to download. Specified with "KB", "MB", "GB" or "TB",
#' for example: "100KB", "10MB", "20GB", "1TB". The default is "1MB". Only takes effect if 'lazyLoad=FALSE'.
#' @param quiet A \code{'logical'}. If TRUE (the default) then informational messages will not be printed.
#' @param checksumAlgorithm A \code{character} value specifying the algorithm to use to calculate the system metadata check
#' for the object's data bytes for example: "SHA-256"
#' @param checksumAlgorithm A \code{character} value specifying the algorithm to use to re-calculate (after download) the system metadata checksum
#' for the object's data bytes, for example: "SHA-256". The default is \code{NA} (a missing value, not the string "NA"), which specifies that this re-calculation will not be performed.
#' @param ... (not yet used)
#' @export
setMethod("getDataPackage", "D1Client", function(x, identifier, lazyLoad=FALSE, limit="1MB", quiet=TRUE,
checksumAlgorithm="SHA-256") {
checksumAlgorithm=as.character(NA)) {

# The identifier provided could be the package id (resource map), the metadata id or a package member (data, etc)
# The solr queries attempt to determine which id was specified and may issue additional queries to get all the
Expand Down
7 changes: 4 additions & 3 deletions man/getDataObject.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/getDataPackage.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test.D1Client.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ test_that("D1Client getDataObject", {

# Try retrieving a known object from the PROD environment
pid <- "solson.5.1"
obj <- getDataObject(d1cKNB, pid)
obj <- getDataObject(d1cKNB, pid, checksumAlgorithm="SHA-256")
cname <- class(obj)[1]
expect_match(cname, "DataObject")
expect_match(class(obj@sysmeta), "SystemMetadata")
Expand Down

0 comments on commit e1617c5

Please sign in to comment.