Skip to content

Commit

Permalink
WX-867 Translate crc32c hashes to b64 for getm (#6970)
Browse files Browse the repository at this point in the history
* Translate crc32c hashes to b64 for getm

* Update tests

* Remove obsolete b64 handling for md5, centralize hex validation

* Restore old test, fix other test
  • Loading branch information
jgainerdewar authored Dec 16, 2022
1 parent aa9e876 commit 2242b74
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 19 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ import cats.syntax.validated._
import cloud.nio.impl.drs.AccessUrl
import common.validation.ErrorOr.ErrorOr
import drs.localizer.downloaders.AccessUrlDownloader.Hashes
import mouse.all.anySyntaxMouse
import org.apache.commons.codec.binary.Base64.{decodeBase64, isBase64}
import org.apache.commons.codec.binary.Hex.encodeHexString
import org.apache.commons.codec.binary.Base64.encodeBase64String
import org.apache.commons.codec.binary.Hex.decodeHex
import org.apache.commons.text.StringEscapeUtils


Expand All @@ -26,23 +25,18 @@ sealed trait GetmChecksum {
}

case class Md5(override val rawValue: String) extends GetmChecksum {
override def value: ErrorOr[String] = {
val trimmed = rawValue.trim
if (trimmed.matches("[A-Fa-f0-9]+"))
trimmed.validNel
// TDR currently returns a base64-encoded MD5 because that's what Azure seems to do. However,
// the DRS spec does not specify that any checksums should be base64-encoded, and `getm` also
// does not expect base64. This case handles the current behavior in the short term until
// https://broadworkbench.atlassian.net/browse/DR-2259 is done.
else if (isBase64(trimmed))
(trimmed |> decodeBase64 |> encodeHexString).validNel
else
s"Invalid md5 checksum value is neither hex nor base64: $rawValue".invalidNel
}
override def value: ErrorOr[String] = GetmChecksum.validateHex(rawValue)
override def getmAlgorithm: String = "md5"
}

case class Crc32c(override val rawValue: String) extends GetmChecksum {
// The DRS spec says that all hash values should be hex strings,
// but getm expects crc32c values to be base64.
override def value: ErrorOr[String] =
GetmChecksum.validateHex(rawValue)
.map(decodeHex)
.map(encodeBase64String)

override def getmAlgorithm: String = "gs_crc32c"
}
case class AwsEtag(override val rawValue: String) extends GetmChecksum {
Expand Down Expand Up @@ -88,4 +82,12 @@ object GetmChecksum {
case _ => Null // None or an empty hashes map.
}
}

def validateHex(s: String): ErrorOr[String] = {
val trimmed = s.trim
if (trimmed.matches("[A-Fa-f0-9]+"))
trimmed.validNel
else
s"Invalid checksum value, expected hex but got: $trimmed".invalidNel
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ class GetmChecksumSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matcher
val results = Table(
("description", "algorithm", "expected"),
("md5 hex", Md5("abcdef"), "--checksum-algorithm 'md5' --checksum abcdef".validNel),
("md5 base64", Md5("cR84lXY1y17c3q7/7riLEA=="), "--checksum-algorithm 'md5' --checksum 711f38957635cb5edcdeaeffeeb88b10".validNel),
("md5 gibberish", Md5("what is this???"), "Invalid md5 checksum value is neither hex nor base64: what is this???".invalidNel),
("crc32c", Crc32c("012345"), "--checksum-algorithm 'gs_crc32c' --checksum 012345".validNel),
("md5 base64", Md5("cR84lXY1y17c3q7/7riLEA=="), "Invalid checksum value, expected hex but got: cR84lXY1y17c3q7/7riLEA==".invalidNel),
("md5 gibberish", Md5("what is this???"), "Invalid checksum value, expected hex but got: what is this???".invalidNel),
("crc32c", Crc32c("012345"), "--checksum-algorithm 'gs_crc32c' --checksum ASNF".validNel),
("crc32c gibberish", Crc32c("????"), "Invalid checksum value, expected hex but got: ????".invalidNel),
("AWS ETag", AwsEtag("012345"), "--checksum-algorithm 's3_etag' --checksum 012345".validNel),
// Escape checksum values constructed from unvalidated data returned by DRS servers.
("Unsupported", Unsupported("Robert'); DROP TABLE Students;\n --\\"), raw"--checksum-algorithm 'null' --checksum Robert\'\)\;\ DROP\ TABLE\ Students\;\ --\\".validNel),
Expand All @@ -46,4 +47,19 @@ class GetmChecksumSpec extends AnyFlatSpec with CromwellTimeoutSpec with Matcher
}
}
}

it should "correctly validate hex strings" in {
val results = Table(
("test string", "expected"),
("", "Invalid checksum value, expected hex but got: ".invalidNel),
(" ", "Invalid checksum value, expected hex but got: ".invalidNel),
("myfavoritestring", "Invalid checksum value, expected hex but got: myfavoritestring".invalidNel),
(" AbC123 ", "AbC123".validNel),
("456", "456".validNel),
)

forAll(results) { (testString, expected) =>
GetmChecksum.validateHex(testString) shouldBe expected
}
}
}

0 comments on commit 2242b74

Please sign in to comment.