Skip to content

Commit

Permalink
Support more checksum algorithms (#88)
Browse files Browse the repository at this point in the history
Added support for SHA-1, SHA-256, and SHA-512 file checksums.
  • Loading branch information
mcantelon committed Nov 26, 2024
1 parent 8893cde commit 4ac122f
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 24 deletions.
63 changes: 39 additions & 24 deletions internal/activities/verify_manifest.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@ package activities

import (
"context"
"crypto/md5" // #nosec: 501 -- not used for security.
"crypto/md5" // #nosec: 501 -- not used for security.
"crypto/sha1" // #nosec: 501 -- not used for security.
"crypto/sha256" // #nosec: 501 -- not used for security.
"crypto/sha512" // #nosec: 501 -- not used for security.
"encoding/hex"
"fmt"
"hash"
"io"
"io/fs"
"os"
Expand Down Expand Up @@ -181,43 +185,54 @@ func verifyChecksums(
continue
}

// Generate checksum from filesystem file contents.
switch file.Checksum.Algorithm {
case "MD5":
hash, err := md5Hash(filepath.Join(root, path))
if err != nil {
return nil, fmt.Errorf("generate MD5 hash: %v", err)
}
if hash != file.Checksum.Hash {
failures = append(
failures,
fmt.Sprintf(
"Checksum mismatch for %q (expected: %q, got: %q)",
path,
file.Checksum.Hash,
hash,
),
)
}
default:
return nil, fmt.Errorf("hash algorithm %q is not supported", file.Checksum.Algorithm)
// Attempt to generate hash from filesystem file contents.
hashResult, err := generateHash(filepath.Join(root, path), file.Checksum.Algorithm)
if err != nil {
return nil, err
}

Check warning on line 192 in internal/activities/verify_manifest.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/verify_manifest.go#L191-L192

Added lines #L191 - L192 were not covered by tests

// Compare hash to expected value.
if hashResult != file.Checksum.Hash {
failures = append(
failures,
fmt.Sprintf(
"Checksum mismatch for %q (expected: %q, got: %q)",
path,
file.Checksum.Hash,
hashResult,
),
)
}
}
slices.Sort(failures)

return failures, nil
}

// md5Hash returns a hexadecimal encoded hash string generated from the contents
// generateHash returns a hexadecimal encoded hash string generated from the contents
// of the file at path.
func md5Hash(path string) (string, error) {
func generateHash(path, alg string) (string, error) {
var h hash.Hash

switch alg {
case "MD5":
h = md5.New() // #nosec: G401 -- not used for security.
case "SHA-1":
h = sha1.New() // #nosec: G401 -- not used for security.
case "SHA-256":
h = sha256.New() // #nosec: G401 -- not used for security.
case "SHA-512":
h = sha512.New() // #nosec: G401 -- not used for security.
default:
return "", fmt.Errorf("hash algorithm %q is not supported", alg)

Check warning on line 227 in internal/activities/verify_manifest.go

View check run for this annotation

Codecov / codecov/patch

internal/activities/verify_manifest.go#L226-L227

Added lines #L226 - L227 were not covered by tests
}

f, err := os.Open(path) // #nosec: G304 -- trusted path.
if err != nil {
return "", fmt.Errorf("open file: %v", err)
}
defer f.Close()

h := md5.New() // #nosec: G401 -- not used for security.
if _, err := io.Copy(h, f); err != nil {
return "", fmt.Errorf("copy contents: %v", err)
}
Expand Down
30 changes: 30 additions & 0 deletions internal/activities/verify_manifest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,24 @@ const (
<pruefalgorithmus>MD5</pruefalgorithmus>
<pruefsumme>1e01ba3e07ac48cbdab2d3284d1dd0fa</pruefsumme>
</datei>
<datei id="_m1Xw3hINt3zY6WvKQOfYmk">
<name>00000003.jp2</name>
<originalName>00000003.jp2</originalName>
<pruefalgorithmus>SHA-256</pruefalgorithmus>
<pruefsumme>e2217d3e4e120c6a3372a1890f03e232b35ad659d71f7a62501a4ee204a3e66d</pruefsumme>
</datei>
<datei id="_m2Xw3hINt3zY6WvKQOfYmk">
<name>00000004.jp2</name>
<originalName>00000004.jp2</originalName>
<pruefalgorithmus>SHA-512</pruefalgorithmus>
<pruefsumme>21d906a2e95ee518e5423f8536941272bdc81f48d0e0c33cd358a6c3c5bdd26b9beb87e72b99574b4b1e72e0993565c7205aeed3c7ddc0af553408ed035d47f2</pruefsumme>
</datei>
<datei id="_m3Xw3hINt3zY6WvKQOfYmk">
<name>00000005.jp2</name>
<originalName>00000005.jp2</originalName>
<pruefalgorithmus>SHA-1</pruefalgorithmus>
<pruefsumme>230991abcd77e8173edb0af392e1f11120051e29</pruefsumme>
</datei>
</ordner>
</ordner>
</inhaltsverzeichnis>
Expand Down Expand Up @@ -178,6 +196,9 @@ func TestVerifyManifest(t *testing.T) {
// fs.WithFile("00000001.jp2", "12345"),
fs.WithFile("00000001_PREMIS.xml", "abcdef"),
fs.WithFile("00000002.jp2", "67890"),
fs.WithFile("00000003.jp2", "67890"),
fs.WithFile("00000004.jp2", "67890"),
fs.WithFile("00000005.jp2", "67890"),
fs.WithFile("00000002_PREMIS.xml", "ghijk"),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", "lmnop"),
),
Expand All @@ -203,6 +224,9 @@ func TestVerifyManifest(t *testing.T) {
fs.WithFile("00000001.jp2", "12345"),
fs.WithFile("00000001_PREMIS.xml", "abcdef"),
fs.WithFile("00000002.jp2", "67890"),
fs.WithFile("00000003.jp2", "67890"),
fs.WithFile("00000004.jp2", "67890"),
fs.WithFile("00000005.jp2", "67890"),
fs.WithFile("00000002_PREMIS.xml", "ghijk"),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", "lmnop"),
),
Expand Down Expand Up @@ -319,6 +343,9 @@ func TestVerifyManifest(t *testing.T) {
fs.WithFile("00000001.jp2", "wrong checksum"),
fs.WithFile("00000001_PREMIS.xml", "abcdef"),
fs.WithFile("00000002.jp2", "67890"),
fs.WithFile("00000003.jp2", "567890"),
fs.WithFile("00000004.jp2", "567890"),
fs.WithFile("00000005.jp2", "567890"),
fs.WithFile("00000002_PREMIS.xml", "ghijk"),
fs.WithFile("Prozess_Digitalisierung_PREMIS.xml", "lmnop"),
),
Expand All @@ -341,6 +368,9 @@ func TestVerifyManifest(t *testing.T) {
Failed: true,
ChecksumFailures: []string{
`Checksum mismatch for "content/content/d_0000001/00000001.jp2" (expected: "827ccb0eea8a706c4c34a16891f84e7b", got: "2714364e3a0ac68e8bf9b898b31ff303")`,
`Checksum mismatch for "content/content/d_0000001/00000003.jp2" (expected: "e2217d3e4e120c6a3372a1890f03e232b35ad659d71f7a62501a4ee204a3e66d", got: "b39667cf64cd5bc6cd7adbfc711cd8446036f9144c1cceb604897b0e824a027d")`,
`Checksum mismatch for "content/content/d_0000001/00000004.jp2" (expected: "21d906a2e95ee518e5423f8536941272bdc81f48d0e0c33cd358a6c3c5bdd26b9beb87e72b99574b4b1e72e0993565c7205aeed3c7ddc0af553408ed035d47f2", got: "6f81f85d16cdb32c7cde06266545f19fc31657c77db15e8b075414931843d99376c4cbd64f3682e78deebf849dd5b78ca9d0d23b6fdffd990c5170d92ece694a")`,
`Checksum mismatch for "content/content/d_0000001/00000005.jp2" (expected: "230991abcd77e8173edb0af392e1f11120051e29", got: "cce4229d3a446c687f23de7b5ee34c057cfc9d90")`,
`Checksum mismatch for "content/header/old/SIP/metadata.xml" (expected: "636351dce76b47b3d40712813b9a34f3", got: "dff24b6a34ff7ab645cb477e090bee5f")`,
},
},
Expand Down

0 comments on commit 4ac122f

Please sign in to comment.