Skip to content

Commit

Permalink
Replace md5 with blake3 for fingerprinting (#166)
Browse files Browse the repository at this point in the history
* replace md5 with blake3 for fingerprinting

* Update internal/fingerprint/fingerprint.go

Co-authored-by: Oscar Reimer <[email protected]>

* move blake3 to direct dep

---------

Co-authored-by: Oscar Reimer <[email protected]>
  • Loading branch information
emilwareus and sweoggy authored Dec 14, 2023
1 parent e4442ab commit b673b57
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 17 deletions.
2 changes: 2 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ require (
github.com/spf13/viper v1.15.0
github.com/stretchr/testify v1.8.2
github.com/vifraa/gopom v0.2.1
lukechampine.com/blake3 v1.2.1
)

require (
Expand All @@ -32,6 +33,7 @@ require (
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
github.com/kevinburke/ssh_config v1.2.0 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/magiconair/properties v1.8.7 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.18 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,8 @@ github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
Expand Down Expand Up @@ -641,6 +643,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
lukechampine.com/blake3 v1.2.1 h1:YuqqRuaqsGV71BV/nm9xlI0MKUv4QC54jQnBChWbGnI=
lukechampine.com/blake3 v1.2.1/go.mod h1:0OFRp7fBtAylGVCO40o87sbupkyIGgbpv1+M1k1LM6k=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
Expand Down
30 changes: 20 additions & 10 deletions internal/fingerprint/fingerprint.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package fingerprint
import (
"archive/zip"
"bufio"
"crypto/md5" // #nosec
"errors"
"fmt"
"io"
Expand All @@ -14,6 +13,7 @@ import (

"github.com/debricked/cli/internal/file"
"github.com/debricked/cli/internal/tui"
"lukechampine.com/blake3"
)

var EXCLUDED_EXT = []string{
Expand Down Expand Up @@ -43,6 +43,15 @@ var EXCLUDED_FILES = []string{
"thumbs.db", "babel.config.js", "license.txt", "license.md", "copying.lib", "makefile",
}

// HASH_SIZE is the digest length, in bytes, requested from BLAKE3 for every
// fingerprint (16 bytes renders as 32 hex characters).
const HASH_SIZE = 16

// newHasher returns a fresh BLAKE3 hasher that produces HASH_SIZE-byte
// digests. The nil key argument selects plain (unkeyed) hashing.
func newHasher() *blake3.Hasher {
	return blake3.New(HASH_SIZE, nil)
}

const (
OutputFileNameFingerprints = "debricked.fingerprints.wfp"
)
Expand Down Expand Up @@ -132,7 +141,7 @@ func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) (
return err
}

fingerprintsZip, err := computeMD5ForFileAndZip(fileInfo, path, exclusions)
fingerprintsZip, err := computeHashForFileAndZip(fileInfo, path, exclusions)
if err != nil {
return err
}
Expand Down Expand Up @@ -162,7 +171,7 @@ func (f *Fingerprinter) FingerprintFiles(rootPath string, exclusions []string) (
return fingerprints, err
}

func computeMD5ForFileAndZip(fileInfo os.FileInfo, path string, exclusions []string) ([]FileFingerprint, error) {
func computeHashForFileAndZip(fileInfo os.FileInfo, path string, exclusions []string) ([]FileFingerprint, error) {
if !shouldProcessFile(fileInfo, exclusions, path) {
return nil, nil
}
Expand All @@ -182,8 +191,7 @@ func computeMD5ForFileAndZip(fileInfo os.FileInfo, path string, exclusions []str
fingerprints = append(fingerprints, fingerprintsZip...)
}

// Compute the MD5 for the file
fingerprint, err := computeMD5ForFile(path)
fingerprint, err := computeHashForFile(path)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -229,15 +237,15 @@ func shouldProcessFile(fileInfo os.FileInfo, exclusions []string, path string) b
return !isSymlink
}

func computeMD5ForFile(filename string) (FileFingerprint, error) {
func computeHashForFile(filename string) (FileFingerprint, error) {
data, err := os.ReadFile(filename)
if err != nil {
return FileFingerprint{}, err
}

hash := md5.New() // #nosec
hasher := newHasher()

if _, err := hash.Write(data); err != nil {
if _, err := hasher.Write(data); err != nil {
return FileFingerprint{}, err
}

Expand All @@ -250,7 +258,7 @@ func computeMD5ForFile(filename string) (FileFingerprint, error) {
return FileFingerprint{
path: filename,
contentLength: contentLength,
fingerprint: hash.Sum(nil),
fingerprint: hasher.Sum(nil),
}, nil
}

Expand Down Expand Up @@ -319,7 +327,9 @@ func inMemFingerprintingCompressedContent(filename string, exclusions []string)
if err != nil {
return nil, err
}
hasher := md5.New() // #nosec

hasher := newHasher()

_, err = io.Copy(hasher, rc) // #nosec
if err != nil {
rc.Close()
Expand Down
14 changes: 7 additions & 7 deletions internal/fingerprint/fingerprint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func TestFingerprintFiles(t *testing.T) {
assert.NotNil(t, fingerprints)
assert.NotEmpty(t, fingerprints)
assert.Equal(t, 2, fingerprints.Len())
assert.Equal(t, "file=72214db4e1e543018d1bafe86ea3b444,21,testdata/fingerprinter/testfile.py", fingerprints.Entries[0].ToString())
assert.Equal(t, "file=634c5485de8e22b27094affadd8a6e3b,21,testdata/fingerprinter/testfile.py", fingerprints.Entries[0].ToString())

// Test no file
fingerprints, err = fingerprinter.FingerprintFiles("", []string{})
Expand Down Expand Up @@ -198,14 +198,14 @@ func TestFileFingerprintToString(t *testing.T) {

func TestComputeMD5(t *testing.T) {
// Test file not found
_, err := computeMD5ForFile("testdata/fingerprinter/testfile-not-found.py")
_, err := computeHashForFile("testdata/fingerprinter/testfile-not-found.py")
assert.Error(t, err)

// Test file found
entry, err := computeMD5ForFile("testdata/fingerprinter/testfile.py")
entry, err := computeHashForFile("testdata/fingerprinter/testfile.py")
assert.NoError(t, err)
entryS := fmt.Sprintf("%x", entry.fingerprint)
assert.Equal(t, "72214db4e1e543018d1bafe86ea3b444", entryS)
assert.Equal(t, "634c5485de8e22b27094affadd8a6e3b", entryS)
}

func TestFingerprintsToFile(t *testing.T) {
Expand Down Expand Up @@ -350,7 +350,7 @@ func TestInMemFingerprintingCompressedContent(t *testing.T) {
}
}

func TestComputeMD5ForFile(t *testing.T) {
func TestComputeHashForFile(t *testing.T) {
tests := []struct {
name string
file string
Expand All @@ -366,9 +366,9 @@ func TestComputeMD5ForFile(t *testing.T) {

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := computeMD5ForFile(tt.file)
_, err := computeHashForFile(tt.file)
if (err != nil) != tt.wantErr {
t.Errorf("computeMD5ForFile() error = %v, wantErr %v", err, tt.wantErr)
t.Errorf("computeHashForFile() error = %v, wantErr %v", err, tt.wantErr)
}
})
}
Expand Down

0 comments on commit b673b57

Please sign in to comment.