diff --git a/pkg/hasher/blake2b.go b/pkg/hasher/blake2b.go
new file mode 100644
index 000000000000..70e5cdafaae5
--- /dev/null
+++ b/pkg/hasher/blake2b.go
@@ -0,0 +1,18 @@
+package hasher
+
+import "golang.org/x/crypto/blake2b"
+
+// Blake2b implements the Hasher interface using the BLAKE2b-256 algorithm.
+// It embeds baseHasher and therefore is not safe for concurrent use.
+type Blake2b struct{ baseHasher }
+
+// Compile-time check that *Blake2b satisfies Hasher.
+var _ Hasher = (*Blake2b)(nil)
+
+// NewBlake2B creates a new Blake2b hasher producing 32-byte (256-bit) digests.
+func NewBlake2B() *Blake2b {
+	// New256 only fails when the key is longer than 64 bytes; with a nil key
+	// (plain unkeyed hashing) the error is always nil, so it is safe to drop.
+	h, _ := blake2b.New256(nil)
+	return &Blake2b{baseHasher: baseHasher{hash: h}}
+}
diff --git a/pkg/hasher/hasher.go b/pkg/hasher/hasher.go
new file mode 100644
index 000000000000..ada04383bdd0
--- /dev/null
+++ b/pkg/hasher/hasher.go
@@ -0,0 +1,57 @@
+// Package hasher provides a generic interface and base implementation for hashing data.
+package hasher
+
+import (
+	"fmt"
+	"hash"
+)
+
+// Hasher defines a generic interface for hashing data.
+// Implementations of this interface may choose to be safe for concurrent use,
+// but it is not a requirement. Users should check the documentation of specific
+// implementations for concurrent safety guarantees.
+type Hasher interface {
+	// Hash takes input data and returns the hashed result.
+	// It returns an error if the input data is too large.
+	// The function is idempotent - calling it multiple times with the same input
+	// will produce the same output, assuming the underlying hash function is deterministic.
+	Hash(data []byte) ([]byte, error)
+}
+
+// baseHasher provides a base implementation for the Hasher interface.
+// It uses the hash.Hash interface from the standard library to perform the actual hashing.
+// This implementation is not safe for concurrent use. Each goroutine/worker should
+// use its own instance of baseHasher for concurrent operations.
+// Implementations that require concurrent access should wrap baseHasher with a mutex
+// (e.g., MutexHasher).
+type baseHasher struct{ hash hash.Hash }
+
+// InputTooLargeError is returned when the input data exceeds the maximum allowed size.
+type InputTooLargeError struct {
+	inputSize int // length in bytes of the rejected input
+	maxSize   int // limit that was in effect (maxInputSize)
+}
+
+// Error implements the error interface, reporting both the offending input
+// size and the limit it exceeded.
+func (e *InputTooLargeError) Error() string {
+	return fmt.Sprintf("input data exceeds the maximum allowed size: %d > %d", e.inputSize, e.maxSize)
+}
+
+// maxInputSize is the largest input, in bytes, that Hash accepts (16 KiB).
+const maxInputSize = 1 << 14
+
+// Hash computes the hash of the given data.
+// It returns an InputTooLargeError if the input data exceeds the maximum allowed size.
+// This method resets the underlying hash before each computation to ensure
+// that previous hashing operations do not affect the result.
+func (b *baseHasher) Hash(data []byte) ([]byte, error) {
+	if len(data) > maxInputSize {
+		return nil, &InputTooLargeError{inputSize: len(data), maxSize: maxInputSize}
+	}
+	b.hash.Reset()
+	// The hash.Hash interface does not return errors on Write.
+	// (https://cs.opensource.google/go/go/+/refs/tags/go1.23.1:src/hash/hash.go;l=27-28)
+	_, _ = b.hash.Write(data) //nolint:errcheck
+	return b.hash.Sum(nil), nil
+}
diff --git a/pkg/hasher/hasher_test.go b/pkg/hasher/hasher_test.go
new file mode 100644
index 000000000000..0b6f8dccca80
--- /dev/null
+++ b/pkg/hasher/hasher_test.go
@@ -0,0 +1,134 @@
+package hasher
+
+import (
+	"bytes"
+	"encoding/hex"
+	"errors"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestHasherHash checks Blake2b digests against known vectors and verifies
+// that input larger than maxInputSize is rejected with an InputTooLargeError.
+func TestHasherHash(t *testing.T) {
+	testCases := []struct {
+		name        string
+		hasher      Hasher
+		input       []byte
+		expectedHex string
+		expectError error
+	}{
+		{
+			name:        "Blake2b with 'Hello, World!'",
+			hasher:      NewBlake2B(),
+			input:       []byte("Hello, World!"),
+			expectedHex: "511bc81dde11180838c562c82bb35f3223f46061ebde4a955c27b3f489cf1e03",
+		},
+		{
+			name:        "Blake2b input at max size",
+			hasher:      NewBlake2B(),
+			input:       bytes.Repeat([]byte("a"), maxInputSize),
+			expectedHex: "605fd8458957df95394e9bf812f385264267c679e4899dc198ca67db4029d0ea",
+		},
+		{
+			name:        "Blake2b empty input",
+			hasher:      NewBlake2B(),
+			input:       []byte(""),
+			expectedHex: "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8",
+		},
+		{
+			name:        "Blake2b input exceeds max size",
+			hasher:      NewBlake2B(),
+			input:       bytes.Repeat([]byte("a"), maxInputSize+1),
+			expectError: &InputTooLargeError{},
+		},
+	}
+
+	for _, tc := range testCases {
+		// Copy the loop variable: the parallel subtest body runs after the loop
+		// has advanced, which matters on toolchains older than Go 1.22.
+		tc := tc
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			got, err := tc.hasher.Hash(tc.input)
+			checkError(t, err, tc.expectError, len(tc.input))
+
+			if tc.expectError != nil {
+				assert.Nil(t, got, "no digest expected when hashing fails")
+				return
+			}
+
+			expected, err := hex.DecodeString(tc.expectedHex)
+			if err != nil {
+				t.Fatalf("invalid expected hex string '%s': %v", tc.expectedHex, err)
+			}
+
+			if !bytes.Equal(got, expected) {
+				t.Errorf("hash mismatch.\nGot: %x\nExpected: %x", got, expected)
+			}
+		})
+	}
+}
+
+// checkError asserts that err matches the expectation in expectError:
+// nil means no error is allowed, an *InputTooLargeError means err must be one
+// carrying inputSize and maxInputSize, and anything else is matched with errors.Is.
+func checkError(t *testing.T, err, expectError error, inputSize int) {
+	t.Helper()
+
+	if expectError == nil {
+		assert.NoError(t, err)
+		return
+	}
+
+	var wantTooLarge *InputTooLargeError
+	if !errors.As(expectError, &wantTooLarge) {
+		// Never let an unrecognized expectation pass without an assertion.
+		assert.ErrorIs(t, err, expectError)
+		return
+	}
+
+	var gotTooLarge *InputTooLargeError
+	if assert.ErrorAs(t, err, &gotTooLarge) {
+		assert.Equal(t, inputSize, gotTooLarge.inputSize)
+		assert.Equal(t, maxInputSize, gotTooLarge.maxSize)
+	}
+}
+
+// TestBlake2bHashIdempotency verifies that reusing one hasher for the same
+// input yields the same digest, i.e. state from the first call does not leak.
+func TestBlake2bHashIdempotency(t *testing.T) {
+	t.Parallel()
+
+	hasher := NewBlake2B()
+	input := bytes.Repeat([]byte("a"), maxInputSize)
+
+	hash1, err1 := hasher.Hash(input)
+	assert.NoError(t, err1, "unexpected error on first hash")
+
+	hash2, err2 := hasher.Hash(input)
+	assert.NoError(t, err2, "unexpected error on second hash")
+
+	if !bytes.Equal(hash1, hash2) {
+		t.Errorf("hash results are not identical.\nFirst: %x\nSecond: %x", hash1, hash2)
+	}
+}
+
+var sampleData = []byte("The quick brown fox jumps over the lazy dog")
+
+// BenchmarkHasherPerGoroutine_Blake2b benchmarks hashing using separate Blake2b Hasher instances
+// for each goroutine, eliminating the need for synchronization.
+func BenchmarkHasherPerGoroutine_Blake2b(b *testing.B) {
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	b.RunParallel(func(pb *testing.PB) {
+		hasher := NewBlake2B()
+		for pb.Next() {
+			_, err := hasher.Hash(sampleData)
+			assert.NoError(b, err)
+		}
+	})
+}