Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[feat] - Add Generic Hasher Interface with Blake2b Implementation #3337

Merged
merged 8 commits into from
Sep 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions pkg/hasher/blake2b.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package hasher

import "golang.org/x/crypto/blake2b"

// Blake2b implements the Hasher interface using the Blake2b algorithm.
// It embeds baseHasher, so its Hash method is the one promoted from baseHasher.
// Like baseHasher, it is not safe for concurrent use; each goroutine should
// use its own instance.
type Blake2b struct{ baseHasher }

// NewBlake2B creates a new Blake2b hasher backed by an unkeyed
// BLAKE2b-256 digest.
func NewBlake2B() *Blake2b {
	// The constructor is called with a nil key. The error is deliberately
	// discarded: per the x/crypto documentation New256 only rejects keys that
	// are too long — TODO confirm against the vendored blake2b version.
	digest, _ := blake2b.New256(nil)

	hasher := new(Blake2b)
	hasher.hash = digest
	return hasher
}
54 changes: 54 additions & 0 deletions pkg/hasher/hasher.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Package hasher provides a generic interface and base implementation for hashing data.
package hasher

import (
"fmt"
"hash"
)

// Hasher defines a generic interface for hashing data.
// Implementations of this interface may choose to be safe for concurrent use,
// but it is not a requirement. Users should check the documentation of specific
// implementations for concurrent safety guarantees.
type Hasher interface {
// Hash takes input data and returns the hashed result.
// It returns an error if the input data is too large; what counts as too
// large is defined by the implementation.
// Hash is expected to be deterministic: calling it multiple times with the
// same input produces the same output, provided the underlying hash function
// is itself deterministic.
Hash(data []byte) ([]byte, error)
}

// baseHasher provides a base implementation for the Hasher interface.
// It uses the hash.Hash interface from the standard library to perform the actual hashing.
//
// baseHasher is not safe for concurrent use: Hash resets and then writes to
// the single underlying hash.Hash. Each goroutine/worker should use its own
// instance of baseHasher for concurrent operations.
// Implementations that require concurrent access should wrap baseHasher with a mutex. (e.g., MutexHasher)
type baseHasher struct{ hash hash.Hash }

// maxInputSize is the largest input, in bytes, that Hash accepts (16 KiB).
const maxInputSize = 1 << 14

// InputTooLargeError is returned when the input data exceeds the maximum allowed size.
type InputTooLargeError struct {
	inputSize int // length in bytes of the rejected input
	maxSize   int // limit that the input was checked against
}

// Error implements the error interface. The message reports the rejected
// input size and the limit as "<inputSize> > <maxSize>".
func (e *InputTooLargeError) Error() string {
	return fmt.Sprintf("input data exceeds the maximum allowed size: %d > %d", e.inputSize, e.maxSize)
}

// Hash computes the hash of the given data.
//
// It returns (nil, *InputTooLargeError) when len(data) exceeds maxInputSize;
// in that case the underlying hash state is left untouched. Otherwise it
// resets the underlying hash before writing, so that previous hashing
// operations do not affect the result, and returns the digest with a nil error.
func (b *baseHasher) Hash(data []byte) ([]byte, error) {
	if len(data) > maxInputSize {
		return nil, &InputTooLargeError{inputSize: len(data), maxSize: maxInputSize}
	}

	b.hash.Reset()
	// The hash.Hash interface documents that Write never returns an error,
	// so the result is deliberately discarded.
	// (https://cs.opensource.google/go/go/+/refs/tags/go1.23.1:src/hash/hash.go;l=27-28)
	_, _ = b.hash.Write(data) //nolint:errcheck
	return b.hash.Sum(nil), nil
}
112 changes: 112 additions & 0 deletions pkg/hasher/hasher_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
package hasher

import (
"bytes"
"encoding/hex"
"errors"
"testing"

"github.com/stretchr/testify/assert"
)

// TestHasherHash runs table-driven checks of Hasher.Hash: each case hashes
// its input with its own hasher instance and compares the result against a
// hex-encoded expected digest, or against an expected error.
func TestHasherHash(t *testing.T) {
	t.Parallel()

	testCases := []struct {
		name        string
		hasher      Hasher
		input       []byte
		expectedHex string
		expectError error
	}{
		{
			name:        "Blake2b with 'Hello, World!'",
			hasher:      NewBlake2B(),
			input:       []byte("Hello, World!"),
			expectedHex: "511bc81dde11180838c562c82bb35f3223f46061ebde4a955c27b3f489cf1e03",
		},
		{
			name:        "Blake2b input at max size",
			hasher:      NewBlake2B(),
			input:       bytes.Repeat([]byte("a"), maxInputSize),
			expectedHex: "605fd8458957df95394e9bf812f385264267c679e4899dc198ca67db4029d0ea",
		},
		{
			name:        "Blake2b empty input",
			hasher:      NewBlake2B(),
			input:       []byte(""),
			expectedHex: "0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8",
		},
		{
			// Exercises the error path: one byte past the limit must be
			// rejected with an InputTooLargeError.
			name:        "Blake2b input exceeds max size",
			hasher:      NewBlake2B(),
			input:       bytes.Repeat([]byte("a"), maxInputSize+1),
			expectError: &InputTooLargeError{},
		},
	}

	// NOTE(review): the parallel subtests capture tc from the range loop; this
	// is only correct with Go 1.22+ per-iteration loop variables — confirm
	// against go.mod.
	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			got, err := tc.hasher.Hash(tc.input)
			checkError(t, err, tc.expectError, len(tc.input))

			// Error cases carry no expected digest to compare.
			if tc.expectError != nil {
				return
			}

			expected, err := hex.DecodeString(tc.expectedHex)
			if err != nil {
				t.Fatalf("invalid expected hex string '%s': %v", tc.expectedHex, err)
			}

			if !bytes.Equal(got, expected) {
				t.Errorf("hash mismatch.\nGot: %x\nExpected: %x", got, expected)
			}
		})
	}
}

func checkError(t *testing.T, err, expectError error, inputSize int) {
t.Helper()

if expectError != nil {
var inputTooLargeError *InputTooLargeError
if errors.As(expectError, &inputTooLargeError) {
var inputTooLargeErr *InputTooLargeError
if assert.ErrorAs(t, err, &inputTooLargeErr) {
assert.Equal(t, inputSize, inputTooLargeErr.inputSize)
assert.Equal(t, maxInputSize, inputTooLargeErr.maxSize)
}
}
} else {
assert.NoError(t, err)
}
}

// TestBlake2bHashIdempotency hashes the same maximum-size input twice with a
// single Blake2b instance and verifies that both digests are identical, i.e.
// that state from the first call does not leak into the second.
func TestBlake2bHashIdempotency(t *testing.T) {
	t.Parallel()

	var (
		h       = NewBlake2B()
		payload = bytes.Repeat([]byte("a"), maxInputSize)
	)

	first, firstErr := h.Hash(payload)
	assert.NoError(t, firstErr, "unexpected error on first hash")

	second, secondErr := h.Hash(payload)
	assert.NoError(t, secondErr, "unexpected error on second hash")

	if bytes.Equal(first, second) {
		return
	}
	t.Errorf("hash results are not identical.\nFirst: %x\nSecond: %x", first, second)
}

// sampleData is the fixed input hashed by the benchmark below.
var sampleData = []byte("The quick brown fox jumps over the lazy dog")

// BenchmarkHasherPerGoroutine_Blake2b measures hashing throughput when every
// parallel worker owns its own Blake2b hasher, so no synchronization between
// workers is required.
func BenchmarkHasherPerGoroutine_Blake2b(b *testing.B) {
	b.ReportAllocs()
	b.ResetTimer()

	// worker is executed by each goroutine that RunParallel starts; the
	// hasher is created once per goroutine, outside the measured loop.
	worker := func(pb *testing.PB) {
		h := NewBlake2B()
		for pb.Next() {
			_, hashErr := h.Hash(sampleData)
			assert.NoError(b, hashErr)
		}
	}
	b.RunParallel(worker)
}
Loading