From d5eda13cb2e57998210b66e080dc96e95b38e5f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Erik=20Pedersen?= Date: Tue, 30 Jul 2024 12:52:54 +0200 Subject: [PATCH] Replace the MD5 hashing of images with xxHash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Note that we only use this for change detection. The previous implementation invoked `MD5FromReaderFast` that created an MD5 hash from 8 64-byte chunks of the file, which is obviously very fast. The new implementation creates the hash from the entire file and ... seems to be even more effective: ``` name old time/op new time/op delta HashImage-10 9.45µs ±21% 10.89µs ± 1% ~ (p=0.343 n=4+4) name old alloc/op new alloc/op delta HashImage-10 144B ± 0% 8B ± 0% -94.44% (p=0.029 n=4+4) name old allocs/op new allocs/op delta HashImage-10 4.00 ± 0% 1.00 ± 0% -75.00% (p=0.029 n=4+4) ``` --- common/hashing/hashing.go | 86 ++++++++++++++++++++++++ common/hashing/hashing_test.go | 79 ++++++++++++++++++++++ helpers/general.go | 13 +--- resources/image.go | 2 +- resources/resource.go | 13 ++-- tpl/hash/hash.go | 12 +--- tpl/tplimpl/template_ast_transformers.go | 4 +- 7 files changed, 180 insertions(+), 29 deletions(-) create mode 100644 common/hashing/hashing.go create mode 100644 common/hashing/hashing_test.go diff --git a/common/hashing/hashing.go b/common/hashing/hashing.go new file mode 100644 index 00000000000..abf8e6b1431 --- /dev/null +++ b/common/hashing/hashing.go @@ -0,0 +1,86 @@ +// Copyright 2024 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package hashing provides common hashing utilities. +package hashing + +import ( + "encoding/hex" + "io" + "sync" + + "github.com/cespare/xxhash/v2" +) + +// XXHashFromReader calculates the xxHash for the given reader. +func XXHashFromReader(r io.ReadSeeker) (uint64, int64, error) { + h := getXxHashReadFrom() + defer putXxHashReadFrom(h) + + size, err := io.Copy(h, r) + if err != nil { + return 0, 0, err + } + return h.Sum64(), size, nil +} + +// XXHashFromString calculates the xxHash for the given string. +func XXHashFromString(s string) (uint64, error) { + h := xxhash.New() + h.WriteString(s) + return h.Sum64(), nil +} + +// XxHashFromStringHexEncoded calculates the xxHash for the given string +// and returns the hash as a hex encoded string. 
+func XxHashFromStringHexEncoded(f string) string { + h := xxhash.New() + h.WriteString(f) + hash := h.Sum(nil) + return hex.EncodeToString(hash) +} + +type xxhashReadFrom struct { + buff []byte + *xxhash.Digest +} + +func (x *xxhashReadFrom) ReadFrom(r io.Reader) (int64, error) { + for { + n, err := r.Read(x.buff) + if n > 0 { + x.Digest.Write(x.buff[:n]) + } + if err != nil { + if err == io.EOF { + err = nil + } + return int64(n), err + } + } +} + +var xXhashReadFromPool = sync.Pool{ + New: func() any { + return &xxhashReadFrom{Digest: xxhash.New(), buff: make([]byte, 48*1024)} + }, +} + +func getXxHashReadFrom() *xxhashReadFrom { + return xXhashReadFromPool.Get().(*xxhashReadFrom) +} + +func putXxHashReadFrom(h *xxhashReadFrom) { + h.Reset() + xXhashReadFromPool.Put(h) +} diff --git a/common/hashing/hashing_test.go b/common/hashing/hashing_test.go new file mode 100644 index 00000000000..2e79b36b9e5 --- /dev/null +++ b/common/hashing/hashing_test.go @@ -0,0 +1,79 @@ +// Copyright 2024 The Hugo Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package hashing + +import ( + "strings" + "testing" + + "github.com/cespare/xxhash/v2" + qt "github.com/frankban/quicktest" +) + +func TestXxHashFromReader(t *testing.T) { + c := qt.New(t) + s := "Hello World" + r := strings.NewReader(s) + got, size, err := XXHashFromReader(r) + c.Assert(err, qt.IsNil) + c.Assert(size, qt.Equals, int64(len(s))) + c.Assert(got, qt.Equals, uint64(7148569436472236994)) +} + +func TestXxHashFromString(t *testing.T) { + c := qt.New(t) + s := "Hello World" + got, err := XXHashFromString(s) + c.Assert(err, qt.IsNil) + c.Assert(got, qt.Equals, uint64(7148569436472236994)) +} + +func TestXxHashFromStringHexEncoded(t *testing.T) { + c := qt.New(t) + s := "The quick brown fox jumps over the lazy dog" + got := XxHashFromStringHexEncoded(s) + // Facit: https://asecuritysite.com/encryption/xxhash?val=The%20quick%20brown%20fox%20jumps%20over%20the%20lazy%20dog + c.Assert(got, qt.Equals, "0b242d361fda71bc") +} + +func BenchmarkXXHashFromReader(b *testing.B) { + r := strings.NewReader("Hello World") + b.ResetTimer() + for i := 0; i < b.N; i++ { + XXHashFromReader(r) + r.Seek(0, 0) + } +} + +func BenchmarkXXHashFromString(b *testing.B) { + s := "Hello World" + b.ResetTimer() + for i := 0; i < b.N; i++ { + XXHashFromString(s) + } +} + +func BenchmarkXXHashFromStringHexEncoded(b *testing.B) { + s := "The quick brown fox jumps over the lazy dog" + b.ResetTimer() + for i := 0; i < b.N; i++ { + XxHashFromStringHexEncoded(s) + } +} + +func xxHashFromString(f string) uint64 { + h := xxhash.New() + h.WriteString(f) + return h.Sum64() +} diff --git a/helpers/general.go b/helpers/general.go index fa65265b836..a294455dd15 100644 --- a/helpers/general.go +++ b/helpers/general.go @@ -27,12 +27,11 @@ import ( "unicode" "unicode/utf8" - "github.com/cespare/xxhash/v2" + bp "github.com/gohugoio/hugo/bufferpool" + "github.com/spf13/afero" "github.com/jdkato/prose/transform" - - bp "github.com/gohugoio/hugo/bufferpool" ) // FilePathSeparator as defined by 
os.Separator. @@ -258,13 +257,7 @@ func SliceToLower(s []string) []string { return l } -// XxHashString takes a string and returns its xxHash hash. -func XxHashString(f string) string { - h := xxhash.New() - h.WriteString(f) - hash := h.Sum(nil) - return hex.EncodeToString(hash) -} +// XXHashFromReader creates a xxHash hash from the given reader. // MD5String takes a string and returns its MD5 hash. func MD5String(f string) string { diff --git a/resources/image.go b/resources/image.go index 188f4962460..56f0981f281 100644 --- a/resources/image.go +++ b/resources/image.go @@ -493,7 +493,7 @@ func (i *imageResource) relTargetPathFromConfig(conf images.ImageConfig) interna } h := i.hash() - idStr := fmt.Sprintf("_hu%s_%d", h, i.size()) + idStr := fmt.Sprintf("_hu%d_%d", h, i.size()) // Do not change for no good reason. const md5Threshold = 100 diff --git a/resources/resource.go b/resources/resource.go index 19270307edc..cc7008e5a88 100644 --- a/resources/resource.go +++ b/resources/resource.go @@ -26,6 +26,7 @@ import ( "github.com/gohugoio/hugo/identity" "github.com/gohugoio/hugo/resources/internal" + "github.com/gohugoio/hugo/common/hashing" "github.com/gohugoio/hugo/common/herrors" "github.com/gohugoio/hugo/common/paths" @@ -307,7 +308,7 @@ type fileInfo interface { } type hashProvider interface { - hash() string + hash() uint64 } var _ resource.StaleInfo = (*StaleValue[any])(nil) @@ -403,7 +404,7 @@ func (l *genericResource) size() int64 { return l.h.size } -func (l *genericResource) hash() string { +func (l *genericResource) hash() uint64 { if err := l.h.init(l); err != nil { panic(err) } @@ -628,7 +629,7 @@ type targetPather interface { } type resourceHash struct { - value string + value uint64 size int64 initOnce sync.Once } @@ -636,7 +637,7 @@ type resourceHash struct { func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error { var initErr error r.initOnce.Do(func() { - var hash string + var hash uint64 var size int64 f, err := l.ReadSeekCloser() if 
err != nil { @@ -656,6 +657,6 @@ func (r *resourceHash) init(l hugio.ReadSeekCloserProvider) error { return initErr } -func hashImage(r io.ReadSeeker) (string, int64, error) { - return helpers.MD5FromReaderFast(r) +func hashImage(r io.ReadSeeker) (uint64, int64, error) { + return hashing.XXHashFromReader(r) } diff --git a/tpl/hash/hash.go b/tpl/hash/hash.go index d4a80b342a8..00df4e3cdf0 100644 --- a/tpl/hash/hash.go +++ b/tpl/hash/hash.go @@ -16,10 +16,9 @@ package hash import ( "context" - "encoding/hex" "hash/fnv" - "github.com/cespare/xxhash/v2" + "github.com/gohugoio/hugo/common/hashing" "github.com/gohugoio/hugo/deps" "github.com/gohugoio/hugo/tpl/internal" "github.com/spf13/cast" @@ -51,14 +50,7 @@ func (ns *Namespace) XxHash(v any) (string, error) { return "", err } - hasher := xxhash.New() - - _, err = hasher.WriteString(conv) - if err != nil { - return "", err - } - hash := hasher.Sum(nil) - return hex.EncodeToString(hash), nil + return hashing.XxHashFromStringHexEncoded(conv), nil } const name = "hash" diff --git a/tpl/tplimpl/template_ast_transformers.go b/tpl/tplimpl/template_ast_transformers.go index 9eee64235d4..f95335779f8 100644 --- a/tpl/tplimpl/template_ast_transformers.go +++ b/tpl/tplimpl/template_ast_transformers.go @@ -18,12 +18,12 @@ import ( "fmt" "strings" - "github.com/gohugoio/hugo/helpers" htmltemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/htmltemplate" texttemplate "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate" "github.com/gohugoio/hugo/tpl/internal/go_templates/texttemplate/parse" + "github.com/gohugoio/hugo/common/hashing" "github.com/gohugoio/hugo/common/maps" "github.com/gohugoio/hugo/tpl" "github.com/mitchellh/mapstructure" @@ -254,7 +254,7 @@ func (c *templateContext) handleDefer(withNode *parse.WithNode) { c.err = errors.New("resources.PostProcess cannot be used in a deferred template") return } - innerHash := helpers.XxHashString(s) + innerHash := hashing.XxHashFromStringHexEncoded(s) deferredID 
:= tpl.HugoDeferredTemplatePrefix + innerHash c.deferNodes[deferredID] = inner