diff --git a/go.mod b/go.mod index 193264e14e..a708763d66 100644 --- a/go.mod +++ b/go.mod @@ -59,7 +59,7 @@ require ( github.com/ulikunitz/xz v0.5.11 github.com/vbauerster/mpb/v7 v7.5.3 github.com/vishvananda/netlink v1.2.1-beta.2 - go.etcd.io/bbolt v1.3.6 + go.etcd.io/bbolt v1.3.7 golang.org/x/net v0.5.0 golang.org/x/sync v0.1.0 golang.org/x/sys v0.4.0 diff --git a/go.sum b/go.sum index 70da80db35..df6565c0a2 100644 --- a/go.sum +++ b/go.sum @@ -1115,8 +1115,9 @@ github.com/yvasiyarov/newrelic_platform_go v0.0.0-20140908184405-b21fdbd4370f/go go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.3/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU= go.etcd.io/bbolt v1.3.5/go.mod h1:G5EMThwa9y8QZGBClrRx5EY+Yw9kAhnjy3bSjsnlVTQ= -go.etcd.io/bbolt v1.3.6 h1:/ecaJf0sk1l4l6V4awd65v2C3ILy7MSj+s/x1ADCIMU= go.etcd.io/bbolt v1.3.6/go.mod h1:qXsaaIqmgQH0T+OPdb99Bf+PKfBBQVAdyD6TY9G8XM4= +go.etcd.io/bbolt v1.3.7 h1:j+zJOnnEjF/kyHlDDgGnVL/AIqIJPq8UoB2GSNfkUfQ= +go.etcd.io/bbolt v1.3.7/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw= go.etcd.io/etcd v0.5.0-alpha.5.0.20200910180754-dd1b699fc489/go.mod h1:yVHk9ub3CSBatqGNg7GRmsnfLWtoW60w4eDYfh7vHDg= go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= go.etcd.io/etcd/client/pkg/v3 v3.5.0/go.mod h1:IJHfcCEKxYu1Os13ZdwCwIUTUVGYTSAM3YSwc9/Ac1g= diff --git a/vendor/go.etcd.io/bbolt/.gitignore b/vendor/go.etcd.io/bbolt/.gitignore index 18312f0043..9fa948ebf9 100644 --- a/vendor/go.etcd.io/bbolt/.gitignore +++ b/vendor/go.etcd.io/bbolt/.gitignore @@ -3,5 +3,8 @@ *.swp /bin/ cover.out +cover-*.out /.idea *.iml +/cmd/bbolt/bbolt + diff --git a/vendor/go.etcd.io/bbolt/.travis.yml b/vendor/go.etcd.io/bbolt/.travis.yml deleted file mode 100644 index 452601e49d..0000000000 --- a/vendor/go.etcd.io/bbolt/.travis.yml +++ /dev/null @@ -1,18 +0,0 @@ -language: go -go_import_path: go.etcd.io/bbolt - -sudo: false - -go: -- 1.15 - -before_install: -- go get -v golang.org/x/sys/unix -- go get -v honnef.co/go/tools/... -- go get -v github.com/kisielk/errcheck - -script: -- make fmt -- make test -- make race -# - make errcheck diff --git a/vendor/go.etcd.io/bbolt/Makefile b/vendor/go.etcd.io/bbolt/Makefile index 21ecf48f61..18154c6388 100644 --- a/vendor/go.etcd.io/bbolt/Makefile +++ b/vendor/go.etcd.io/bbolt/Makefile @@ -2,35 +2,62 @@ BRANCH=`git rev-parse --abbrev-ref HEAD` COMMIT=`git rev-parse --short HEAD` GOLDFLAGS="-X main.branch $(BRANCH) -X main.commit $(COMMIT)" -race: - @TEST_FREELIST_TYPE=hashmap go test -v -race -test.run="TestSimulate_(100op|1000op)" - @echo "array freelist test" - @TEST_FREELIST_TYPE=array go test -v -race -test.run="TestSimulate_(100op|1000op)" - +TESTFLAGS_RACE=-race=false +ifdef ENABLE_RACE + TESTFLAGS_RACE=-race=true +endif + +TESTFLAGS_CPU= +ifdef CPU + TESTFLAGS_CPU=-cpu=$(CPU) +endif +TESTFLAGS = $(TESTFLAGS_RACE) $(TESTFLAGS_CPU) $(EXTRA_TESTFLAGS) + +.PHONY: fmt fmt: !(gofmt -l -s -d $(shell find . -name \*.go) | grep '[a-z]') -# go get honnef.co/go/tools/simple -gosimple: - gosimple ./... +.PHONY: lint +lint: + golangci-lint run ./... -# go get honnef.co/go/tools/unused -unused: - unused ./... +.PHONY: test +test: + @echo "hashmap freelist test" + TEST_FREELIST_TYPE=hashmap go test -v ${TESTFLAGS} -timeout 30m + TEST_FREELIST_TYPE=hashmap go test -v ${TESTFLAGS} ./cmd/bbolt -# go get github.com/kisielk/errcheck -errcheck: - @errcheck -ignorepkg=bytes -ignore=os:Remove go.etcd.io/bbolt + @echo "array freelist test" + TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} -timeout 30m + TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} ./cmd/bbolt -test: - TEST_FREELIST_TYPE=hashmap go test -timeout 20m -v -coverprofile cover.out -covermode atomic - # Note: gets "program not an importable package" in out of path builds - TEST_FREELIST_TYPE=hashmap go test -v ./cmd/bbolt +.PHONY: coverage +coverage: + @echo "hashmap freelist test" + TEST_FREELIST_TYPE=hashmap go test -v -timeout 30m \ + -coverprofile cover-freelist-hashmap.out -covermode atomic @echo "array freelist test" + TEST_FREELIST_TYPE=array go test -v -timeout 30m \ + -coverprofile cover-freelist-array.out -covermode atomic + +.PHONY: gofail-enable +gofail-enable: install-gofail + gofail enable . + +.PHONY: gofail-disable +gofail-disable: + gofail disable . + +.PHONY: install-gofail +install-gofail: + go install go.etcd.io/gofail + +.PHONY: test-failpoint +test-failpoint: + @echo "[failpoint] hashmap freelist test" + TEST_FREELIST_TYPE=hashmap go test -v ${TESTFLAGS} -timeout 30m ./tests/failpoint - @TEST_FREELIST_TYPE=array go test -timeout 20m -v -coverprofile cover.out -covermode atomic - # Note: gets "program not an importable package" in out of path builds - @TEST_FREELIST_TYPE=array go test -v ./cmd/bbolt + @echo "[failpoint] array freelist test" + TEST_FREELIST_TYPE=array go test -v ${TESTFLAGS} -timeout 30m ./tests/failpoint -.PHONY: race fmt errcheck test gosimple unused diff --git a/vendor/go.etcd.io/bbolt/README.md b/vendor/go.etcd.io/bbolt/README.md index f1b4a7b2bf..2be669a60a 100644 --- a/vendor/go.etcd.io/bbolt/README.md +++ b/vendor/go.etcd.io/bbolt/README.md @@ -26,7 +26,7 @@ and setting values. That's it. [gh_ben]: https://github.com/benbjohnson [bolt]: https://github.com/boltdb/bolt [hyc_symas]: https://twitter.com/hyc_symas -[lmdb]: http://symas.com/mdb/ +[lmdb]: https://www.symas.com/symas-embedded-database-lmdb ## Project Status @@ -78,14 +78,23 @@ New minor versions may add additional features to the API. ### Installing To start using Bolt, install Go and run `go get`: - ```sh -$ go get go.etcd.io/bbolt/... +$ go get go.etcd.io/bbolt@latest ``` -This will retrieve the library and install the `bolt` command line utility into -your `$GOBIN` path. +This will retrieve the library and update your `go.mod` and `go.sum` files. + +To run the command line utility, execute: +```sh +$ go run go.etcd.io/bbolt/cmd/bbolt@latest +``` +Run `go install` to install the `bbolt` command line utility into +your `$GOBIN` path, which defaults to `$GOPATH/bin` or `$HOME/go/bin` if the +`GOPATH` environment variable is not set. +```sh +$ go install go.etcd.io/bbolt/cmd/bbolt@latest +``` ### Importing bbolt @@ -933,7 +942,7 @@ Below is a list of public, open source projects that use Bolt: * [ipxed](https://github.com/kelseyhightower/ipxed) - Web interface and api for ipxed. * [Ironsmith](https://github.com/timshannon/ironsmith) - A simple, script-driven continuous integration (build - > test -> release) tool, with no external dependencies * [Kala](https://github.com/ajvb/kala) - Kala is a modern job scheduler optimized to run on a single node. It is persistent, JSON over HTTP API, ISO 8601 duration notation, and dependent jobs. -* [Key Value Access Langusge (KVAL)](https://github.com/kval-access-language) - A proposed grammar for key-value datastores offering a bbolt binding. +* [Key Value Access Language (KVAL)](https://github.com/kval-access-language) - A proposed grammar for key-value datastores offering a bbolt binding. * [LedisDB](https://github.com/siddontang/ledisdb) - A high performance NoSQL, using Bolt as optional storage. * [lru](https://github.com/crowdriff/lru) - Easy to use Bolt-backed Least-Recently-Used (LRU) read-through cache with chainable remote stores. * [mbuckets](https://github.com/abhigupta912/mbuckets) - A Bolt wrapper that allows easy operations on multi level (nested) buckets. diff --git a/vendor/go.etcd.io/bbolt/bolt_arm64.go b/vendor/go.etcd.io/bbolt/bolt_arm64.go index 810dfd55c5..447bc19733 100644 --- a/vendor/go.etcd.io/bbolt/bolt_arm64.go +++ b/vendor/go.etcd.io/bbolt/bolt_arm64.go @@ -1,3 +1,4 @@ +//go:build arm64 // +build arm64 package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_loong64.go b/vendor/go.etcd.io/bbolt/bolt_loong64.go new file mode 100644 index 0000000000..31c17c1d07 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/bolt_loong64.go @@ -0,0 +1,10 @@ +//go:build loong64 +// +build loong64 + +package bbolt + +// maxMapSize represents the largest mmap size supported by Bolt. +const maxMapSize = 0xFFFFFFFFFFFF // 256TB + +// maxAllocSize is the size used when creating array pointers. +const maxAllocSize = 0x7FFFFFFF diff --git a/vendor/go.etcd.io/bbolt/bolt_mips64x.go b/vendor/go.etcd.io/bbolt/bolt_mips64x.go index dd8ffe1239..a9385beb68 100644 --- a/vendor/go.etcd.io/bbolt/bolt_mips64x.go +++ b/vendor/go.etcd.io/bbolt/bolt_mips64x.go @@ -1,3 +1,4 @@ +//go:build mips64 || mips64le // +build mips64 mips64le package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_mipsx.go b/vendor/go.etcd.io/bbolt/bolt_mipsx.go index a669703a4e..ed734ff7f3 100644 --- a/vendor/go.etcd.io/bbolt/bolt_mipsx.go +++ b/vendor/go.etcd.io/bbolt/bolt_mipsx.go @@ -1,3 +1,4 @@ +//go:build mips || mipsle // +build mips mipsle package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_ppc.go b/vendor/go.etcd.io/bbolt/bolt_ppc.go index 84e545ef3e..e403f57d8a 100644 --- a/vendor/go.etcd.io/bbolt/bolt_ppc.go +++ b/vendor/go.etcd.io/bbolt/bolt_ppc.go @@ -1,3 +1,4 @@ +//go:build ppc // +build ppc package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_ppc64.go b/vendor/go.etcd.io/bbolt/bolt_ppc64.go index a76120908c..fcd86529f9 100644 --- a/vendor/go.etcd.io/bbolt/bolt_ppc64.go +++ b/vendor/go.etcd.io/bbolt/bolt_ppc64.go @@ -1,3 +1,4 @@ +//go:build ppc64 // +build ppc64 package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_ppc64le.go b/vendor/go.etcd.io/bbolt/bolt_ppc64le.go index c830f2fc77..20234aca46 100644 --- a/vendor/go.etcd.io/bbolt/bolt_ppc64le.go +++ b/vendor/go.etcd.io/bbolt/bolt_ppc64le.go @@ -1,3 +1,4 @@ +//go:build ppc64le // +build ppc64le package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_riscv64.go b/vendor/go.etcd.io/bbolt/bolt_riscv64.go index c967613b00..060f30c73c 100644 --- a/vendor/go.etcd.io/bbolt/bolt_riscv64.go +++ b/vendor/go.etcd.io/bbolt/bolt_riscv64.go @@ -1,3 +1,4 @@ +//go:build riscv64 // +build riscv64 package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_s390x.go b/vendor/go.etcd.io/bbolt/bolt_s390x.go index ff2a560970..92d2755adb 100644 --- a/vendor/go.etcd.io/bbolt/bolt_s390x.go +++ b/vendor/go.etcd.io/bbolt/bolt_s390x.go @@ -1,3 +1,4 @@ +//go:build s390x // +build s390x package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_unix.go b/vendor/go.etcd.io/bbolt/bolt_unix.go index 4e5f65ccc8..757ae4d1a4 100644 --- a/vendor/go.etcd.io/bbolt/bolt_unix.go +++ b/vendor/go.etcd.io/bbolt/bolt_unix.go @@ -1,3 +1,4 @@ +//go:build !windows && !plan9 && !solaris && !aix // +build !windows,!plan9,!solaris,!aix package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_unix_aix.go b/vendor/go.etcd.io/bbolt/bolt_unix_aix.go index a64c16f512..6dea4294dc 100644 --- a/vendor/go.etcd.io/bbolt/bolt_unix_aix.go +++ b/vendor/go.etcd.io/bbolt/bolt_unix_aix.go @@ -1,3 +1,4 @@ +//go:build aix // +build aix package bbolt diff --git a/vendor/go.etcd.io/bbolt/bolt_windows.go b/vendor/go.etcd.io/bbolt/bolt_windows.go index fca178bd29..e5dde27454 100644 --- a/vendor/go.etcd.io/bbolt/bolt_windows.go +++ b/vendor/go.etcd.io/bbolt/bolt_windows.go @@ -6,40 +6,10 @@ import ( "syscall" "time" "unsafe" -) - -// LockFileEx code derived from golang build filemutex_windows.go @ v1.5.1 -var ( - modkernel32 = syscall.NewLazyDLL("kernel32.dll") - procLockFileEx = modkernel32.NewProc("LockFileEx") - procUnlockFileEx = modkernel32.NewProc("UnlockFileEx") -) - -const ( - // see https://msdn.microsoft.com/en-us/library/windows/desktop/aa365203(v=vs.85).aspx - flagLockExclusive = 2 - flagLockFailImmediately = 1 - // see https://msdn.microsoft.com/en-us/library/windows/desktop/ms681382(v=vs.85).aspx - errLockViolation syscall.Errno = 0x21 + "golang.org/x/sys/windows" ) -func lockFileEx(h syscall.Handle, flags, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { - r, _, err := procLockFileEx.Call(uintptr(h), uintptr(flags), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol))) - if r == 0 { - return err - } - return nil -} - -func unlockFileEx(h syscall.Handle, reserved, locklow, lockhigh uint32, ol *syscall.Overlapped) (err error) { - r, _, err := procUnlockFileEx.Call(uintptr(h), uintptr(reserved), uintptr(locklow), uintptr(lockhigh), uintptr(unsafe.Pointer(ol)), 0) - if r == 0 { - return err - } - return nil -} - // fdatasync flushes written data to a file descriptor. func fdatasync(db *DB) error { return db.file.Sync() @@ -51,22 +21,22 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error { if timeout != 0 { t = time.Now() } - var flag uint32 = flagLockFailImmediately + var flags uint32 = windows.LOCKFILE_FAIL_IMMEDIATELY if exclusive { - flag |= flagLockExclusive + flags |= windows.LOCKFILE_EXCLUSIVE_LOCK } for { // Fix for https://github.com/etcd-io/bbolt/issues/121. Use byte-range // -1..0 as the lock on the database file. var m1 uint32 = (1 << 32) - 1 // -1 in a uint32 - err := lockFileEx(syscall.Handle(db.file.Fd()), flag, 0, 1, 0, &syscall.Overlapped{ + err := windows.LockFileEx(windows.Handle(db.file.Fd()), flags, 0, 1, 0, &windows.Overlapped{ Offset: m1, OffsetHigh: m1, }) if err == nil { return nil - } else if err != errLockViolation { + } else if err != windows.ERROR_LOCK_VIOLATION { return err } @@ -83,34 +53,37 @@ func flock(db *DB, exclusive bool, timeout time.Duration) error { // funlock releases an advisory lock on a file descriptor. func funlock(db *DB) error { var m1 uint32 = (1 << 32) - 1 // -1 in a uint32 - err := unlockFileEx(syscall.Handle(db.file.Fd()), 0, 1, 0, &syscall.Overlapped{ + return windows.UnlockFileEx(windows.Handle(db.file.Fd()), 0, 1, 0, &windows.Overlapped{ Offset: m1, OffsetHigh: m1, }) - return err } // mmap memory maps a DB's data file. // Based on: https://github.com/edsrzf/mmap-go func mmap(db *DB, sz int) error { + var sizelo, sizehi uint32 + if !db.readOnly { // Truncate the database to the size of the mmap. if err := db.file.Truncate(int64(sz)); err != nil { return fmt.Errorf("truncate: %s", err) } + sizehi = uint32(sz >> 32) + sizelo = uint32(sz) & 0xffffffff } // Open a file mapping handle. - sizelo := uint32(sz >> 32) - sizehi := uint32(sz) & 0xffffffff - h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizelo, sizehi, nil) + h, errno := syscall.CreateFileMapping(syscall.Handle(db.file.Fd()), nil, syscall.PAGE_READONLY, sizehi, sizelo, nil) if h == 0 { return os.NewSyscallError("CreateFileMapping", errno) } // Create the memory map. - addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, uintptr(sz)) + addr, errno := syscall.MapViewOfFile(h, syscall.FILE_MAP_READ, 0, 0, 0) if addr == 0 { + // Do our best and report error returned from MapViewOfFile. + _ = syscall.CloseHandle(h) return os.NewSyscallError("MapViewOfFile", errno) } @@ -134,8 +107,11 @@ func munmap(db *DB) error { } addr := (uintptr)(unsafe.Pointer(&db.data[0])) + var err1 error if err := syscall.UnmapViewOfFile(addr); err != nil { - return os.NewSyscallError("UnmapViewOfFile", err) + err1 = os.NewSyscallError("UnmapViewOfFile", err) } - return nil + db.data = nil + db.datasz = 0 + return err1 } diff --git a/vendor/go.etcd.io/bbolt/boltsync_unix.go b/vendor/go.etcd.io/bbolt/boltsync_unix.go index 9587afefee..81e09a5310 100644 --- a/vendor/go.etcd.io/bbolt/boltsync_unix.go +++ b/vendor/go.etcd.io/bbolt/boltsync_unix.go @@ -1,3 +1,4 @@ +//go:build !windows && !plan9 && !linux && !openbsd // +build !windows,!plan9,!linux,!openbsd package bbolt diff --git a/vendor/go.etcd.io/bbolt/bucket.go b/vendor/go.etcd.io/bbolt/bucket.go index d8750b1487..054467af30 100644 --- a/vendor/go.etcd.io/bbolt/bucket.go +++ b/vendor/go.etcd.io/bbolt/bucket.go @@ -81,7 +81,7 @@ func (b *Bucket) Writable() bool { // Do not use a cursor after the transaction is closed. func (b *Bucket) Cursor() *Cursor { // Update transaction statistics. - b.tx.stats.CursorCount++ + b.tx.stats.IncCursorCount(1) // Allocate and return a cursor. return &Cursor{ @@ -229,11 +229,9 @@ func (b *Bucket) DeleteBucket(key []byte) error { // Recursively delete all child buckets. child := b.Bucket(key) - err := child.ForEach(func(k, v []byte) error { - if _, _, childFlags := child.Cursor().seek(k); (childFlags & bucketLeafFlag) != 0 { - if err := child.DeleteBucket(k); err != nil { - return fmt.Errorf("delete bucket: %s", err) - } + err := child.ForEachBucket(func(k []byte) error { + if err := child.DeleteBucket(k); err != nil { + return fmt.Errorf("delete bucket: %s", err) } return nil }) @@ -353,7 +351,7 @@ func (b *Bucket) SetSequence(v uint64) error { _ = b.node(b.root, nil) } - // Increment and return the sequence. + // Set the sequence. b.bucket.sequence = v return nil } @@ -378,6 +376,7 @@ func (b *Bucket) NextSequence() (uint64, error) { } // ForEach executes a function for each key/value pair in a bucket. +// Because ForEach uses a Cursor, the iteration over keys is in lexicographical order. // If the provided function returns an error then the iteration is stopped and // the error is returned to the caller. The provided function must not modify // the bucket; this will result in undefined behavior. @@ -394,7 +393,22 @@ func (b *Bucket) ForEach(fn func(k, v []byte) error) error { return nil } -// Stat returns stats on a bucket. +func (b *Bucket) ForEachBucket(fn func(k []byte) error) error { + if b.tx.db == nil { + return ErrTxClosed + } + c := b.Cursor() + for k, _, flags := c.first(); k != nil; k, _, flags = c.next() { + if flags&bucketLeafFlag != 0 { + if err := fn(k); err != nil { + return err + } + } + } + return nil +} + +// Stats returns stats on a bucket. func (b *Bucket) Stats() BucketStats { var s, subStats BucketStats pageSize := b.tx.db.pageSize @@ -402,7 +416,7 @@ func (b *Bucket) Stats() BucketStats { if b.root == 0 { s.InlineBucketN += 1 } - b.forEachPage(func(p *page, depth int) { + b.forEachPage(func(p *page, depth int, pgstack []pgid) { if (p.flags & leafPageFlag) != 0 { s.KeyN += int(p.count) @@ -461,7 +475,7 @@ func (b *Bucket) Stats() BucketStats { // Keep track of maximum page depth. if depth+1 > s.Depth { - s.Depth = (depth + 1) + s.Depth = depth + 1 } }) @@ -477,15 +491,15 @@ func (b *Bucket) Stats() BucketStats { } // forEachPage iterates over every page in a bucket, including inline pages. -func (b *Bucket) forEachPage(fn func(*page, int)) { +func (b *Bucket) forEachPage(fn func(*page, int, []pgid)) { // If we have an inline page then just use that. if b.page != nil { - fn(b.page, 0) + fn(b.page, 0, []pgid{b.root}) return } // Otherwise traverse the page hierarchy. - b.tx.forEachPage(b.root, 0, fn) + b.tx.forEachPage(b.root, fn) } // forEachPageNode iterates over every page (or node) in a bucket. @@ -499,8 +513,8 @@ func (b *Bucket) forEachPageNode(fn func(*page, *node, int)) { b._forEachPageNode(b.root, 0, fn) } -func (b *Bucket) _forEachPageNode(pgid pgid, depth int, fn func(*page, *node, int)) { - var p, n = b.pageNode(pgid) +func (b *Bucket) _forEachPageNode(pgId pgid, depth int, fn func(*page, *node, int)) { + var p, n = b.pageNode(pgId) // Execute function. fn(p, n, depth) @@ -640,11 +654,11 @@ func (b *Bucket) rebalance() { } // node creates a node from a page and associates it with a given parent. -func (b *Bucket) node(pgid pgid, parent *node) *node { +func (b *Bucket) node(pgId pgid, parent *node) *node { _assert(b.nodes != nil, "nodes map expected") // Retrieve node if it's already been created. - if n := b.nodes[pgid]; n != nil { + if n := b.nodes[pgId]; n != nil { return n } @@ -659,15 +673,15 @@ func (b *Bucket) node(pgid pgid, parent *node) *node { // Use the inline page if this is an inline bucket. var p = b.page if p == nil { - p = b.tx.page(pgid) + p = b.tx.page(pgId) } // Read the page into the node and cache it. n.read(p) - b.nodes[pgid] = n + b.nodes[pgId] = n // Update statistics. - b.tx.stats.NodeCount++ + b.tx.stats.IncNodeCount(1) return n } diff --git a/vendor/go.etcd.io/bbolt/compact.go b/vendor/go.etcd.io/bbolt/compact.go index e4fe91b046..5f1d4c3b50 100644 --- a/vendor/go.etcd.io/bbolt/compact.go +++ b/vendor/go.etcd.io/bbolt/compact.go @@ -12,7 +12,11 @@ func Compact(dst, src *DB, txMaxSize int64) error { if err != nil { return err } - defer tx.Rollback() + defer func() { + if tempErr := tx.Rollback(); tempErr != nil { + err = tempErr + } + }() if err := walk(src, func(keys [][]byte, k, v []byte, seq uint64) error { // On each key/value, check if we have exceeded tx size. @@ -73,8 +77,9 @@ func Compact(dst, src *DB, txMaxSize int64) error { }); err != nil { return err } + err = tx.Commit() - return tx.Commit() + return err } // walkFunc is the type of the function called for keys (buckets and "normal" diff --git a/vendor/go.etcd.io/bbolt/cursor.go b/vendor/go.etcd.io/bbolt/cursor.go index 98aeb449a4..5dafb0cac3 100644 --- a/vendor/go.etcd.io/bbolt/cursor.go +++ b/vendor/go.etcd.io/bbolt/cursor.go @@ -6,7 +6,8 @@ import ( "sort" ) -// Cursor represents an iterator that can traverse over all key/value pairs in a bucket in sorted order. +// Cursor represents an iterator that can traverse over all key/value pairs in a bucket +// in lexicographical order. // Cursors see nested buckets with value == nil. // Cursors can be obtained from a transaction and are valid as long as the transaction is open. // @@ -30,10 +31,18 @@ func (c *Cursor) Bucket() *Bucket { // The returned key and value are only valid for the life of the transaction. func (c *Cursor) First() (key []byte, value []byte) { _assert(c.bucket.tx.db != nil, "tx closed") + k, v, flags := c.first() + if (flags & uint32(bucketLeafFlag)) != 0 { + return k, nil + } + return k, v +} + +func (c *Cursor) first() (key []byte, value []byte, flags uint32) { c.stack = c.stack[:0] p, n := c.bucket.pageNode(c.bucket.root) c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) - c.first() + c.goToFirstElementOnTheStack() // If we land on an empty page then move to the next value. // https://github.com/boltdb/bolt/issues/450 @@ -43,10 +52,9 @@ func (c *Cursor) First() (key []byte, value []byte) { k, v, flags := c.keyValue() if (flags & uint32(bucketLeafFlag)) != 0 { - return k, nil + return k, nil, flags } - return k, v - + return k, v, flags } // Last moves the cursor to the last item in the bucket and returns its key and value. @@ -60,6 +68,17 @@ func (c *Cursor) Last() (key []byte, value []byte) { ref.index = ref.count() - 1 c.stack = append(c.stack, ref) c.last() + + // If this is an empty page (calling Delete may result in empty pages) + // we call prev to find the last page that is not empty + for len(c.stack) > 0 && c.stack[len(c.stack)-1].count() == 0 { + c.prev() + } + + if len(c.stack) == 0 { + return nil, nil + } + k, v, flags := c.keyValue() if (flags & uint32(bucketLeafFlag)) != 0 { return k, nil @@ -84,37 +103,20 @@ func (c *Cursor) Next() (key []byte, value []byte) { // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Prev() (key []byte, value []byte) { _assert(c.bucket.tx.db != nil, "tx closed") - - // Attempt to move back one element until we're successful. - // Move up the stack as we hit the beginning of each page in our stack. - for i := len(c.stack) - 1; i >= 0; i-- { - elem := &c.stack[i] - if elem.index > 0 { - elem.index-- - break - } - c.stack = c.stack[:i] - } - - // If we've hit the end then return nil. - if len(c.stack) == 0 { - return nil, nil - } - - // Move down the stack to find the last element of the last leaf under this branch. - c.last() - k, v, flags := c.keyValue() + k, v, flags := c.prev() if (flags & uint32(bucketLeafFlag)) != 0 { return k, nil } return k, v } -// Seek moves the cursor to a given key and returns it. +// Seek moves the cursor to a given key using a b-tree search and returns it. // If the key does not exist then the next key is used. If no keys // follow, a nil key is returned. // The returned key and value are only valid for the life of the transaction. func (c *Cursor) Seek(seek []byte) (key []byte, value []byte) { + _assert(c.bucket.tx.db != nil, "tx closed") + k, v, flags := c.seek(seek) // If we ended up after the last element of a page then move to the next one. @@ -152,8 +154,6 @@ func (c *Cursor) Delete() error { // seek moves the cursor to a given key and returns it. // If the key does not exist then the next key is used. func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { - _assert(c.bucket.tx.db != nil, "tx closed") - // Start from root page/node and traverse to correct page. c.stack = c.stack[:0] c.search(seek, c.bucket.root) @@ -163,7 +163,7 @@ func (c *Cursor) seek(seek []byte) (key []byte, value []byte, flags uint32) { } // first moves the cursor to the first leaf element under the last page in the stack. -func (c *Cursor) first() { +func (c *Cursor) goToFirstElementOnTheStack() { for { // Exit when we hit a leaf page. var ref = &c.stack[len(c.stack)-1] @@ -172,13 +172,13 @@ func (c *Cursor) first() { } // Keep adding pages pointing to the first element to the stack. - var pgid pgid + var pgId pgid if ref.node != nil { - pgid = ref.node.inodes[ref.index].pgid + pgId = ref.node.inodes[ref.index].pgid } else { - pgid = ref.page.branchPageElement(uint16(ref.index)).pgid + pgId = ref.page.branchPageElement(uint16(ref.index)).pgid } - p, n := c.bucket.pageNode(pgid) + p, n := c.bucket.pageNode(pgId) c.stack = append(c.stack, elemRef{page: p, node: n, index: 0}) } } @@ -193,13 +193,13 @@ func (c *Cursor) last() { } // Keep adding pages pointing to the last element in the stack. - var pgid pgid + var pgId pgid if ref.node != nil { - pgid = ref.node.inodes[ref.index].pgid + pgId = ref.node.inodes[ref.index].pgid } else { - pgid = ref.page.branchPageElement(uint16(ref.index)).pgid + pgId = ref.page.branchPageElement(uint16(ref.index)).pgid } - p, n := c.bucket.pageNode(pgid) + p, n := c.bucket.pageNode(pgId) var nextRef = elemRef{page: p, node: n} nextRef.index = nextRef.count() - 1 @@ -231,7 +231,7 @@ func (c *Cursor) next() (key []byte, value []byte, flags uint32) { // Otherwise start from where we left off in the stack and find the // first element of the first leaf page. c.stack = c.stack[:i+1] - c.first() + c.goToFirstElementOnTheStack() // If this is an empty page then restart and move back up the stack. // https://github.com/boltdb/bolt/issues/450 @@ -243,9 +243,33 @@ func (c *Cursor) next() (key []byte, value []byte, flags uint32) { } } +// prev moves the cursor to the previous item in the bucket and returns its key and value. +// If the cursor is at the beginning of the bucket then a nil key and value are returned. +func (c *Cursor) prev() (key []byte, value []byte, flags uint32) { + // Attempt to move back one element until we're successful. + // Move up the stack as we hit the beginning of each page in our stack. + for i := len(c.stack) - 1; i >= 0; i-- { + elem := &c.stack[i] + if elem.index > 0 { + elem.index-- + break + } + c.stack = c.stack[:i] + } + + // If we've hit the end then return nil. + if len(c.stack) == 0 { + return nil, nil, 0 + } + + // Move down the stack to find the last element of the last leaf under this branch. + c.last() + return c.keyValue() +} + // search recursively performs a binary search against a given page/node until it finds a given key. -func (c *Cursor) search(key []byte, pgid pgid) { - p, n := c.bucket.pageNode(pgid) +func (c *Cursor) search(key []byte, pgId pgid) { + p, n := c.bucket.pageNode(pgId) if p != nil && (p.flags&(branchPageFlag|leafPageFlag)) == 0 { panic(fmt.Sprintf("invalid page type: %d: %x", p.id, p.flags)) } diff --git a/vendor/go.etcd.io/bbolt/db.go b/vendor/go.etcd.io/bbolt/db.go index a798c390a2..c9422127e1 100644 --- a/vendor/go.etcd.io/bbolt/db.go +++ b/vendor/go.etcd.io/bbolt/db.go @@ -4,7 +4,7 @@ import ( "errors" "fmt" "hash/fnv" - "log" + "io" "os" "runtime" "sort" @@ -81,7 +81,7 @@ type DB struct { NoFreelistSync bool // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures - // dramatic performance degradation if database is large and framentation in freelist is common. + // dramatic performance degradation if database is large and fragmentation in freelist is common. // The alternative one is using hashmap, it is faster in almost all circumstances // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. // The default type is array @@ -95,6 +95,11 @@ type DB struct { // https://github.com/boltdb/bolt/issues/284 NoGrowSync bool + // When `true`, bbolt will always load the free pages when opening the DB. + // When opening db in write mode, this flag will always automatically + // set to `true`. + PreLoadFreelist bool + // If you want to read the entire database fast, you can set MmapFlag to // syscall.MAP_POPULATE on Linux 2.6.23+ for sequential read-ahead. MmapFlags int @@ -129,6 +134,9 @@ type DB struct { path string openFile func(string, int, os.FileMode) (*os.File, error) file *os.File + // `dataref` isn't used at all on Windows, and the golangci-lint + // always fails on Windows platform. + //nolint dataref []byte // mmap'ed readonly, write throws SEGV data *[maxMapSize]byte datasz int @@ -193,6 +201,7 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { db.NoGrowSync = options.NoGrowSync db.MmapFlags = options.MmapFlags db.NoFreelistSync = options.NoFreelistSync + db.PreLoadFreelist = options.PreLoadFreelist db.FreelistType = options.FreelistType db.Mlock = options.Mlock @@ -205,6 +214,9 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { if options.ReadOnly { flag = os.O_RDONLY db.readOnly = true + } else { + // always load free pages in write mode + db.PreLoadFreelist = true } db.openFile = options.OpenFile @@ -252,21 +264,9 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { return nil, err } } else { - // Read the first meta page to determine the page size. - var buf [0x1000]byte - // If we can't read the page size, but can read a page, assume - // it's the same as the OS or one given -- since that's how the - // page size was chosen in the first place. - // - // If the first page is invalid and this OS uses a different - // page size than what the database was created with then we - // are out of luck and cannot access the database. - // - // TODO: scan for next page - if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) { - if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil { - db.pageSize = int(m.pageSize) - } + // try to get the page size from the metadata pages + if pgSize, err := db.getPageSize(); err == nil { + db.pageSize = pgSize } else { _ = db.close() return nil, ErrInvalid @@ -286,12 +286,14 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { return nil, err } + if db.PreLoadFreelist { + db.loadFreelist() + } + if db.readOnly { return db, nil } - db.loadFreelist() - // Flush freelist when transitioning from no sync to sync so // NoFreelistSync unaware boltdb can open the db later. if !db.NoFreelistSync && !db.hasSyncedFreelist() { @@ -309,6 +311,96 @@ func Open(path string, mode os.FileMode, options *Options) (*DB, error) { return db, nil } +// getPageSize reads the pageSize from the meta pages. It tries +// to read the first meta page firstly. If the first page is invalid, +// then it tries to read the second page using the default page size. +func (db *DB) getPageSize() (int, error) { + var ( + meta0CanRead, meta1CanRead bool + ) + + // Read the first meta page to determine the page size. + if pgSize, canRead, err := db.getPageSizeFromFirstMeta(); err != nil { + // We cannot read the page size from page 0, but can read page 0. + meta0CanRead = canRead + } else { + return pgSize, nil + } + + // Read the second meta page to determine the page size. + if pgSize, canRead, err := db.getPageSizeFromSecondMeta(); err != nil { + // We cannot read the page size from page 1, but can read page 1. + meta1CanRead = canRead + } else { + return pgSize, nil + } + + // If we can't read the page size from both pages, but can read + // either page, then we assume it's the same as the OS or the one + // given, since that's how the page size was chosen in the first place. + // + // If both pages are invalid, and (this OS uses a different page size + // from what the database was created with or the given page size is + // different from what the database was created with), then we are out + // of luck and cannot access the database. + if meta0CanRead || meta1CanRead { + return db.pageSize, nil + } + + return 0, ErrInvalid +} + +// getPageSizeFromFirstMeta reads the pageSize from the first meta page +func (db *DB) getPageSizeFromFirstMeta() (int, bool, error) { + var buf [0x1000]byte + var metaCanRead bool + if bw, err := db.file.ReadAt(buf[:], 0); err == nil && bw == len(buf) { + metaCanRead = true + if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil { + return int(m.pageSize), metaCanRead, nil + } + } + return 0, metaCanRead, ErrInvalid +} + +// getPageSizeFromSecondMeta reads the pageSize from the second meta page +func (db *DB) getPageSizeFromSecondMeta() (int, bool, error) { + var ( + fileSize int64 + metaCanRead bool + ) + + // get the db file size + if info, err := db.file.Stat(); err != nil { + return 0, metaCanRead, err + } else { + fileSize = info.Size() + } + + // We need to read the second meta page, so we should skip the first page; + // but we don't know the exact page size yet, it's chicken & egg problem. + // The solution is to try all the possible page sizes, which starts from 1KB + // and until 16MB (1024<<14) or the end of the db file + // + // TODO: should we support larger page size? + for i := 0; i <= 14; i++ { + var buf [0x1000]byte + var pos int64 = 1024 << uint(i) + if pos >= fileSize-1024 { + break + } + bw, err := db.file.ReadAt(buf[:], pos) + if (err == nil && bw == len(buf)) || (err == io.EOF && int64(bw) == (fileSize-pos)) { + metaCanRead = true + if m := db.pageInBuffer(buf[:], 0).meta(); m.validate() == nil { + return int(m.pageSize), metaCanRead, nil + } + } + } + + return 0, metaCanRead, ErrInvalid +} + // loadFreelist reads the freelist if it is synced, or reconstructs it // by scanning the DB if it is not synced. It assumes there are no // concurrent accesses being made to the freelist. @@ -372,6 +464,8 @@ func (db *DB) mmap(minsz int) error { } // Memory-map the data file as a byte slice. + // gofail: var mapError string + // return errors.New(mapError) if err := mmap(db, size); err != nil { return err } @@ -399,11 +493,25 @@ func (db *DB) mmap(minsz int) error { return nil } +func (db *DB) invalidate() { + db.dataref = nil + db.data = nil + db.datasz = 0 + + db.meta0 = nil + db.meta1 = nil +} + // munmap unmaps the data file from memory. func (db *DB) munmap() error { + defer db.invalidate() + + // gofail: var unmapError string + // return errors.New(unmapError) if err := munmap(db); err != nil { return fmt.Errorf("unmap error: " + err.Error()) } + return nil } @@ -552,7 +660,7 @@ func (db *DB) close() error { if !db.readOnly { // Unlock the file. if err := funlock(db); err != nil { - log.Printf("bolt.Close(): funlock error: %s", err) + return fmt.Errorf("bolt.Close(): funlock error: %w", err) } } @@ -609,6 +717,13 @@ func (db *DB) beginTx() (*Tx, error) { return nil, ErrDatabaseNotOpen } + // Exit if the database is not correctly mapped. + if db.data == nil { + db.mmaplock.RUnlock() + db.metalock.Unlock() + return nil, ErrInvalidMapping + } + // Create a transaction associated with the database. t := &Tx{} t.init(db) @@ -650,6 +765,12 @@ func (db *DB) beginRWTx() (*Tx, error) { return nil, ErrDatabaseNotOpen } + // Exit if the database is not correctly mapped. + if db.data == nil { + db.rwlock.Unlock() + return nil, ErrInvalidMapping + } + // Create a transaction associated with the database. t := &Tx{writable: true} t.init(db) @@ -924,6 +1045,7 @@ func (db *DB) Stats() Stats { // This is for internal access to the raw data bytes from the C cursor, use // carefully, or not at all. func (db *DB) Info() *Info { + _assert(db.data != nil, "database file isn't correctly mapped") return &Info{uintptr(unsafe.Pointer(&db.data[0])), db.pageSize} } @@ -950,7 +1072,7 @@ func (db *DB) meta() *meta { metaB = db.meta0 } - // Use higher meta page if valid. Otherwise fallback to previous, if valid. + // Use higher meta page if valid. Otherwise, fallback to previous, if valid. if err := metaA.validate(); err == nil { return metaA } else if err := metaB.validate(); err == nil { @@ -1003,7 +1125,7 @@ func (db *DB) grow(sz int) error { // If the data is smaller than the alloc size then only allocate what's needed. // Once it goes over the allocation size then allocate in chunks. - if db.datasz < db.AllocSize { + if db.datasz <= db.AllocSize { sz = db.datasz } else { sz += db.AllocSize @@ -1056,9 +1178,11 @@ func (db *DB) freepages() []pgid { panic(fmt.Sprintf("freepages: failed to get all reachable pages (%v)", e)) } }() - tx.checkBucket(&tx.root, reachable, nofreed, ech) + tx.checkBucket(&tx.root, reachable, nofreed, HexKVStringer(), ech) close(ech) + // TODO: If check bucket reported any corruptions (ech) we shouldn't proceed to freeing the pages. + var fids []pgid for i := pgid(2); i < db.meta().pgid; i++ { if _, ok := reachable[i]; !ok { @@ -1082,8 +1206,13 @@ type Options struct { // under normal operation, but requires a full database re-sync during recovery. NoFreelistSync bool + // PreLoadFreelist sets whether to load the free pages when opening + // the db file. Note when opening db in write mode, bbolt will always + // load the free pages. + PreLoadFreelist bool + // FreelistType sets the backend freelist type. There are two options. Array which is simple but endures - // dramatic performance degradation if database is large and framentation in freelist is common. + // dramatic performance degradation if database is large and fragmentation in freelist is common. // The alternative one is using hashmap, it is faster in almost all circumstances // but it doesn't guarantee that it offers the smallest page id available. In normal case it is safe. // The default type is array @@ -1187,7 +1316,7 @@ func (m *meta) validate() error { return ErrInvalid } else if m.version != version { return ErrVersionMismatch - } else if m.checksum != 0 && m.checksum != m.sum64() { + } else if m.checksum != m.sum64() { return ErrChecksum } return nil diff --git a/vendor/go.etcd.io/bbolt/doc.go b/vendor/go.etcd.io/bbolt/doc.go index 95f25f01c6..d1007e4b04 100644 --- a/vendor/go.etcd.io/bbolt/doc.go +++ b/vendor/go.etcd.io/bbolt/doc.go @@ -14,8 +14,7 @@ The design of Bolt is based on Howard Chu's LMDB database project. Bolt currently works on Windows, Mac OS X, and Linux. - -Basics +# Basics There are only a few types in Bolt: DB, Bucket, Tx, and Cursor. The DB is a collection of buckets and is represented by a single file on disk. A bucket is @@ -27,8 +26,7 @@ iterate over the dataset sequentially. Read-write transactions can create and delete buckets and can insert and remove keys. Only one read-write transaction is allowed at a time. - -Caveats +# Caveats The database uses a read-only, memory-mapped data file to ensure that applications cannot corrupt the database, however, this means that keys and @@ -38,7 +36,5 @@ will cause Go to panic. Keys and values retrieved from the database are only valid for the life of the transaction. When used outside the transaction, these byte slices can point to different data or can point to invalid memory which will cause a panic. - - */ package bbolt diff --git a/vendor/go.etcd.io/bbolt/errors.go b/vendor/go.etcd.io/bbolt/errors.go index 48758ca577..f2c3b20ed8 100644 --- a/vendor/go.etcd.io/bbolt/errors.go +++ b/vendor/go.etcd.io/bbolt/errors.go @@ -16,6 +16,9 @@ var ( // This typically occurs when a file is not a bolt database. ErrInvalid = errors.New("invalid database") + // ErrInvalidMapping is returned when the database file fails to get mapped. + ErrInvalidMapping = errors.New("database isn't correctly mapped") + // ErrVersionMismatch is returned when the data file was created with a // different version of Bolt. ErrVersionMismatch = errors.New("version mismatch") @@ -41,6 +44,10 @@ var ( // ErrDatabaseReadOnly is returned when a mutating transaction is started on a // read-only database. ErrDatabaseReadOnly = errors.New("database is in read-only mode") + + // ErrFreePagesNotLoaded is returned when a readonly transaction without + // preloading the free pages is trying to access the free pages. + ErrFreePagesNotLoaded = errors.New("free pages are not pre-loaded") ) // These errors can occur when putting or deleting a value or a bucket. diff --git a/vendor/go.etcd.io/bbolt/freelist.go b/vendor/go.etcd.io/bbolt/freelist.go index 697a46968b..50f2d0e174 100644 --- a/vendor/go.etcd.io/bbolt/freelist.go +++ b/vendor/go.etcd.io/bbolt/freelist.go @@ -24,7 +24,7 @@ type freelist struct { ids []pgid // all free and available free page ids. allocs map[pgid]txid // mapping of txid that allocated a pgid. pending map[txid]*txPending // mapping of soon-to-be free page ids by tx. - cache map[pgid]bool // fast lookup of all free and pending page ids. + cache map[pgid]struct{} // fast lookup of all free and pending page ids. freemaps map[uint64]pidSet // key is the size of continuous pages(span), value is a set which contains the starting pgids of same size forwardMap map[pgid]uint64 // key is start pgid, value is its span size backwardMap map[pgid]uint64 // key is end pgid, value is its span size @@ -41,7 +41,7 @@ func newFreelist(freelistType FreelistType) *freelist { freelistType: freelistType, allocs: make(map[pgid]txid), pending: make(map[txid]*txPending), - cache: make(map[pgid]bool), + cache: make(map[pgid]struct{}), freemaps: make(map[uint64]pidSet), forwardMap: make(map[pgid]uint64), backwardMap: make(map[pgid]uint64), @@ -171,13 +171,13 @@ func (f *freelist) free(txid txid, p *page) { for id := p.id; id <= p.id+pgid(p.overflow); id++ { // Verify that page is not already free. - if f.cache[id] { + if _, ok := f.cache[id]; ok { panic(fmt.Sprintf("page %d already freed", id)) } // Add to the freelist and cache. txp.ids = append(txp.ids, id) txp.alloctx = append(txp.alloctx, allocTxid) - f.cache[id] = true + f.cache[id] = struct{}{} } } @@ -256,8 +256,9 @@ func (f *freelist) rollback(txid txid) { } // freed returns whether a given page is in the free list. -func (f *freelist) freed(pgid pgid) bool { - return f.cache[pgid] +func (f *freelist) freed(pgId pgid) bool { + _, ok := f.cache[pgId] + return ok } // read initializes the freelist from a freelist page. @@ -386,13 +387,13 @@ func (f *freelist) noSyncReload(pgids []pgid) { // reindex rebuilds the free cache based on available and pending free lists. func (f *freelist) reindex() { ids := f.getFreePageIDs() - f.cache = make(map[pgid]bool, len(ids)) + f.cache = make(map[pgid]struct{}, len(ids)) for _, id := range ids { - f.cache[id] = true + f.cache[id] = struct{}{} } for _, txp := range f.pending { for _, pendingID := range txp.ids { - f.cache[pendingID] = true + f.cache[pendingID] = struct{}{} } } } diff --git a/vendor/go.etcd.io/bbolt/mlock_unix.go b/vendor/go.etcd.io/bbolt/mlock_unix.go index 6a6c7b3537..744a972f51 100644 --- a/vendor/go.etcd.io/bbolt/mlock_unix.go +++ b/vendor/go.etcd.io/bbolt/mlock_unix.go @@ -1,3 +1,4 @@ +//go:build !windows // +build !windows package bbolt @@ -17,7 +18,7 @@ func mlock(db *DB, fileSize int) error { return nil } -//munlock unlocks memory of db file +// munlock unlocks memory of db file func munlock(db *DB, fileSize int) error { if db.dataref == nil { return nil diff --git a/vendor/go.etcd.io/bbolt/mlock_windows.go b/vendor/go.etcd.io/bbolt/mlock_windows.go index b4a36a493d..00b0fb431f 100644 --- a/vendor/go.etcd.io/bbolt/mlock_windows.go +++ b/vendor/go.etcd.io/bbolt/mlock_windows.go @@ -5,7 +5,7 @@ func mlock(_ *DB, _ int) error { panic("mlock is supported only on UNIX systems") } -//munlock unlocks memory of db file +// munlock unlocks memory of db file func munlock(_ *DB, _ int) error { panic("munlock is supported only on UNIX systems") } diff --git a/vendor/go.etcd.io/bbolt/node.go b/vendor/go.etcd.io/bbolt/node.go index 73988b5c4c..9c56150d88 100644 --- a/vendor/go.etcd.io/bbolt/node.go +++ b/vendor/go.etcd.io/bbolt/node.go @@ -113,9 +113,9 @@ func (n *node) prevSibling() *node { } // put inserts a key/value. -func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { - if pgid >= n.bucket.tx.meta.pgid { - panic(fmt.Sprintf("pgid (%d) above high water mark (%d)", pgid, n.bucket.tx.meta.pgid)) +func (n *node) put(oldKey, newKey, value []byte, pgId pgid, flags uint32) { + if pgId >= n.bucket.tx.meta.pgid { + panic(fmt.Sprintf("pgId (%d) above high water mark (%d)", pgId, n.bucket.tx.meta.pgid)) } else if len(oldKey) <= 0 { panic("put: zero-length old key") } else if len(newKey) <= 0 { @@ -136,7 +136,7 @@ func (n *node) put(oldKey, newKey, value []byte, pgid pgid, flags uint32) { inode.flags = flags inode.key = newKey inode.value = value - inode.pgid = pgid + inode.pgid = pgId _assert(len(inode.key) > 0, "put: zero-length inode key") } @@ -188,12 +188,16 @@ func (n *node) read(p *page) { } // write writes the items onto one or more pages. +// The page should have p.id (might be 0 for meta or bucket-inline page) and p.overflow set +// and the rest should be zeroed. func (n *node) write(p *page) { + _assert(p.count == 0 && p.flags == 0, "node cannot be written into a not empty page") + // Initialize page. if n.isLeaf { - p.flags |= leafPageFlag + p.flags = leafPageFlag } else { - p.flags |= branchPageFlag + p.flags = branchPageFlag } if len(n.inodes) >= 0xFFFF { @@ -300,7 +304,7 @@ func (n *node) splitTwo(pageSize uintptr) (*node, *node) { n.inodes = n.inodes[:splitIndex] // Update the statistics. - n.bucket.tx.stats.Split++ + n.bucket.tx.stats.IncSplit(1) return n, next } @@ -387,7 +391,7 @@ func (n *node) spill() error { } // Update the statistics. - tx.stats.Spill++ + tx.stats.IncSpill(1) } // If the root node split and created a new root then we need to spill that @@ -409,7 +413,7 @@ func (n *node) rebalance() { n.unbalanced = false // Update statistics. - n.bucket.tx.stats.Rebalance++ + n.bucket.tx.stats.IncRebalance(1) // Ignore if node is above threshold (25%) and has enough keys. var threshold = n.bucket.tx.db.pageSize / 4 @@ -543,7 +547,7 @@ func (n *node) dereference() { } // Update statistics. - n.bucket.tx.stats.NodeDeref++ + n.bucket.tx.stats.IncNodeDeref(1) } // free adds the node's underlying page to the freelist. @@ -581,6 +585,10 @@ func (n *node) dump() { } */ +func compareKeys(left, right []byte) int { + return bytes.Compare(left, right) +} + type nodes []*node func (s nodes) Len() int { return len(s) } diff --git a/vendor/go.etcd.io/bbolt/page.go b/vendor/go.etcd.io/bbolt/page.go index c9a158fb06..379645c97f 100644 --- a/vendor/go.etcd.io/bbolt/page.go +++ b/vendor/go.etcd.io/bbolt/page.go @@ -53,6 +53,16 @@ func (p *page) meta() *meta { return (*meta)(unsafeAdd(unsafe.Pointer(p), unsafe.Sizeof(*p))) } +func (p *page) fastCheck(id pgid) { + _assert(p.id == id, "Page expected to be: %v, but self identifies as %v", id, p.id) + // Only one flag of page-type can be set. + _assert(p.flags == branchPageFlag || + p.flags == leafPageFlag || + p.flags == metaPageFlag || + p.flags == freelistPageFlag, + "page %v: has unexpected type/flags: %x", p.id, p.flags) +} + // leafPageElement retrieves the leaf node by index func (p *page) leafPageElement(index uint16) *leafPageElement { return (*leafPageElement)(unsafeIndex(unsafe.Pointer(p), unsafe.Sizeof(*p), diff --git a/vendor/go.etcd.io/bbolt/tx.go b/vendor/go.etcd.io/bbolt/tx.go index 869d412008..2fac8c0a78 100644 --- a/vendor/go.etcd.io/bbolt/tx.go +++ b/vendor/go.etcd.io/bbolt/tx.go @@ -6,6 +6,7 @@ import ( "os" "sort" "strings" + "sync/atomic" "time" "unsafe" ) @@ -151,17 +152,19 @@ func (tx *Tx) Commit() error { // Rebalance nodes which have had deletions. var startTime = time.Now() tx.root.rebalance() - if tx.stats.Rebalance > 0 { - tx.stats.RebalanceTime += time.Since(startTime) + if tx.stats.GetRebalance() > 0 { + tx.stats.IncRebalanceTime(time.Since(startTime)) } + opgid := tx.meta.pgid + // spill data onto dirty pages. startTime = time.Now() if err := tx.root.spill(); err != nil { tx.rollback() return err } - tx.stats.SpillTime += time.Since(startTime) + tx.stats.IncSpillTime(time.Since(startTime)) // Free the old root bucket. tx.meta.root.root = tx.root.root @@ -180,6 +183,14 @@ func (tx *Tx) Commit() error { tx.meta.freelist = pgidNoFreelist } + // If the high water mark has moved up then attempt to grow the database. + if tx.meta.pgid > opgid { + if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { + tx.rollback() + return err + } + } + // Write dirty pages to disk. startTime = time.Now() if err := tx.write(); err != nil { @@ -208,7 +219,7 @@ func (tx *Tx) Commit() error { tx.rollback() return err } - tx.stats.WriteTime += time.Since(startTime) + tx.stats.IncWriteTime(time.Since(startTime)) // Finalize the transaction. tx.close() @@ -224,7 +235,6 @@ func (tx *Tx) Commit() error { func (tx *Tx) commitFreelist() error { // Allocate new pages for the new free list. This will overestimate // the size of the freelist but not underestimate the size (which would be bad). - opgid := tx.meta.pgid p, err := tx.allocate((tx.db.freelist.size() / tx.db.pageSize) + 1) if err != nil { tx.rollback() @@ -235,13 +245,6 @@ func (tx *Tx) commitFreelist() error { return err } tx.meta.freelist = p.id - // If the high water mark has moved up then attempt to grow the database. - if tx.meta.pgid > opgid { - if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { - tx.rollback() - return err - } - } return nil } @@ -275,13 +278,17 @@ func (tx *Tx) rollback() { } if tx.writable { tx.db.freelist.rollback(tx.meta.txid) - if !tx.db.hasSyncedFreelist() { - // Reconstruct free page list by scanning the DB to get the whole free page list. - // Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode. - tx.db.freelist.noSyncReload(tx.db.freepages()) - } else { - // Read free page list from freelist page. - tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + // When mmap fails, the `data`, `dataref` and `datasz` may be reset to + // zero values, and there is no way to reload free page IDs in this case. + if tx.db.data != nil { + if !tx.db.hasSyncedFreelist() { + // Reconstruct free page list by scanning the DB to get the whole free page list. + // Note: scaning the whole db is heavy if your db size is large in NoSyncFreeList mode. + tx.db.freelist.noSyncReload(tx.db.freepages()) + } else { + // Read free page list from freelist page. + tx.db.freelist.reload(tx.db.page(tx.db.meta().freelist)) + } } } tx.close() @@ -400,98 +407,6 @@ func (tx *Tx) CopyFile(path string, mode os.FileMode) error { return f.Close() } -// Check performs several consistency checks on the database for this transaction. -// An error is returned if any inconsistency is found. -// -// It can be safely run concurrently on a writable transaction. However, this -// incurs a high cost for large databases and databases with a lot of subbuckets -// because of caching. This overhead can be removed if running on a read-only -// transaction, however, it is not safe to execute other writer transactions at -// the same time. -func (tx *Tx) Check() <-chan error { - ch := make(chan error) - go tx.check(ch) - return ch -} - -func (tx *Tx) check(ch chan error) { - // Force loading free list if opened in ReadOnly mode. - tx.db.loadFreelist() - - // Check if any pages are double freed. - freed := make(map[pgid]bool) - all := make([]pgid, tx.db.freelist.count()) - tx.db.freelist.copyall(all) - for _, id := range all { - if freed[id] { - ch <- fmt.Errorf("page %d: already freed", id) - } - freed[id] = true - } - - // Track every reachable page. - reachable := make(map[pgid]*page) - reachable[0] = tx.page(0) // meta0 - reachable[1] = tx.page(1) // meta1 - if tx.meta.freelist != pgidNoFreelist { - for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ { - reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist) - } - } - - // Recursively check buckets. - tx.checkBucket(&tx.root, reachable, freed, ch) - - // Ensure all pages below high water mark are either reachable or freed. - for i := pgid(0); i < tx.meta.pgid; i++ { - _, isReachable := reachable[i] - if !isReachable && !freed[i] { - ch <- fmt.Errorf("page %d: unreachable unfreed", int(i)) - } - } - - // Close the channel to signal completion. - close(ch) -} - -func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, ch chan error) { - // Ignore inline buckets. - if b.root == 0 { - return - } - - // Check every page used by this bucket. - b.tx.forEachPage(b.root, 0, func(p *page, _ int) { - if p.id > tx.meta.pgid { - ch <- fmt.Errorf("page %d: out of bounds: %d", int(p.id), int(b.tx.meta.pgid)) - } - - // Ensure each page is only referenced once. - for i := pgid(0); i <= pgid(p.overflow); i++ { - var id = p.id + i - if _, ok := reachable[id]; ok { - ch <- fmt.Errorf("page %d: multiple references", int(id)) - } - reachable[id] = p - } - - // We should only encounter un-freed leaf and branch pages. - if freed[p.id] { - ch <- fmt.Errorf("page %d: reachable freed", int(p.id)) - } else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 { - ch <- fmt.Errorf("page %d: invalid type: %s", int(p.id), p.typ()) - } - }) - - // Check each bucket within this bucket. - _ = b.ForEach(func(k, v []byte) error { - if child := b.Bucket(k); child != nil { - tx.checkBucket(child, reachable, freed, ch) - } - return nil - }) -} - // allocate returns a contiguous block of memory starting at a given page. func (tx *Tx) allocate(count int) (*page, error) { p, err := tx.db.allocate(tx.meta.txid, count) @@ -503,8 +418,8 @@ func (tx *Tx) allocate(count int) (*page, error) { tx.pages[p.id] = p // Update statistics. - tx.stats.PageCount += count - tx.stats.PageAlloc += count * tx.db.pageSize + tx.stats.IncPageCount(int64(count)) + tx.stats.IncPageAlloc(int64(count * tx.db.pageSize)) return p, nil } @@ -539,7 +454,7 @@ func (tx *Tx) write() error { } // Update statistics. - tx.stats.Write++ + tx.stats.IncWrite(1) // Exit inner for loop if we've written all the chunks. rem -= sz @@ -574,7 +489,7 @@ func (tx *Tx) write() error { for i := range buf { buf[i] = 0 } - tx.db.pagePool.Put(buf) + tx.db.pagePool.Put(buf) //nolint:staticcheck } return nil @@ -598,7 +513,7 @@ func (tx *Tx) writeMeta() error { } // Update statistics. - tx.stats.Write++ + tx.stats.IncWrite(1) return nil } @@ -609,26 +524,35 @@ func (tx *Tx) page(id pgid) *page { // Check the dirty pages first. if tx.pages != nil { if p, ok := tx.pages[id]; ok { + p.fastCheck(id) return p } } // Otherwise return directly from the mmap. - return tx.db.page(id) + p := tx.db.page(id) + p.fastCheck(id) + return p } // forEachPage iterates over every page within a given page and executes a function. -func (tx *Tx) forEachPage(pgid pgid, depth int, fn func(*page, int)) { - p := tx.page(pgid) +func (tx *Tx) forEachPage(pgidnum pgid, fn func(*page, int, []pgid)) { + stack := make([]pgid, 10) + stack[0] = pgidnum + tx.forEachPageInternal(stack[:1], fn) +} + +func (tx *Tx) forEachPageInternal(pgidstack []pgid, fn func(*page, int, []pgid)) { + p := tx.page(pgidstack[len(pgidstack)-1]) // Execute function. - fn(p, depth) + fn(p, len(pgidstack)-1, pgidstack) // Recursively loop over children. if (p.flags & branchPageFlag) != 0 { for i := 0; i < int(p.count); i++ { elem := p.branchPageElement(uint16(i)) - tx.forEachPage(elem.pgid, depth+1, fn) + tx.forEachPageInternal(append(pgidstack, elem.pgid), fn) } } } @@ -642,6 +566,10 @@ func (tx *Tx) Page(id int) (*PageInfo, error) { return nil, nil } + if tx.db.freelist == nil { + return nil, ErrFreePagesNotLoaded + } + // Build the page info. p := tx.db.page(pgid(id)) info := &PageInfo{ @@ -663,43 +591,61 @@ func (tx *Tx) Page(id int) (*PageInfo, error) { // TxStats represents statistics about the actions performed by the transaction. type TxStats struct { // Page statistics. - PageCount int // number of page allocations - PageAlloc int // total bytes allocated + // + // DEPRECATED: Use GetPageCount() or IncPageCount() + PageCount int64 // number of page allocations + // DEPRECATED: Use GetPageAlloc() or IncPageAlloc() + PageAlloc int64 // total bytes allocated // Cursor statistics. - CursorCount int // number of cursors created + // + // DEPRECATED: Use GetCursorCount() or IncCursorCount() + CursorCount int64 // number of cursors created // Node statistics - NodeCount int // number of node allocations - NodeDeref int // number of node dereferences + // + // DEPRECATED: Use GetNodeCount() or IncNodeCount() + NodeCount int64 // number of node allocations + // DEPRECATED: Use GetNodeDeref() or IncNodeDeref() + NodeDeref int64 // number of node dereferences // Rebalance statistics. - Rebalance int // number of node rebalances + // + // DEPRECATED: Use GetRebalance() or IncRebalance() + Rebalance int64 // number of node rebalances + // DEPRECATED: Use GetRebalanceTime() or IncRebalanceTime() RebalanceTime time.Duration // total time spent rebalancing // Split/Spill statistics. - Split int // number of nodes split - Spill int // number of nodes spilled + // + // DEPRECATED: Use GetSplit() or IncSplit() + Split int64 // number of nodes split + // DEPRECATED: Use GetSpill() or IncSpill() + Spill int64 // number of nodes spilled + // DEPRECATED: Use GetSpillTime() or IncSpillTime() SpillTime time.Duration // total time spent spilling // Write statistics. - Write int // number of writes performed + // + // DEPRECATED: Use GetWrite() or IncWrite() + Write int64 // number of writes performed + // DEPRECATED: Use GetWriteTime() or IncWriteTime() WriteTime time.Duration // total time spent writing to disk } func (s *TxStats) add(other *TxStats) { - s.PageCount += other.PageCount - s.PageAlloc += other.PageAlloc - s.CursorCount += other.CursorCount - s.NodeCount += other.NodeCount - s.NodeDeref += other.NodeDeref - s.Rebalance += other.Rebalance - s.RebalanceTime += other.RebalanceTime - s.Split += other.Split - s.Spill += other.Spill - s.SpillTime += other.SpillTime - s.Write += other.Write - s.WriteTime += other.WriteTime + s.IncPageCount(other.GetPageCount()) + s.IncPageAlloc(other.GetPageAlloc()) + s.IncCursorCount(other.GetCursorCount()) + s.IncNodeCount(other.GetNodeCount()) + s.IncNodeDeref(other.GetNodeDeref()) + s.IncRebalance(other.GetRebalance()) + s.IncRebalanceTime(other.GetRebalanceTime()) + s.IncSplit(other.GetSplit()) + s.IncSpill(other.GetSpill()) + s.IncSpillTime(other.GetSpillTime()) + s.IncWrite(other.GetWrite()) + s.IncWriteTime(other.GetWriteTime()) } // Sub calculates and returns the difference between two sets of transaction stats. @@ -707,17 +653,145 @@ func (s *TxStats) add(other *TxStats) { // you need the performance counters that occurred within that time span. func (s *TxStats) Sub(other *TxStats) TxStats { var diff TxStats - diff.PageCount = s.PageCount - other.PageCount - diff.PageAlloc = s.PageAlloc - other.PageAlloc - diff.CursorCount = s.CursorCount - other.CursorCount - diff.NodeCount = s.NodeCount - other.NodeCount - diff.NodeDeref = s.NodeDeref - other.NodeDeref - diff.Rebalance = s.Rebalance - other.Rebalance - diff.RebalanceTime = s.RebalanceTime - other.RebalanceTime - diff.Split = s.Split - other.Split - diff.Spill = s.Spill - other.Spill - diff.SpillTime = s.SpillTime - other.SpillTime - diff.Write = s.Write - other.Write - diff.WriteTime = s.WriteTime - other.WriteTime + diff.PageCount = s.GetPageCount() - other.GetPageCount() + diff.PageAlloc = s.GetPageAlloc() - other.GetPageAlloc() + diff.CursorCount = s.GetCursorCount() - other.GetCursorCount() + diff.NodeCount = s.GetNodeCount() - other.GetNodeCount() + diff.NodeDeref = s.GetNodeDeref() - other.GetNodeDeref() + diff.Rebalance = s.GetRebalance() - other.GetRebalance() + diff.RebalanceTime = s.GetRebalanceTime() - other.GetRebalanceTime() + diff.Split = s.GetSplit() - other.GetSplit() + diff.Spill = s.GetSpill() - other.GetSpill() + diff.SpillTime = s.GetSpillTime() - other.GetSpillTime() + diff.Write = s.GetWrite() - other.GetWrite() + diff.WriteTime = s.GetWriteTime() - other.GetWriteTime() return diff } + +// GetPageCount returns PageCount atomically. +func (s *TxStats) GetPageCount() int64 { + return atomic.LoadInt64(&s.PageCount) +} + +// IncPageCount increases PageCount atomically and returns the new value. +func (s *TxStats) IncPageCount(delta int64) int64 { + return atomic.AddInt64(&s.PageCount, delta) +} + +// GetPageAlloc returns PageAlloc atomically. +func (s *TxStats) GetPageAlloc() int64 { + return atomic.LoadInt64(&s.PageAlloc) +} + +// IncPageAlloc increases PageAlloc atomically and returns the new value. +func (s *TxStats) IncPageAlloc(delta int64) int64 { + return atomic.AddInt64(&s.PageAlloc, delta) +} + +// GetCursorCount returns CursorCount atomically. +func (s *TxStats) GetCursorCount() int64 { + return atomic.LoadInt64(&s.CursorCount) +} + +// IncCursorCount increases CursorCount atomically and return the new value. +func (s *TxStats) IncCursorCount(delta int64) int64 { + return atomic.AddInt64(&s.CursorCount, delta) +} + +// GetNodeCount returns NodeCount atomically. +func (s *TxStats) GetNodeCount() int64 { + return atomic.LoadInt64(&s.NodeCount) +} + +// IncNodeCount increases NodeCount atomically and returns the new value. +func (s *TxStats) IncNodeCount(delta int64) int64 { + return atomic.AddInt64(&s.NodeCount, delta) +} + +// GetNodeDeref returns NodeDeref atomically. +func (s *TxStats) GetNodeDeref() int64 { + return atomic.LoadInt64(&s.NodeDeref) +} + +// IncNodeDeref increases NodeDeref atomically and returns the new value. +func (s *TxStats) IncNodeDeref(delta int64) int64 { + return atomic.AddInt64(&s.NodeDeref, delta) +} + +// GetRebalance returns Rebalance atomically. +func (s *TxStats) GetRebalance() int64 { + return atomic.LoadInt64(&s.Rebalance) +} + +// IncRebalance increases Rebalance atomically and returns the new value. +func (s *TxStats) IncRebalance(delta int64) int64 { + return atomic.AddInt64(&s.Rebalance, delta) +} + +// GetRebalanceTime returns RebalanceTime atomically. +func (s *TxStats) GetRebalanceTime() time.Duration { + return atomicLoadDuration(&s.RebalanceTime) +} + +// IncRebalanceTime increases RebalanceTime atomically and returns the new value. +func (s *TxStats) IncRebalanceTime(delta time.Duration) time.Duration { + return atomicAddDuration(&s.RebalanceTime, delta) +} + +// GetSplit returns Split atomically. +func (s *TxStats) GetSplit() int64 { + return atomic.LoadInt64(&s.Split) +} + +// IncSplit increases Split atomically and returns the new value. +func (s *TxStats) IncSplit(delta int64) int64 { + return atomic.AddInt64(&s.Split, delta) +} + +// GetSpill returns Spill atomically. +func (s *TxStats) GetSpill() int64 { + return atomic.LoadInt64(&s.Spill) +} + +// IncSpill increases Spill atomically and returns the new value. +func (s *TxStats) IncSpill(delta int64) int64 { + return atomic.AddInt64(&s.Spill, delta) +} + +// GetSpillTime returns SpillTime atomically. +func (s *TxStats) GetSpillTime() time.Duration { + return atomicLoadDuration(&s.SpillTime) +} + +// IncSpillTime increases SpillTime atomically and returns the new value. +func (s *TxStats) IncSpillTime(delta time.Duration) time.Duration { + return atomicAddDuration(&s.SpillTime, delta) +} + +// GetWrite returns Write atomically. +func (s *TxStats) GetWrite() int64 { + return atomic.LoadInt64(&s.Write) +} + +// IncWrite increases Write atomically and returns the new value. +func (s *TxStats) IncWrite(delta int64) int64 { + return atomic.AddInt64(&s.Write, delta) +} + +// GetWriteTime returns WriteTime atomically. +func (s *TxStats) GetWriteTime() time.Duration { + return atomicLoadDuration(&s.WriteTime) +} + +// IncWriteTime increases WriteTime atomically and returns the new value. +func (s *TxStats) IncWriteTime(delta time.Duration) time.Duration { + return atomicAddDuration(&s.WriteTime, delta) +} + +func atomicAddDuration(ptr *time.Duration, du time.Duration) time.Duration { + return time.Duration(atomic.AddInt64((*int64)(unsafe.Pointer(ptr)), int64(du))) +} + +func atomicLoadDuration(ptr *time.Duration) time.Duration { + return time.Duration(atomic.LoadInt64((*int64)(unsafe.Pointer(ptr)))) +} diff --git a/vendor/go.etcd.io/bbolt/tx_check.go b/vendor/go.etcd.io/bbolt/tx_check.go new file mode 100644 index 0000000000..75c7c08436 --- /dev/null +++ b/vendor/go.etcd.io/bbolt/tx_check.go @@ -0,0 +1,226 @@ +package bbolt + +import ( + "encoding/hex" + "fmt" +) + +// Check performs several consistency checks on the database for this transaction. +// An error is returned if any inconsistency is found. +// +// It can be safely run concurrently on a writable transaction. However, this +// incurs a high cost for large databases and databases with a lot of subbuckets +// because of caching. This overhead can be removed if running on a read-only +// transaction, however, it is not safe to execute other writer transactions at +// the same time. +func (tx *Tx) Check() <-chan error { + return tx.CheckWithOptions() +} + +// CheckWithOptions allows users to provide a customized `KVStringer` implementation, +// so that bolt can generate human-readable diagnostic messages. +func (tx *Tx) CheckWithOptions(options ...CheckOption) <-chan error { + chkConfig := checkConfig{ + kvStringer: HexKVStringer(), + } + for _, op := range options { + op(&chkConfig) + } + + ch := make(chan error) + go tx.check(chkConfig.kvStringer, ch) + return ch +} + +func (tx *Tx) check(kvStringer KVStringer, ch chan error) { + // Force loading free list if opened in ReadOnly mode. + tx.db.loadFreelist() + + // Check if any pages are double freed. + freed := make(map[pgid]bool) + all := make([]pgid, tx.db.freelist.count()) + tx.db.freelist.copyall(all) + for _, id := range all { + if freed[id] { + ch <- fmt.Errorf("page %d: already freed", id) + } + freed[id] = true + } + + // Track every reachable page. + reachable := make(map[pgid]*page) + reachable[0] = tx.page(0) // meta0 + reachable[1] = tx.page(1) // meta1 + if tx.meta.freelist != pgidNoFreelist { + for i := uint32(0); i <= tx.page(tx.meta.freelist).overflow; i++ { + reachable[tx.meta.freelist+pgid(i)] = tx.page(tx.meta.freelist) + } + } + + // Recursively check buckets. + tx.checkBucket(&tx.root, reachable, freed, kvStringer, ch) + + // Ensure all pages below high water mark are either reachable or freed. + for i := pgid(0); i < tx.meta.pgid; i++ { + _, isReachable := reachable[i] + if !isReachable && !freed[i] { + ch <- fmt.Errorf("page %d: unreachable unfreed", int(i)) + } + } + + // Close the channel to signal completion. + close(ch) +} + +func (tx *Tx) checkBucket(b *Bucket, reachable map[pgid]*page, freed map[pgid]bool, + kvStringer KVStringer, ch chan error) { + // Ignore inline buckets. + if b.root == 0 { + return + } + + // Check every page used by this bucket. + b.tx.forEachPage(b.root, func(p *page, _ int, stack []pgid) { + if p.id > tx.meta.pgid { + ch <- fmt.Errorf("page %d: out of bounds: %d (stack: %v)", int(p.id), int(b.tx.meta.pgid), stack) + } + + // Ensure each page is only referenced once. + for i := pgid(0); i <= pgid(p.overflow); i++ { + var id = p.id + i + if _, ok := reachable[id]; ok { + ch <- fmt.Errorf("page %d: multiple references (stack: %v)", int(id), stack) + } + reachable[id] = p + } + + // We should only encounter un-freed leaf and branch pages. + if freed[p.id] { + ch <- fmt.Errorf("page %d: reachable freed", int(p.id)) + } else if (p.flags&branchPageFlag) == 0 && (p.flags&leafPageFlag) == 0 { + ch <- fmt.Errorf("page %d: invalid type: %s (stack: %v)", int(p.id), p.typ(), stack) + } + }) + + tx.recursivelyCheckPages(b.root, kvStringer.KeyToString, ch) + + // Check each bucket within this bucket. + _ = b.ForEachBucket(func(k []byte) error { + if child := b.Bucket(k); child != nil { + tx.checkBucket(child, reachable, freed, kvStringer, ch) + } + return nil + }) +} + +// recursivelyCheckPages confirms database consistency with respect to b-tree +// key order constraints: +// - keys on pages must be sorted +// - keys on children pages are between 2 consecutive keys on the parent's branch page). +func (tx *Tx) recursivelyCheckPages(pgId pgid, keyToString func([]byte) string, ch chan error) { + tx.recursivelyCheckPagesInternal(pgId, nil, nil, nil, keyToString, ch) +} + +// recursivelyCheckPagesInternal verifies that all keys in the subtree rooted at `pgid` are: +// - >=`minKeyClosed` (can be nil) +// - <`maxKeyOpen` (can be nil) +// - Are in right ordering relationship to their parents. +// `pagesStack` is expected to contain IDs of pages from the tree root to `pgid` for the clean debugging message. +func (tx *Tx) recursivelyCheckPagesInternal( + pgId pgid, minKeyClosed, maxKeyOpen []byte, pagesStack []pgid, + keyToString func([]byte) string, ch chan error) (maxKeyInSubtree []byte) { + + p := tx.page(pgId) + pagesStack = append(pagesStack, pgId) + switch { + case p.flags&branchPageFlag != 0: + // For branch page we navigate ranges of all subpages. + runningMin := minKeyClosed + for i := range p.branchPageElements() { + elem := p.branchPageElement(uint16(i)) + verifyKeyOrder(elem.pgid, "branch", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack) + + maxKey := maxKeyOpen + if i < len(p.branchPageElements())-1 { + maxKey = p.branchPageElement(uint16(i + 1)).key() + } + maxKeyInSubtree = tx.recursivelyCheckPagesInternal(elem.pgid, elem.key(), maxKey, pagesStack, keyToString, ch) + runningMin = maxKeyInSubtree + } + return maxKeyInSubtree + case p.flags&leafPageFlag != 0: + runningMin := minKeyClosed + for i := range p.leafPageElements() { + elem := p.leafPageElement(uint16(i)) + verifyKeyOrder(pgId, "leaf", i, elem.key(), runningMin, maxKeyOpen, ch, keyToString, pagesStack) + runningMin = elem.key() + } + if p.count > 0 { + return p.leafPageElement(p.count - 1).key() + } + default: + ch <- fmt.Errorf("unexpected page type for pgId:%d", pgId) + } + return maxKeyInSubtree +} + +/*** + * verifyKeyOrder checks whether an entry with given #index on pgId (pageType: "branch|leaf") that has given "key", + * is within range determined by (previousKey..maxKeyOpen) and reports found violations to the channel (ch). + */ +func verifyKeyOrder(pgId pgid, pageType string, index int, key []byte, previousKey []byte, maxKeyOpen []byte, ch chan error, keyToString func([]byte) string, pagesStack []pgid) { + if index == 0 && previousKey != nil && compareKeys(previousKey, key) > 0 { + ch <- fmt.Errorf("the first key[%d]=(hex)%s on %s page(%d) needs to be >= the key in the ancestor (%s). Stack: %v", + index, keyToString(key), pageType, pgId, keyToString(previousKey), pagesStack) + } + if index > 0 { + cmpRet := compareKeys(previousKey, key) + if cmpRet > 0 { + ch <- fmt.Errorf("key[%d]=(hex)%s on %s page(%d) needs to be > (found <) than previous element (hex)%s. Stack: %v", + index, keyToString(key), pageType, pgId, keyToString(previousKey), pagesStack) + } + if cmpRet == 0 { + ch <- fmt.Errorf("key[%d]=(hex)%s on %s page(%d) needs to be > (found =) than previous element (hex)%s. Stack: %v", + index, keyToString(key), pageType, pgId, keyToString(previousKey), pagesStack) + } + } + if maxKeyOpen != nil && compareKeys(key, maxKeyOpen) >= 0 { + ch <- fmt.Errorf("key[%d]=(hex)%s on %s page(%d) needs to be < than key of the next element in ancestor (hex)%s. Pages stack: %v", + index, keyToString(key), pageType, pgId, keyToString(previousKey), pagesStack) + } +} + +// =========================================================================================== + +type checkConfig struct { + kvStringer KVStringer +} + +type CheckOption func(options *checkConfig) + +func WithKVStringer(kvStringer KVStringer) CheckOption { + return func(c *checkConfig) { + c.kvStringer = kvStringer + } +} + +// KVStringer allows to prepare human-readable diagnostic messages. +type KVStringer interface { + KeyToString([]byte) string + ValueToString([]byte) string +} + +// HexKVStringer serializes both key & value to hex representation. +func HexKVStringer() KVStringer { + return hexKvStringer{} +} + +type hexKvStringer struct{} + +func (_ hexKvStringer) KeyToString(key []byte) string { + return hex.EncodeToString(key) +} + +func (_ hexKvStringer) ValueToString(value []byte) string { + return hex.EncodeToString(value) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 87350ad45f..f9d6e26e43 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -885,8 +885,8 @@ github.com/xeipuuv/gojsonreference # github.com/xeipuuv/gojsonschema v1.2.0 ## explicit github.com/xeipuuv/gojsonschema -# go.etcd.io/bbolt v1.3.6 -## explicit; go 1.12 +# go.etcd.io/bbolt v1.3.7 +## explicit; go 1.17 go.etcd.io/bbolt # go.mongodb.org/mongo-driver v1.11.1 ## explicit; go 1.13