diff --git a/go.mod b/go.mod index 81a1f8628..c2f76bf84 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,7 @@ require ( github.com/containernetworking/plugins v1.1.1 github.com/containers/image/v5 v5.21.1 github.com/containers/ocicrypt v1.1.4-0.20220428134531-566b808bdf6f - github.com/containers/storage v1.40.2 + github.com/containers/storage v1.41.0 github.com/coreos/go-systemd/v22 v22.3.2 github.com/cyphar/filepath-securejoin v0.2.3 github.com/disiqueira/gotree/v3 v3.0.2 diff --git a/go.sum b/go.sum index 37266b49a..82ea9bea1 100644 --- a/go.sum +++ b/go.sum @@ -289,8 +289,8 @@ github.com/containers/ocicrypt v1.1.3/go.mod h1:xpdkbVAuaH3WzbEabUd5yDsl9SwJA5pA github.com/containers/ocicrypt v1.1.4-0.20220428134531-566b808bdf6f h1:hffElEaoDQfREHltc2wtFPd68BqDmzW6KkEDpuSRBjs= github.com/containers/ocicrypt v1.1.4-0.20220428134531-566b808bdf6f/go.mod h1:xpdkbVAuaH3WzbEabUd5yDsl9SwJA5pABH85425Es2g= github.com/containers/storage v1.40.0/go.mod h1:zUyPC3CFIGR1OhY1CKkffxgw9+LuH76PGvVcFj38dgs= -github.com/containers/storage v1.40.2 h1:GUlHaGnrs1JOEwv6YEvkQdgYXOXZdU1Angy4wgWNgF8= -github.com/containers/storage v1.40.2/go.mod h1:zUyPC3CFIGR1OhY1CKkffxgw9+LuH76PGvVcFj38dgs= +github.com/containers/storage v1.41.0 h1:IsoAJ1q3s/jfHB7eoiRhvTRTcuV+ywY3CkbbgKtT5Bo= +github.com/containers/storage v1.41.0/go.mod h1:Pb0l5Sm/89kolX3o2KolKQ5cCHk5vPNpJrhNaLcdS5s= github.com/coreos/bbolt v1.3.2/go.mod h1:iRUV2dpdMOn7Bo10OQBFzIJO9kkE559Wcmn+qkEiiKk= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/etcd v3.3.13+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= @@ -611,8 +611,9 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.2 h1:3WH+AG7s2+T8o3nrM/8u2rdqUEcQhmga7smjrT41nAw= github.com/klauspost/compress v1.15.2/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.15.4 h1:1kn4/7MepF/CHmYub99/nNX8az0IJjfSOU/jbnTVfqQ= +github.com/klauspost/compress v1.15.4/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/vendor/github.com/containers/storage/Makefile b/vendor/github.com/containers/storage/Makefile index 2c1e4a185..244576d54 100644 --- a/vendor/github.com/containers/storage/Makefile +++ b/vendor/github.com/containers/storage/Makefile @@ -59,8 +59,8 @@ binary local-binary: containers-storage local-gccgo: ## build using gccgo on the host GCCGO=$(PWD)/hack/gccgo-wrapper.sh $(GO) build $(MOD_VENDOR) -compiler gccgo $(BUILDFLAGS) -o containers-storage.gccgo ./cmd/containers-storage -local-cross: ## cross build the binaries for arm, darwin, and\nfreebsd - @for target in linux/amd64 linux/386 linux/arm linux/arm64 linux/ppc64 linux/ppc64le darwin/amd64 windows/amd64 ; do \ +local-cross: ## cross build the binaries for arm, darwin, and freebsd + @for target in linux/amd64 linux/386 linux/arm linux/arm64 linux/ppc64 linux/ppc64le darwin/amd64 windows/amd64 freebsd/amd64 freebsd/arm64 ; do \ os=`echo $${target} | cut -f1 -d/` ; \ arch=`echo $${target} | cut -f2 -d/` ; \ suffix=$${os}.$${arch} ; \ diff --git a/vendor/github.com/containers/storage/VERSION b/vendor/github.com/containers/storage/VERSION index 148dabb45..7d47e5998 100644 --- a/vendor/github.com/containers/storage/VERSION +++ b/vendor/github.com/containers/storage/VERSION @@ -1 +1 @@ -1.40.2 +1.41.0 diff --git a/vendor/github.com/containers/storage/drivers/overlay/overlay.go b/vendor/github.com/containers/storage/drivers/overlay/overlay.go index 09d24ae8b..8600ee685 100644 --- a/vendor/github.com/containers/storage/drivers/overlay/overlay.go +++ b/vendor/github.com/containers/storage/drivers/overlay/overlay.go @@ -207,14 +207,18 @@ func checkSupportVolatile(home, runhome string) (bool, error) { // checkAndRecordIDMappedSupport checks and stores if the kernel supports mounting overlay on top of a // idmapped lower layer. func checkAndRecordIDMappedSupport(home, runhome string) (bool, error) { + if os.Geteuid() != 0 { + return false, nil + } + feature := "idmapped-lower-dir" overlayCacheResult, overlayCacheText, err := cachedFeatureCheck(runhome, feature) if err == nil { if overlayCacheResult { - logrus.Debugf("Cached value indicated that overlay is supported") + logrus.Debugf("Cached value indicated that idmapped mounts for overlay are supported") return true, nil } - logrus.Debugf("Cached value indicated that overlay is not supported") + logrus.Debugf("Cached value indicated that idmapped mounts for overlay are not supported") return false, errors.New(overlayCacheText) } supportsIDMappedMounts, err := supportsIdmappedLowerLayers(home) diff --git a/vendor/github.com/containers/storage/go.mod b/vendor/github.com/containers/storage/go.mod index 5d8c59960..1b9f25bcb 100644 --- a/vendor/github.com/containers/storage/go.mod +++ b/vendor/github.com/containers/storage/go.mod @@ -12,7 +12,7 @@ require ( github.com/google/go-intervals v0.0.2 github.com/hashicorp/go-multierror v1.1.1 github.com/json-iterator/go v1.1.12 - github.com/klauspost/compress v1.15.2 + github.com/klauspost/compress v1.15.4 github.com/klauspost/pgzip v1.2.5 github.com/mattn/go-shellwords v1.0.12 github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible diff --git a/vendor/github.com/containers/storage/go.sum b/vendor/github.com/containers/storage/go.sum index 97a0d167d..6587fddb3 100644 --- a/vendor/github.com/containers/storage/go.sum +++ b/vendor/github.com/containers/storage/go.sum @@ -425,8 +425,8 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o github.com/klauspost/compress v1.11.3/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.11.13/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/compress v1.15.1/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= -github.com/klauspost/compress v1.15.2 h1:3WH+AG7s2+T8o3nrM/8u2rdqUEcQhmga7smjrT41nAw= -github.com/klauspost/compress v1.15.2/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= +github.com/klauspost/compress v1.15.4 h1:1kn4/7MepF/CHmYub99/nNX8az0IJjfSOU/jbnTVfqQ= +github.com/klauspost/compress v1.15.4/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= github.com/klauspost/pgzip v1.2.5 h1:qnWYvvKqedOF2ulHpMG72XQol4ILEJ8k2wwRl/Km8oE= github.com/klauspost/pgzip v1.2.5/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= diff --git a/vendor/github.com/containers/storage/pkg/chrootarchive/archive.go b/vendor/github.com/containers/storage/pkg/chrootarchive/archive.go index e874eb74e..482e03663 100644 --- a/vendor/github.com/containers/storage/pkg/chrootarchive/archive.go +++ b/vendor/github.com/containers/storage/pkg/chrootarchive/archive.go @@ -5,9 +5,7 @@ import ( "fmt" "io" "io/ioutil" - "net" "os" - "os/user" "path/filepath" "sync" @@ -17,13 +15,6 @@ import ( "github.com/pkg/errors" ) -func init() { - // initialize nss libraries in Glibc so that the dynamic libraries are loaded in the host - // environment not in the chroot from untrusted files. - _, _ = user.Lookup("storage") - _, _ = net.LookupHost("localhost") -} - // NewArchiver returns a new Archiver which uses chrootarchive.Untar func NewArchiver(idMappings *idtools.IDMappings) *archive.Archiver { archiver := archive.NewArchiver(idMappings) diff --git a/vendor/github.com/containers/storage/pkg/chrootarchive/chroot_linux.go b/vendor/github.com/containers/storage/pkg/chrootarchive/chroot_linux.go index 76c94c6c1..58729ec8c 100644 --- a/vendor/github.com/containers/storage/pkg/chrootarchive/chroot_linux.go +++ b/vendor/github.com/containers/storage/pkg/chrootarchive/chroot_linux.go @@ -3,7 +3,9 @@ package chrootarchive import ( "fmt" "io/ioutil" + "net" "os" + "os/user" "path/filepath" "github.com/containers/storage/pkg/mount" @@ -23,6 +25,11 @@ func chroot(path string) (err error) { return err } + // initialize nss libraries in Glibc so that the dynamic libraries are loaded in the host + // environment not in the chroot from untrusted files. + _, _ = user.Lookup("storage") + _, _ = net.LookupHost("localhost") + // if the process doesn't have CAP_SYS_ADMIN, but does have CAP_SYS_CHROOT, we need to use the actual chroot if !caps.Get(capability.EFFECTIVE, capability.CAP_SYS_ADMIN) && caps.Get(capability.EFFECTIVE, capability.CAP_SYS_CHROOT) { return realChroot(path) diff --git a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go index 7de20feaa..9434499d2 100644 --- a/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go +++ b/vendor/github.com/containers/storage/pkg/chunked/storage_linux.go @@ -918,6 +918,9 @@ func (c *chunkedDiffer) storeMissingFiles(streams chan io.ReadCloser, errs chan case p := <-streams: part = p case err := <-errs: + if err == nil { + return errors.New("not enough data returned from the server") + } return err } if part == nil { @@ -1081,12 +1084,18 @@ func mergeMissingChunks(missingParts []missingPart, target int) []missingPart { func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingParts []missingPart, options *archive.TarOptions) error { var chunksToRequest []ImageSourceChunk - for _, c := range missingParts { - if c.OriginFile == nil && !c.Hole { - chunksToRequest = append(chunksToRequest, *c.SourceChunk) + + calculateChunksToRequest := func() { + chunksToRequest = []ImageSourceChunk{} + for _, c := range missingParts { + if c.OriginFile == nil && !c.Hole { + chunksToRequest = append(chunksToRequest, *c.SourceChunk) + } } } + calculateChunksToRequest() + // There are some missing files. Prepare a multirange request for the missing chunks. var streams chan io.ReadCloser var err error @@ -1106,6 +1115,7 @@ func (c *chunkedDiffer) retrieveMissingFiles(dest string, dirfd int, missingPart // Merge more chunks to request missingParts = mergeMissingChunks(missingParts, requested/2) + calculateChunksToRequest() continue } return err @@ -1575,6 +1585,8 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra wg.Wait() for _, res := range copyResults[:filesToWaitFor] { + r := &mergedEntries[res.index] + if res.err != nil { return output, res.err } @@ -1584,8 +1596,6 @@ func (c *chunkedDiffer) ApplyDiff(dest string, options *archive.TarOptions) (gra continue } - r := &mergedEntries[res.index] - missingPartsSize += r.Size remainingSize := r.Size diff --git a/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go b/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go index 72ceec3dd..2404e331d 100644 --- a/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go +++ b/vendor/github.com/containers/storage/pkg/mount/mounter_freebsd.go @@ -1,3 +1,6 @@ +//go:build freebsd && cgo +// +build freebsd,cgo + package mount /* diff --git a/vendor/github.com/containers/storage/pkg/mount/mounter_unsupported.go b/vendor/github.com/containers/storage/pkg/mount/mounter_unsupported.go index 9d20cfbf8..74fe66609 100644 --- a/vendor/github.com/containers/storage/pkg/mount/mounter_unsupported.go +++ b/vendor/github.com/containers/storage/pkg/mount/mounter_unsupported.go @@ -1,4 +1,6 @@ -// +build !linux,!freebsd +//go:build !linux && !(freebsd && cgo) +// +build !linux +// +build !freebsd !cgo package mount diff --git a/vendor/github.com/containers/storage/storage.conf-freebsd b/vendor/github.com/containers/storage/storage.conf-freebsd index cc655c62e..34d80152c 100644 --- a/vendor/github.com/containers/storage/storage.conf-freebsd +++ b/vendor/github.com/containers/storage/storage.conf-freebsd @@ -5,8 +5,8 @@ # files. # # Note: The storage.conf file overrides other storage.conf files based on this precedence: -# /usr/containers/storage.conf -# /etc/containers/storage.conf +# /usr/local/share/containers/storage.conf +# /usr/local/etc/containers/storage.conf # $HOME/.config/containers/storage.conf # $XDG_CONFIG_HOME/containers/storage.conf (If XDG_CONFIG_HOME is set) # See man 5 containers-storage.conf for more information diff --git a/vendor/github.com/containers/storage/store.go b/vendor/github.com/containers/storage/store.go index 30d3e8715..45912d0ca 100644 --- a/vendor/github.com/containers/storage/store.go +++ b/vendor/github.com/containers/storage/store.go @@ -1195,6 +1195,11 @@ func (s *store) imageTopLayerForMapping(image *Image, ristore ROImageStore, crea if layer == nil { layer = cLayer parentLayer = cParentLayer + if store != rlstore { + // The layer is in another store, so we cannot + // create a mapped version of it to the image. + createMappedLayer = false + } } } } diff --git a/vendor/github.com/containers/storage/types/options.go b/vendor/github.com/containers/storage/types/options.go index a71c6d2ef..d318421a4 100644 --- a/vendor/github.com/containers/storage/types/options.go +++ b/vendor/github.com/containers/storage/types/options.go @@ -25,22 +25,6 @@ type TomlConfig struct { } `toml:"storage"` } -const ( - // these are default path for run and graph root for rootful users - // for rootless path is constructed via getRootlessStorageOpts - defaultRunRoot string = "/run/containers/storage" - defaultGraphRoot string = "/var/lib/containers/storage" -) - -// defaultConfigFile path to the system wide storage.conf file -var ( - defaultConfigFile = "/usr/share/containers/storage.conf" - defaultOverrideConfigFile = "/etc/containers/storage.conf" - defaultConfigFileSet = false - // DefaultStoreOptions is a reasonable default set of options. - defaultStoreOptions StoreOptions -) - const ( overlayDriver = "overlay" overlay2 = "overlay2" diff --git a/vendor/github.com/containers/storage/types/options_darwin.go b/vendor/github.com/containers/storage/types/options_darwin.go new file mode 100644 index 000000000..d5ad50bc0 --- /dev/null +++ b/vendor/github.com/containers/storage/types/options_darwin.go @@ -0,0 +1,17 @@ +package types + +const ( + // these are default path for run and graph root for rootful users + // for rootless path is constructed via getRootlessStorageOpts + defaultRunRoot string = "/run/containers/storage" + defaultGraphRoot string = "/var/lib/containers/storage" +) + +// defaultConfigFile path to the system wide storage.conf file +var ( + defaultConfigFile = "/usr/share/containers/storage.conf" + defaultOverrideConfigFile = "/etc/containers/storage.conf" + defaultConfigFileSet = false + // DefaultStoreOptions is a reasonable default set of options. + defaultStoreOptions StoreOptions +) diff --git a/vendor/github.com/containers/storage/types/options_freebsd.go b/vendor/github.com/containers/storage/types/options_freebsd.go new file mode 100644 index 000000000..d5976b6d5 --- /dev/null +++ b/vendor/github.com/containers/storage/types/options_freebsd.go @@ -0,0 +1,17 @@ +package types + +const ( + // these are default path for run and graph root for rootful users + // for rootless path is constructed via getRootlessStorageOpts + defaultRunRoot string = "/var/run/containers/storage" + defaultGraphRoot string = "/var/db/containers/storage" +) + +// defaultConfigFile path to the system wide storage.conf file +var ( + defaultConfigFile = "/usr/local/share/containers/storage.conf" + defaultOverrideConfigFile = "/usr/local/etc/containers/storage.conf" + defaultConfigFileSet = false + // DefaultStoreOptions is a reasonable default set of options. + defaultStoreOptions StoreOptions +) diff --git a/vendor/github.com/containers/storage/types/options_linux.go b/vendor/github.com/containers/storage/types/options_linux.go new file mode 100644 index 000000000..d5ad50bc0 --- /dev/null +++ b/vendor/github.com/containers/storage/types/options_linux.go @@ -0,0 +1,17 @@ +package types + +const ( + // these are default path for run and graph root for rootful users + // for rootless path is constructed via getRootlessStorageOpts + defaultRunRoot string = "/run/containers/storage" + defaultGraphRoot string = "/var/lib/containers/storage" +) + +// defaultConfigFile path to the system wide storage.conf file +var ( + defaultConfigFile = "/usr/share/containers/storage.conf" + defaultOverrideConfigFile = "/etc/containers/storage.conf" + defaultConfigFileSet = false + // DefaultStoreOptions is a reasonable default set of options. + defaultStoreOptions StoreOptions +) diff --git a/vendor/github.com/containers/storage/types/options_windows.go b/vendor/github.com/containers/storage/types/options_windows.go new file mode 100644 index 000000000..d5ad50bc0 --- /dev/null +++ b/vendor/github.com/containers/storage/types/options_windows.go @@ -0,0 +1,17 @@ +package types + +const ( + // these are default path for run and graph root for rootful users + // for rootless path is constructed via getRootlessStorageOpts + defaultRunRoot string = "/run/containers/storage" + defaultGraphRoot string = "/var/lib/containers/storage" +) + +// defaultConfigFile path to the system wide storage.conf file +var ( + defaultConfigFile = "/usr/share/containers/storage.conf" + defaultOverrideConfigFile = "/etc/containers/storage.conf" + defaultConfigFileSet = false + // DefaultStoreOptions is a reasonable default set of options. + defaultStoreOptions StoreOptions +) diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md index 5b7cf781a..c3ec9d8a7 100644 --- a/vendor/github.com/klauspost/compress/README.md +++ b/vendor/github.com/klauspost/compress/README.md @@ -17,6 +17,16 @@ This package provides various compression algorithms. # changelog +* May 5, 2022 (v1.15.3) + * zstd: Allow to ignore checksum checking by @WojciechMula [#572](https://github.com/klauspost/compress/pull/572) + * s2: Fix incorrect seek for io.SeekEnd in [#575](https://github.com/klauspost/compress/pull/575) + +* Apr 26, 2022 (v1.15.2) + * zstd: Add x86-64 assembly for decompression on streams and blocks. Contributed by [@WojciechMula](https://github.com/WojciechMula). Typically 2x faster. [#528](https://github.com/klauspost/compress/pull/528) [#531](https://github.com/klauspost/compress/pull/531) [#545](https://github.com/klauspost/compress/pull/545) [#537](https://github.com/klauspost/compress/pull/537) + * zstd: Add options to ZipDecompressor and fixes [#539](https://github.com/klauspost/compress/pull/539) + * s2: Use sorted search for index [#555](https://github.com/klauspost/compress/pull/555) + * Minimum version is Go 1.16, added CI test on 1.18. + * Mar 11, 2022 (v1.15.1) * huff0: Add x86 assembly of Decode4X by @WojciechMula in [#512](https://github.com/klauspost/compress/pull/512) * zstd: Reuse zip decoders in [#514](https://github.com/klauspost/compress/pull/514) diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go index 8d632cea0..61342b6b8 100644 --- a/vendor/github.com/klauspost/compress/flate/inflate_gen.go +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -24,7 +24,7 @@ func (f *decompressor) huffmanBytesBuffer() { // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb := f.nb, f.b + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -82,9 +82,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesBuffer f.stepState = stateInit f.b, f.nb = fb, fnb @@ -227,10 +227,10 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > uint32(f.dict.histSize()) { + if dist > uint32(dict.histSize()) { f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return @@ -243,14 +243,14 @@ readLiteral: copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb @@ -275,7 +275,7 @@ func (f *decompressor) huffmanBytesReader() { // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb := f.nb, f.b + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -333,9 +333,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesReader f.stepState = stateInit f.b, f.nb = fb, fnb @@ -478,10 +478,10 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > uint32(f.dict.histSize()) { + if dist > uint32(dict.histSize()) { f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return @@ -494,14 +494,14 @@ readLiteral: copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBytesReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb @@ -526,7 +526,7 @@ func (f *decompressor) huffmanBufioReader() { // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb := f.nb, f.b + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -584,9 +584,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBufioReader f.stepState = stateInit f.b, f.nb = fb, fnb @@ -729,10 +729,10 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > uint32(f.dict.histSize()) { + if dist > uint32(dict.histSize()) { f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return @@ -745,14 +745,14 @@ readLiteral: copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanBufioReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb @@ -777,7 +777,7 @@ func (f *decompressor) huffmanStringsReader() { // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb := f.nb, f.b + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -835,9 +835,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanStringsReader f.stepState = stateInit f.b, f.nb = fb, fnb @@ -980,10 +980,10 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > uint32(f.dict.histSize()) { + if dist > uint32(dict.histSize()) { f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return @@ -996,14 +996,14 @@ readLiteral: copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanStringsReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb @@ -1028,7 +1028,7 @@ func (f *decompressor) huffmanGenericReader() { // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, // but is smart enough to keep local variables in registers, so use nb and b, // inline call to moreBits and reassign b,nb back to f on return. - fnb, fb := f.nb, f.b + fnb, fb, dict := f.nb, f.b, &f.dict switch f.stepState { case stateInit: @@ -1086,9 +1086,9 @@ readLiteral: var length int switch { case v < 256: - f.dict.writeByte(byte(v)) - if f.dict.availWrite() == 0 { - f.toRead = f.dict.readFlush() + dict.writeByte(byte(v)) + if dict.availWrite() == 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanGenericReader f.stepState = stateInit f.b, f.nb = fb, fnb @@ -1231,10 +1231,10 @@ readLiteral: } // No check on length; encoding can be prescient. - if dist > uint32(f.dict.histSize()) { + if dist > uint32(dict.histSize()) { f.b, f.nb = fb, fnb if debugDecode { - fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + fmt.Println("dist > dict.histSize():", dist, dict.histSize()) } f.err = CorruptInputError(f.roffset) return @@ -1247,14 +1247,14 @@ readLiteral: copyHistory: // Perform a backwards copy according to RFC section 3.2.3. { - cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + cnt := dict.tryWriteCopy(f.copyDist, f.copyLen) if cnt == 0 { - cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + cnt = dict.writeCopy(f.copyDist, f.copyLen) } f.copyLen -= cnt - if f.dict.availWrite() == 0 || f.copyLen > 0 { - f.toRead = f.dict.readFlush() + if dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = dict.readFlush() f.step = (*decompressor).huffmanGenericReader // We need to continue this work f.stepState = stateDict f.b, f.nb = fb, fnb diff --git a/vendor/github.com/klauspost/compress/huff0/autogen.go b/vendor/github.com/klauspost/compress/huff0/autogen.go deleted file mode 100644 index ff2c69d60..000000000 --- a/vendor/github.com/klauspost/compress/huff0/autogen.go +++ /dev/null @@ -1,5 +0,0 @@ -package huff0 - -//go:generate go run generate.go -//go:generate asmfmt -w decompress_amd64.s -//go:generate asmfmt -w decompress_8b_amd64.s diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s deleted file mode 100644 index 0d6cb1a96..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s +++ /dev/null @@ -1,488 +0,0 @@ -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" -#include "funcdata.h" -#include "go_asm.h" - -#define bufoff 256 // see decompress.go, we're using [4][256]byte table - -// func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, -// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool) -TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8 -#define off R8 -#define buffer DI -#define table SI - -#define br_bits_read R9 -#define br_value R10 -#define br_offset R11 -#define peek_bits R12 -#define exhausted DX - -#define br0 R13 -#define br1 R14 -#define br2 R15 -#define br3 BP - - MOVQ BP, 0(SP) - - XORQ exhausted, exhausted // exhausted = false - XORQ off, off // off = 0 - - MOVBQZX peekBits+32(FP), peek_bits - MOVQ buf+40(FP), buffer - MOVQ tbl+48(FP), table - - MOVQ pbr0+0(FP), br0 - MOVQ pbr1+8(FP), br1 - MOVQ pbr2+16(FP), br2 - MOVQ pbr3+24(FP), br3 - -main_loop: - - // const stream = 0 - // br0.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read - MOVQ bitReaderShifted_value(br0), br_value - MOVQ bitReaderShifted_off(br0), br_offset - - // if b.bitsRead >= 32 { - CMPQ br_bits_read, $32 - JB skip_fill0 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br0), AX - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVQ br_bits_read, CX - SHLQ CL, AX - ORQ AX, br_value - - // exhausted = exhausted || (br0.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH - - // } -skip_fill0: - - // val0 := br0.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br0.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val1 := br0.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br0.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 0(buffer)(off*1) - - // SECOND PART: - // val2 := br0.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v2 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br0.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val3 := br0.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v3 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br0.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off+2] = uint8(v2.entry >> 8) - // buf[stream][off+3] = uint8(v3.entry >> 8) - MOVW BX, 0+2(buffer)(off*1) - - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br0) - MOVQ br_value, bitReaderShifted_value(br0) - MOVQ br_offset, bitReaderShifted_off(br0) - - // const stream = 1 - // br1.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read - MOVQ bitReaderShifted_value(br1), br_value - MOVQ bitReaderShifted_off(br1), br_offset - - // if b.bitsRead >= 32 { - CMPQ br_bits_read, $32 - JB skip_fill1 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br1), AX - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVQ br_bits_read, CX - SHLQ CL, AX - ORQ AX, br_value - - // exhausted = exhausted || (br1.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH - - // } -skip_fill1: - - // val0 := br1.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br1.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val1 := br1.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br1.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 256(buffer)(off*1) - - // SECOND PART: - // val2 := br1.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v2 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br1.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val3 := br1.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v3 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br1.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off+2] = uint8(v2.entry >> 8) - // buf[stream][off+3] = uint8(v3.entry >> 8) - MOVW BX, 256+2(buffer)(off*1) - - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br1) - MOVQ br_value, bitReaderShifted_value(br1) - MOVQ br_offset, bitReaderShifted_off(br1) - - // const stream = 2 - // br2.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read - MOVQ bitReaderShifted_value(br2), br_value - MOVQ bitReaderShifted_off(br2), br_offset - - // if b.bitsRead >= 32 { - CMPQ br_bits_read, $32 - JB skip_fill2 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br2), AX - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVQ br_bits_read, CX - SHLQ CL, AX - ORQ AX, br_value - - // exhausted = exhausted || (br2.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH - - // } -skip_fill2: - - // val0 := br2.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br2.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val1 := br2.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br2.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 512(buffer)(off*1) - - // SECOND PART: - // val2 := br2.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v2 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br2.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val3 := br2.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v3 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br2.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off+2] = uint8(v2.entry >> 8) - // buf[stream][off+3] = uint8(v3.entry >> 8) - MOVW BX, 512+2(buffer)(off*1) - - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br2) - MOVQ br_value, bitReaderShifted_value(br2) - MOVQ br_offset, bitReaderShifted_off(br2) - - // const stream = 3 - // br3.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read - MOVQ bitReaderShifted_value(br3), br_value - MOVQ bitReaderShifted_off(br3), br_offset - - // if b.bitsRead >= 32 { - CMPQ br_bits_read, $32 - JB skip_fill3 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br3), AX - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVQ br_bits_read, CX - SHLQ CL, AX - ORQ AX, br_value - - // exhausted = exhausted || (br3.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH - - // } -skip_fill3: - - // val0 := br3.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br3.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val1 := br3.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br3.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 768(buffer)(off*1) - - // SECOND PART: - // val2 := br3.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v2 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br3.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val3 := br3.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v3 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br3.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // these two writes get coalesced - // buf[stream][off+2] = uint8(v2.entry >> 8) - // buf[stream][off+3] = uint8(v3.entry >> 8) - MOVW BX, 768+2(buffer)(off*1) - - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br3) - MOVQ br_value, bitReaderShifted_value(br3) - MOVQ br_offset, bitReaderShifted_off(br3) - - ADDQ $4, off // off += 2 - - TESTB DH, DH // any br[i].ofs < 4? - JNZ end - - CMPQ off, $bufoff - JL main_loop - -end: - MOVQ 0(SP), BP - - MOVB off, ret+56(FP) - RET - -#undef off -#undef buffer -#undef table - -#undef br_bits_read -#undef br_value -#undef br_offset -#undef peek_bits -#undef exhausted - -#undef br0 -#undef br1 -#undef br2 -#undef br3 diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in b/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in deleted file mode 100644 index 6d477a2c1..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in +++ /dev/null @@ -1,197 +0,0 @@ -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" -#include "funcdata.h" -#include "go_asm.h" - - -#define bufoff 256 // see decompress.go, we're using [4][256]byte table - -//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, -// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool) -TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8 -#define off R8 -#define buffer DI -#define table SI - -#define br_bits_read R9 -#define br_value R10 -#define br_offset R11 -#define peek_bits R12 -#define exhausted DX - -#define br0 R13 -#define br1 R14 -#define br2 R15 -#define br3 BP - - MOVQ BP, 0(SP) - - XORQ exhausted, exhausted // exhausted = false - XORQ off, off // off = 0 - - MOVBQZX peekBits+32(FP), peek_bits - MOVQ buf+40(FP), buffer - MOVQ tbl+48(FP), table - - MOVQ pbr0+0(FP), br0 - MOVQ pbr1+8(FP), br1 - MOVQ pbr2+16(FP), br2 - MOVQ pbr3+24(FP), br3 - -main_loop: -{{ define "decode_2_values_x86" }} - // const stream = {{ var "id" }} - // br{{ var "id"}}.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read - MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value - MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset - - // if b.bitsRead >= 32 { - CMPQ br_bits_read, $32 - JB skip_fill{{ var "id" }} - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br{{ var "id" }}), AX - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - - // b.value |= uint64(low) << (b.bitsRead & 63) - MOVQ br_bits_read, CX - SHLQ CL, AX - ORQ AX, br_value - - // exhausted = exhausted || (br{{ var "id"}}.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH - // } -skip_fill{{ var "id" }}: - - // val0 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br{{ var "id"}}.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val1 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br{{ var "id"}}.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, {{ var "bufofs" }}(buffer)(off*1) - - // SECOND PART: - // val2 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v2 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br{{ var "id"}}.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - // val3 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - - // v3 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br{{ var "id"}}.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - MOVBQZX AL, CX - SHLQ CX, br_value // value <<= n - ADDQ CX, br_bits_read // bits_read += n - - - // these two writes get coalesced - // buf[stream][off+2] = uint8(v2.entry >> 8) - // buf[stream][off+3] = uint8(v3.entry >> 8) - MOVW BX, {{ var "bufofs" }}+2(buffer)(off*1) - - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }}) - MOVQ br_value, bitReaderShifted_value(br{{ var "id" }}) - MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }}) -{{ end }} - - {{ set "id" "0" }} - {{ set "ofs" "0" }} - {{ set "bufofs" "0" }} {{/* id * bufoff */}} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "1" }} - {{ set "ofs" "8" }} - {{ set "bufofs" "256" }} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "2" }} - {{ set "ofs" "16" }} - {{ set "bufofs" "512" }} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "3" }} - {{ set "ofs" "24" }} - {{ set "bufofs" "768" }} - {{ template "decode_2_values_x86" . }} - - ADDQ $4, off // off += 2 - - TESTB DH, DH // any br[i].ofs < 4? - JNZ end - - CMPQ off, $bufoff - JL main_loop -end: - MOVQ 0(SP), BP - - MOVB off, ret+56(FP) - RET -#undef off -#undef buffer -#undef table - -#undef br_bits_read -#undef br_value -#undef br_offset -#undef peek_bits -#undef exhausted - -#undef br0 -#undef br1 -#undef br2 -#undef br3 diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go index ce8e93bcd..3415e5da2 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.go @@ -13,19 +13,30 @@ import ( // decompress4x_main_loop_x86 is an x86 assembler implementation // of Decompress4X when tablelog > 8. //go:noescape -func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, - peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 +func decompress4x_main_loop_amd64(ctx *decompress4xContext) // decompress4x_8b_loop_x86 is an x86 assembler implementation // of Decompress4X when tablelog <= 8 which decodes 4 entries // per loop. //go:noescape -func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, - peekBits uint8, buf *byte, tbl *dEntrySingle) uint8 +func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) // fallback8BitSize is the size where using Go version is faster. const fallback8BitSize = 800 +type decompress4xContext struct { + pbr0 *bitReaderShifted + pbr1 *bitReaderShifted + pbr2 *bitReaderShifted + pbr3 *bitReaderShifted + peekBits uint8 + out *byte + dstEvery int + tbl *dEntrySingle + decoded int + limit *byte +} + // Decompress4X will decompress a 4X encoded stream. // The length of the supplied input must match the end of a block exactly. // The *capacity* of the dst slice must match the destination size of @@ -42,6 +53,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { if cap(dst) < fallback8BitSize && use8BitTables { return d.decompress4X8bit(dst, src) } + var br [4]bitReaderShifted // Decode "jump table" start := 6 @@ -71,70 +83,28 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { const tlMask = tlSize - 1 single := d.dt.single[:tlSize] - // Use temp table to avoid bound checks/append penalty. - buf := d.buffer() - var off uint8 var decoded int - const debug = false - - // see: bitReaderShifted.peekBitsFast() - peekBits := uint8((64 - d.actualTableLog) & 63) - - // Decode 2 values from each decoder/loop. - const bufoff = 256 - for { - if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 { - break + if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) { + ctx := decompress4xContext{ + pbr0: &br[0], + pbr1: &br[1], + pbr2: &br[2], + pbr3: &br[3], + peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast() + out: &out[0], + dstEvery: dstEvery, + tbl: &single[0], + limit: &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last. } - if use8BitTables { - off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0]) + decompress4x_8b_main_loop_amd64(&ctx) } else { - off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0]) - } - if debug { - fmt.Print("DEBUG: ") - fmt.Printf("off=%d,", off) - for i := 0; i < 4; i++ { - fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}", - i, br[i].bitsRead, br[i].value, br[i].off) - } - fmt.Println("") - } - - if off != 0 { - break + decompress4x_main_loop_amd64(&ctx) } - if bufoff > dstEvery { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 1") - } - copy(out, buf[0][:]) - copy(out[dstEvery:], buf[1][:]) - copy(out[dstEvery*2:], buf[2][:]) - copy(out[dstEvery*3:], buf[3][:]) - out = out[bufoff:] - decoded += bufoff * 4 - // There must at least be 3 buffers left. - if len(out) < dstEvery*3 { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 2") - } - } - if off > 0 { - ioff := int(off) - if len(out) < dstEvery*3+ioff { - d.bufs.Put(buf) - return nil, errors.New("corruption detected: stream overrun 3") - } - copy(out, buf[0][:off]) - copy(out[dstEvery:], buf[1][:off]) - copy(out[dstEvery*2:], buf[2][:off]) - copy(out[dstEvery*3:], buf[3][:off]) - decoded += int(off) * 4 - out = out[off:] + decoded = ctx.decoded + out = out[decoded/4:] } // Decode remaining. @@ -150,7 +120,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { for bitsLeft > 0 { br.fill() if offset >= endsAt { - d.bufs.Put(buf) return nil, errors.New("corruption detected: stream overrun 4") } @@ -164,7 +133,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { offset++ } if offset != endsAt { - d.bufs.Put(buf) return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt) } decoded += offset - dstEvery*i @@ -173,7 +141,6 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) { return nil, err } } - d.bufs.Put(buf) if dstSize != decoded { return nil, errors.New("corruption detected: short output block") } diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s index 2edad3ea5..06287f568 100644 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s +++ b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s @@ -1,506 +1,662 @@ -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" -#include "funcdata.h" -#include "go_asm.h" - -#ifdef GOAMD64_v4 -#ifndef GOAMD64_v3 -#define GOAMD64_v3 -#endif -#endif - -#define bufoff 256 // see decompress.go, we're using [4][256]byte table - -// func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, -// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool) -TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8 -#define off R8 -#define buffer DI -#define table SI - -#define br_bits_read R9 -#define br_value R10 -#define br_offset R11 -#define peek_bits R12 -#define exhausted DX - -#define br0 R13 -#define br1 R14 -#define br2 R15 -#define br3 BP - - MOVQ BP, 0(SP) - - XORQ exhausted, exhausted // exhausted = false - XORQ off, off // off = 0 - - MOVBQZX peekBits+32(FP), peek_bits - MOVQ buf+40(FP), buffer - MOVQ tbl+48(FP), table - - MOVQ pbr0+0(FP), br0 - MOVQ pbr1+8(FP), br1 - MOVQ pbr2+16(FP), br2 - MOVQ pbr3+24(FP), br3 - +// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT. + +//go:build amd64 && !appengine && !noasm && gc +// +build amd64,!appengine,!noasm,gc + +// func decompress4x_main_loop_amd64(ctx *decompress4xContext) +TEXT ·decompress4x_main_loop_amd64(SB), $8-8 + XORQ DX, DX + + // Preload values + MOVQ ctx+0(FP), AX + MOVBQZX 32(AX), SI + MOVQ 40(AX), DI + MOVQ DI, BX + MOVQ 72(AX), CX + MOVQ CX, (SP) + MOVQ 48(AX), R8 + MOVQ 56(AX), R9 + MOVQ (AX), R10 + MOVQ 8(AX), R11 + MOVQ 16(AX), R12 + MOVQ 24(AX), R13 + + // Main loop main_loop: - - // const stream = 0 - // br0.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read - MOVQ bitReaderShifted_value(br0), br_value - MOVQ bitReaderShifted_off(br0), br_offset - - // We must have at least 2 * max tablelog left - CMPQ br_bits_read, $64-22 - JBE skip_fill0 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br0), AX + MOVQ BX, DI + CMPQ DI, (SP) + SETGE DL + + // br0.fillFast32() + MOVQ 32(R10), R14 + MOVBQZX 40(R10), R15 + CMPQ R15, $0x20 + JBE skip_fill0 + MOVQ 24(R10), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R10), BP // b.value |= uint64(low) << (b.bitsRead & 63) -#ifdef GOAMD64_v3 - SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) - -#else - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - MOVQ br_bits_read, CX - SHLQ CL, AX - -#endif - - ORQ AX, br_value + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R10) + ORQ BP, R14 // exhausted = exhausted || (br0.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL - // } skip_fill0: - // val0 := br0.peekTopBits(peekBits) -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask - -#else - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - -#endif + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br0.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n + MOVW (R9)(BP*2), CX -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n + // br0.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 -#endif - - ADDQ CX, br_bits_read // bits_read += n - -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask - -#else // val1 := br0.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - -#endif + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 + MOVW (R9)(BP*2), CX // br0.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n - -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - -#endif - - ADDQ CX, br_bits_read // bits_read += n + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 0(buffer)(off*1) + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br0) - MOVQ br_value, bitReaderShifted_value(br0) - MOVQ br_offset, bitReaderShifted_off(br0) - - // const stream = 1 - // br1.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read - MOVQ bitReaderShifted_value(br1), br_value - MOVQ bitReaderShifted_off(br1), br_offset - - // We must have at least 2 * max tablelog left - CMPQ br_bits_read, $64-22 - JBE skip_fill1 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br1), AX + MOVQ R14, 32(R10) + MOVB R15, 40(R10) + ADDQ R8, DI + + // br1.fillFast32() + MOVQ 32(R11), R14 + MOVBQZX 40(R11), R15 + CMPQ R15, $0x20 + JBE skip_fill1 + MOVQ 24(R11), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R11), BP // b.value |= uint64(low) << (b.bitsRead & 63) -#ifdef GOAMD64_v3 - SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) - -#else - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - MOVQ br_bits_read, CX - SHLQ CL, AX - -#endif - - ORQ AX, br_value + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R11) + ORQ BP, R14 // exhausted = exhausted || (br1.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL - // } skip_fill1: - // val0 := br1.peekTopBits(peekBits) -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask - -#else - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - -#endif + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br1.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n + MOVW (R9)(BP*2), CX -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n + // br1.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 -#endif - - ADDQ CX, br_bits_read // bits_read += n - -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask - -#else // val1 := br1.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - -#endif + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 + MOVW (R9)(BP*2), CX // br1.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n - -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - -#endif - - ADDQ CX, br_bits_read // bits_read += n + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 256(buffer)(off*1) + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br1) - MOVQ br_value, bitReaderShifted_value(br1) - MOVQ br_offset, bitReaderShifted_off(br1) - - // const stream = 2 - // br2.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read - MOVQ bitReaderShifted_value(br2), br_value - MOVQ bitReaderShifted_off(br2), br_offset - - // We must have at least 2 * max tablelog left - CMPQ br_bits_read, $64-22 - JBE skip_fill2 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br2), AX + MOVQ R14, 32(R11) + MOVB R15, 40(R11) + ADDQ R8, DI + + // br2.fillFast32() + MOVQ 32(R12), R14 + MOVBQZX 40(R12), R15 + CMPQ R15, $0x20 + JBE skip_fill2 + MOVQ 24(R12), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R12), BP // b.value |= uint64(low) << (b.bitsRead & 63) -#ifdef GOAMD64_v3 - SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) - -#else - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - MOVQ br_bits_read, CX - SHLQ CL, AX - -#endif - - ORQ AX, br_value + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R12) + ORQ BP, R14 // exhausted = exhausted || (br2.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL - // } skip_fill2: - // val0 := br2.peekTopBits(peekBits) -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask - -#else - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - -#endif + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 + MOVW (R9)(BP*2), CX - // br2.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) + // br2.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n - -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - -#endif - - ADDQ CX, br_bits_read // bits_read += n - -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask - -#else // val1 := br2.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - -#endif + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 + MOVW (R9)(BP*2), CX // br2.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n - -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n - -#endif - - ADDQ CX, br_bits_read // bits_read += n + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 512(buffer)(off*1) + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br2) - MOVQ br_value, bitReaderShifted_value(br2) - MOVQ br_offset, bitReaderShifted_off(br2) - - // const stream = 3 - // br3.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read - MOVQ bitReaderShifted_value(br3), br_value - MOVQ bitReaderShifted_off(br3), br_offset - - // We must have at least 2 * max tablelog left - CMPQ br_bits_read, $64-22 - JBE skip_fill3 - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br3), AX + MOVQ R14, 32(R12) + MOVB R15, 40(R12) + ADDQ R8, DI + + // br3.fillFast32() + MOVQ 32(R13), R14 + MOVBQZX 40(R13), R15 + CMPQ R15, $0x20 + JBE skip_fill3 + MOVQ 24(R13), AX + SUBQ $0x20, R15 + SUBQ $0x04, AX + MOVQ (R13), BP // b.value |= uint64(low) << (b.bitsRead & 63) -#ifdef GOAMD64_v3 - SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) - -#else - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - MOVQ br_bits_read, CX - SHLQ CL, AX - -#endif - - ORQ AX, br_value + MOVL (AX)(BP*1), BP + MOVQ R15, CX + SHLQ CL, BP + MOVQ AX, 24(R13) + ORQ BP, R14 // exhausted = exhausted || (br3.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH + CMPQ AX, $0x04 + SETLT AL + ORB AL, DL - // } skip_fill3: - // val0 := br3.peekTopBits(peekBits) -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask - -#else - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask - -#endif + MOVQ R14, BP + MOVQ SI, CX + SHRQ CL, BP // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 + MOVW (R9)(BP*2), CX - // br3.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) + // br3.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R14 + ADDB CL, R15 -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n - -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n + // val1 := br3.peekTopBits(peekBits) + MOVQ SI, CX + MOVQ R14, BP + SHRQ CL, BP -#endif + // v1 := table[val1&mask] + MOVW (R9)(BP*2), CX - ADDQ CX, br_bits_read // bits_read += n + // br3.advance(uint8(v1.entry)) + MOVB CH, AH + SHLQ CL, R14 + ADDB CL, R15 -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask + // these two writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + MOVW AX, (DI) -#else - // val1 := br3.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask + // update the bitrader reader structure + MOVQ R14, 32(R13) + MOVB R15, 40(R13) + ADDQ $0x02, BX + TESTB DL, DL + JZ main_loop + MOVQ ctx+0(FP), AX + MOVQ 40(AX), CX + MOVQ BX, DX + SUBQ CX, DX + SHLQ $0x02, DX + MOVQ DX, 64(AX) + RET -#endif +// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) +TEXT ·decompress4x_8b_main_loop_amd64(SB), $16-8 + XORQ DX, DX + + // Preload values + MOVQ ctx+0(FP), CX + MOVBQZX 32(CX), BX + MOVQ 40(CX), SI + MOVQ SI, (SP) + MOVQ 72(CX), DX + MOVQ DX, 8(SP) + MOVQ 48(CX), DI + MOVQ 56(CX), R8 + MOVQ (CX), R9 + MOVQ 8(CX), R10 + MOVQ 16(CX), R11 + MOVQ 24(CX), R12 + + // Main loop +main_loop: + MOVQ (SP), SI + CMPQ SI, 8(SP) + SETGE DL + + // br1000.fillFast32() + MOVQ 32(R9), R13 + MOVBQZX 40(R9), R14 + CMPQ R14, $0x20 + JBE skip_fill1000 + MOVQ 24(R9), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R9), BP - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R9) + ORQ BP, R13 + + // exhausted = exhausted || (br1000.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1000: + // val0 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 - // br3.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n + // br0.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n + // val1 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br0.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br0.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br0.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br0.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R9) + MOVB R14, 40(R9) + ADDQ DI, SI + + // br1001.fillFast32() + MOVQ 32(R10), R13 + MOVBQZX 40(R10), R14 + CMPQ R14, $0x20 + JBE skip_fill1001 + MOVQ 24(R10), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R10), BP -#endif + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R10) + ORQ BP, R13 + + // exhausted = exhausted || (br1001.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1001: + // val0 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 - ADDQ CX, br_bits_read // bits_read += n + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, 768(buffer)(off*1) + // br1.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br3) - MOVQ br_value, bitReaderShifted_value(br3) - MOVQ br_offset, bitReaderShifted_off(br3) + // val1 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br1.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br1.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br1.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br1.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R10) + MOVB R14, 40(R10) + ADDQ DI, SI + + // br1002.fillFast32() + MOVQ 32(R11), R13 + MOVBQZX 40(R11), R14 + CMPQ R14, $0x20 + JBE skip_fill1002 + MOVQ 24(R11), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R11), BP - ADDQ $2, off // off += 2 + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R11) + ORQ BP, R13 + + // exhausted = exhausted || (br1002.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1002: + // val0 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 - TESTB DH, DH // any br[i].ofs < 4? - JNZ end + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX - CMPQ off, $bufoff - JL main_loop + // br2.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 -end: - MOVQ 0(SP), BP + // val1 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br2.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br2.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br2.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br2.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R11) + MOVB R14, 40(R11) + ADDQ DI, SI + + // br1003.fillFast32() + MOVQ 32(R12), R13 + MOVBQZX 40(R12), R14 + CMPQ R14, $0x20 + JBE skip_fill1003 + MOVQ 24(R12), R15 + SUBQ $0x20, R14 + SUBQ $0x04, R15 + MOVQ (R12), BP - MOVB off, ret+56(FP) - RET + // b.value |= uint64(low) << (b.bitsRead & 63) + MOVL (R15)(BP*1), BP + MOVQ R14, CX + SHLQ CL, BP + MOVQ R15, 24(R12) + ORQ BP, R13 + + // exhausted = exhausted || (br1003.off < 4) + CMPQ R15, $0x04 + SETLT AL + ORB AL, DL + +skip_fill1003: + // val0 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 -#undef off -#undef buffer -#undef table + // v0 := table[val0&mask] + MOVW (R8)(R15*2), CX -#undef br_bits_read -#undef br_value -#undef br_offset -#undef peek_bits -#undef exhausted + // br3.advance(uint8(v0.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 -#undef br0 -#undef br1 -#undef br2 -#undef br3 + // val1 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v1 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br3.advance(uint8(v1.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // val2 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v2 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br3.advance(uint8(v2.entry) + MOVB CH, AH + SHLQ CL, R13 + ADDB CL, R14 + + // val3 := br3.peekTopBits(peekBits) + MOVQ R13, R15 + MOVQ BX, CX + SHRQ CL, R15 + + // v3 := table[val0&mask] + MOVW (R8)(R15*2), CX + + // br3.advance(uint8(v3.entry) + MOVB CH, AL + SHLQ CL, R13 + ADDB CL, R14 + BSWAPL AX + + // these four writes get coalesced + // out[id * dstEvery + 0] = uint8(v0.entry >> 8) + // out[id * dstEvery + 1] = uint8(v1.entry >> 8) + // out[id * dstEvery + 3] = uint8(v2.entry >> 8) + // out[id * dstEvery + 4] = uint8(v3.entry >> 8) + MOVL AX, (SI) + + // update the bitreader reader structure + MOVQ R13, 32(R12) + MOVB R14, 40(R12) + ADDQ $0x04, (SP) + TESTB DL, DL + JZ main_loop + MOVQ ctx+0(FP), AX + MOVQ 40(AX), CX + MOVQ (SP), DX + SUBQ CX, DX + SHLQ $0x02, DX + MOVQ DX, 64(AX) + RET diff --git a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in b/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in deleted file mode 100644 index 330d86ae1..000000000 --- a/vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in +++ /dev/null @@ -1,195 +0,0 @@ -// +build !appengine -// +build gc -// +build !noasm - -#include "textflag.h" -#include "funcdata.h" -#include "go_asm.h" - -#ifdef GOAMD64_v4 -#ifndef GOAMD64_v3 -#define GOAMD64_v3 -#endif -#endif - -#define bufoff 256 // see decompress.go, we're using [4][256]byte table - -//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted, -// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool) -TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8 -#define off R8 -#define buffer DI -#define table SI - -#define br_bits_read R9 -#define br_value R10 -#define br_offset R11 -#define peek_bits R12 -#define exhausted DX - -#define br0 R13 -#define br1 R14 -#define br2 R15 -#define br3 BP - - MOVQ BP, 0(SP) - - XORQ exhausted, exhausted // exhausted = false - XORQ off, off // off = 0 - - MOVBQZX peekBits+32(FP), peek_bits - MOVQ buf+40(FP), buffer - MOVQ tbl+48(FP), table - - MOVQ pbr0+0(FP), br0 - MOVQ pbr1+8(FP), br1 - MOVQ pbr2+16(FP), br2 - MOVQ pbr3+24(FP), br3 - -main_loop: -{{ define "decode_2_values_x86" }} - // const stream = {{ var "id" }} - // br{{ var "id"}}.fillFast() - MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read - MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value - MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset - - // We must have at least 2 * max tablelog left - CMPQ br_bits_read, $64-22 - JBE skip_fill{{ var "id" }} - - SUBQ $32, br_bits_read // b.bitsRead -= 32 - SUBQ $4, br_offset // b.off -= 4 - - // v := b.in[b.off-4 : b.off] - // v = v[:4] - // low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) - MOVQ bitReaderShifted_in(br{{ var "id" }}), AX - - // b.value |= uint64(low) << (b.bitsRead & 63) -#ifdef GOAMD64_v3 - SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63) -#else - MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) - MOVQ br_bits_read, CX - SHLQ CL, AX -#endif - - ORQ AX, br_value - - // exhausted = exhausted || (br{{ var "id"}}.off < 4) - CMPQ br_offset, $4 - SETLT DL - ORB DL, DH - // } -skip_fill{{ var "id" }}: - - // val0 := br{{ var "id"}}.peekTopBits(peekBits) -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask -#else - MOVQ br_value, AX - MOVQ peek_bits, CX - SHRQ CL, AX // AX = (value >> peek_bits) & mask -#endif - - // v0 := table[val0&mask] - MOVW 0(table)(AX*2), AX // AX - v0 - - // br{{ var "id"}}.advance(uint8(v0.entry)) - MOVB AH, BL // BL = uint8(v0.entry >> 8) - -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n -#endif - - ADDQ CX, br_bits_read // bits_read += n - - -#ifdef GOAMD64_v3 - SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask -#else - // val1 := br{{ var "id"}}.peekTopBits(peekBits) - MOVQ peek_bits, CX - MOVQ br_value, AX - SHRQ CL, AX // AX = (value >> peek_bits) & mask -#endif - - // v1 := table[val1&mask] - MOVW 0(table)(AX*2), AX // AX - v1 - - // br{{ var "id"}}.advance(uint8(v1.entry)) - MOVB AH, BH // BH = uint8(v1.entry >> 8) - -#ifdef GOAMD64_v3 - MOVBQZX AL, CX - SHLXQ AX, br_value, br_value // value <<= n -#else - MOVBQZX AL, CX - SHLQ CL, br_value // value <<= n -#endif - - ADDQ CX, br_bits_read // bits_read += n - - - // these two writes get coalesced - // buf[stream][off] = uint8(v0.entry >> 8) - // buf[stream][off+1] = uint8(v1.entry >> 8) - MOVW BX, {{ var "bufofs" }}(buffer)(off*1) - - // update the bitrader reader structure - MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }}) - MOVQ br_value, bitReaderShifted_value(br{{ var "id" }}) - MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }}) -{{ end }} - - {{ set "id" "0" }} - {{ set "ofs" "0" }} - {{ set "bufofs" "0" }} {{/* id * bufoff */}} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "1" }} - {{ set "ofs" "8" }} - {{ set "bufofs" "256" }} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "2" }} - {{ set "ofs" "16" }} - {{ set "bufofs" "512" }} - {{ template "decode_2_values_x86" . }} - - {{ set "id" "3" }} - {{ set "ofs" "24" }} - {{ set "bufofs" "768" }} - {{ template "decode_2_values_x86" . }} - - ADDQ $2, off // off += 2 - - TESTB DH, DH // any br[i].ofs < 4? - JNZ end - - CMPQ off, $bufoff - JL main_loop -end: - MOVQ 0(SP), BP - - MOVB off, ret+56(FP) - RET -#undef off -#undef buffer -#undef table - -#undef br_bits_read -#undef br_value -#undef br_offset -#undef peek_bits -#undef exhausted - -#undef br0 -#undef br1 -#undef br2 -#undef br3 diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go index c65ea9795..36119f385 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -439,7 +439,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp) } - if len(next.b) > 0 { + if !d.o.ignoreChecksum && len(next.b) > 0 { n, err := d.current.crc.Write(next.b) if err == nil { if n != len(next.b) { @@ -451,7 +451,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) { got := d.current.crc.Sum64() var tmp [4]byte binary.LittleEndian.PutUint32(tmp[:], uint32(got)) - if !bytes.Equal(tmp[:], next.d.checkCRC) && !ignoreCRC { + if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) { if debugDecoder { println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)") } @@ -535,9 +535,15 @@ func (d *Decoder) nextBlockSync() (ok bool) { // Update/Check CRC if d.frame.HasCheckSum { - d.frame.crc.Write(d.current.b) + if !d.o.ignoreChecksum { + d.frame.crc.Write(d.current.b) + } if d.current.d.Last { - d.current.err = d.frame.checkCRC() + if !d.o.ignoreChecksum { + d.current.err = d.frame.checkCRC() + } else { + d.current.err = d.frame.consumeCRC() + } if d.current.err != nil { println("CRC error:", d.current.err) return false diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go index fc52ebc40..c70e6fa0f 100644 --- a/vendor/github.com/klauspost/compress/zstd/decoder_options.go +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -19,6 +19,7 @@ type decoderOptions struct { maxDecodedSize uint64 maxWindowSize uint64 dicts []dict + ignoreChecksum bool } func (o *decoderOptions) setDefault() { @@ -112,3 +113,11 @@ func WithDecoderMaxWindow(size uint64) DOption { return nil } } + +// IgnoreChecksum allows to forcibly ignore checksum checking. +func IgnoreChecksum(b bool) DOption { + return func(o *decoderOptions) error { + o.ignoreChecksum = b + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go index 509d5cece..3ff109cce 100644 --- a/vendor/github.com/klauspost/compress/zstd/framedec.go +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -290,13 +290,6 @@ func (d *frameDec) checkCRC() error { if !d.HasCheckSum { return nil } - var tmp [4]byte - got := d.crc.Sum64() - // Flip to match file order. - tmp[0] = byte(got >> 0) - tmp[1] = byte(got >> 8) - tmp[2] = byte(got >> 16) - tmp[3] = byte(got >> 24) // We can overwrite upper tmp now want, err := d.rawInput.readSmall(4) @@ -305,7 +298,19 @@ func (d *frameDec) checkCRC() error { return err } - if !bytes.Equal(tmp[:], want) && !ignoreCRC { + if d.o.ignoreChecksum { + return nil + } + + var tmp [4]byte + got := d.crc.Sum64() + // Flip to match file order. + tmp[0] = byte(got >> 0) + tmp[1] = byte(got >> 8) + tmp[2] = byte(got >> 16) + tmp[3] = byte(got >> 24) + + if !bytes.Equal(tmp[:], want) { if debugDecoder { println("CRC Check Failed:", tmp[:], "!=", want) } @@ -317,6 +322,19 @@ func (d *frameDec) checkCRC() error { return nil } +// consumeCRC reads the checksum data if the frame has one. +func (d *frameDec) consumeCRC() error { + if d.HasCheckSum { + _, err := d.rawInput.readSmall(4) + if err != nil { + println("CRC missing?", err) + return err + } + } + + return nil +} + // runDecoder will create a sync decoder that will decode a block of data. func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { saved := d.history.b @@ -373,13 +391,17 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize { err = ErrFrameSizeMismatch } else if d.HasCheckSum { - var n int - n, err = d.crc.Write(dst[crcStart:]) - if err == nil { - if n != len(dst)-crcStart { - err = io.ErrShortWrite - } else { - err = d.checkCRC() + if d.o.ignoreChecksum { + err = d.consumeCRC() + } else { + var n int + n, err = d.crc.Write(dst[crcStart:]) + if err == nil { + if n != len(dst)-crcStart { + err = io.ErrShortWrite + } else { + err = d.checkCRC() + } } } } diff --git a/vendor/github.com/klauspost/compress/zstd/fuzz.go b/vendor/github.com/klauspost/compress/zstd/fuzz.go deleted file mode 100644 index 7f2210e05..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fuzz.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build ignorecrc -// +build ignorecrc - -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -// ignoreCRC can be used for fuzz testing to ignore CRC values... -const ignoreCRC = true diff --git a/vendor/github.com/klauspost/compress/zstd/fuzz_none.go b/vendor/github.com/klauspost/compress/zstd/fuzz_none.go deleted file mode 100644 index 6811c68a8..000000000 --- a/vendor/github.com/klauspost/compress/zstd/fuzz_none.go +++ /dev/null @@ -1,11 +0,0 @@ -//go:build !ignorecrc -// +build !ignorecrc - -// Copyright 2019+ Klaus Post. All rights reserved. -// License information can be found in the LICENSE file. -// Based on work by Yann Collet, released under BSD License. - -package zstd - -// ignoreCRC can be used for fuzz testing to ignore CRC values... -const ignoreCRC = false diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s index 01cc23fa8..2585b2e98 100644 --- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s +++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s @@ -1326,30 +1326,30 @@ copy_match: JA copy_overlapping_match // Copy non-overlapping match - XORQ R12, R12 + ADDQ R13, DI + MOVQ BX, R12 + ADDQ R13, BX copy_2: - MOVUPS (R11)(R12*1), X0 - MOVUPS X0, (BX)(R12*1) + MOVUPS (R11), X0 + MOVUPS X0, (R12) + ADDQ $0x10, R11 ADDQ $0x10, R12 - CMPQ R12, R13 - JB copy_2 - ADDQ R13, BX - ADDQ R13, DI + SUBQ $0x10, R13 + JHI copy_2 JMP handle_loop // Copy overlapping match copy_overlapping_match: - XORQ R12, R12 + ADDQ R13, DI copy_slow_3: - MOVB (R11)(R12*1), R14 - MOVB R14, (BX)(R12*1) - INCQ R12 - CMPQ R12, R13 - JB copy_slow_3 - ADDQ R13, BX - ADDQ R13, DI + MOVB (R11), R12 + MOVB R12, (BX) + INCQ R11 + INCQ BX + DECQ R13 + JNZ copy_slow_3 handle_loop: ADDQ $0x18, AX @@ -1826,30 +1826,30 @@ copy_match: JA copy_overlapping_match // Copy non-overlapping match - XORQ CX, CX + ADDQ R13, R12 + MOVQ R10, CX + ADDQ R13, R10 copy_2: - MOVUPS (AX)(CX*1), X0 - MOVUPS X0, (R10)(CX*1) + MOVUPS (AX), X0 + MOVUPS X0, (CX) + ADDQ $0x10, AX ADDQ $0x10, CX - CMPQ CX, R13 - JB copy_2 - ADDQ R13, R10 - ADDQ R13, R12 + SUBQ $0x10, R13 + JHI copy_2 JMP handle_loop // Copy overlapping match copy_overlapping_match: - XORQ CX, CX + ADDQ R13, R12 copy_slow_3: - MOVB (AX)(CX*1), R14 - MOVB R14, (R10)(CX*1) - INCQ CX - CMPQ CX, R13 - JB copy_slow_3 - ADDQ R13, R10 - ADDQ R13, R12 + MOVB (AX), CL + MOVB CL, (R10) + INCQ AX + INCQ R10 + DECQ R13 + JNZ copy_slow_3 handle_loop: MOVQ ctx+16(FP), AX @@ -2333,30 +2333,30 @@ copy_match: JA copy_overlapping_match // Copy non-overlapping match - XORQ R12, R12 + ADDQ R13, R11 + MOVQ R9, R12 + ADDQ R13, R9 copy_2: - MOVUPS (CX)(R12*1), X0 - MOVUPS X0, (R9)(R12*1) + MOVUPS (CX), X0 + MOVUPS X0, (R12) + ADDQ $0x10, CX ADDQ $0x10, R12 - CMPQ R12, R13 - JB copy_2 - ADDQ R13, R9 - ADDQ R13, R11 + SUBQ $0x10, R13 + JHI copy_2 JMP handle_loop // Copy overlapping match copy_overlapping_match: - XORQ R12, R12 + ADDQ R13, R11 copy_slow_3: - MOVB (CX)(R12*1), R14 - MOVB R14, (R9)(R12*1) - INCQ R12 - CMPQ R12, R13 - JB copy_slow_3 - ADDQ R13, R9 - ADDQ R13, R11 + MOVB (CX), R12 + MOVB R12, (R9) + INCQ CX + INCQ R9 + DECQ R13 + JNZ copy_slow_3 handle_loop: MOVQ ctx+16(FP), CX @@ -2862,6 +2862,7 @@ copy_match: JA copy_overlapping_match // Copy non-overlapping match + ADDQ R13, R12 XORQ CX, CX TESTQ $0x00000001, R13 JZ copy_2_word @@ -2900,21 +2901,19 @@ copy_2_test: CMPQ CX, R13 JB copy_2 ADDQ R13, R10 - ADDQ R13, R12 JMP handle_loop // Copy overlapping match copy_overlapping_match: - XORQ CX, CX + ADDQ R13, R12 copy_slow_3: - MOVB (AX)(CX*1), R14 - MOVB R14, (R10)(CX*1) - INCQ CX - CMPQ CX, R13 - JB copy_slow_3 - ADDQ R13, R10 - ADDQ R13, R12 + MOVB (AX), CL + MOVB CL, (R10) + INCQ AX + INCQ R10 + DECQ R13 + JNZ copy_slow_3 handle_loop: MOVQ ctx+16(FP), AX @@ -3398,6 +3397,7 @@ copy_match: JA copy_overlapping_match // Copy non-overlapping match + ADDQ R13, R11 XORQ R12, R12 TESTQ $0x00000001, R13 JZ copy_2_word @@ -3436,21 +3436,19 @@ copy_2_test: CMPQ R12, R13 JB copy_2 ADDQ R13, R9 - ADDQ R13, R11 JMP handle_loop // Copy overlapping match copy_overlapping_match: - XORQ R12, R12 + ADDQ R13, R11 copy_slow_3: - MOVB (CX)(R12*1), R14 - MOVB R14, (R9)(R12*1) - INCQ R12 - CMPQ R12, R13 - JB copy_slow_3 - ADDQ R13, R9 - ADDQ R13, R11 + MOVB (CX), R12 + MOVB R12, (R9) + INCQ CX + INCQ R9 + DECQ R13 + JNZ copy_slow_3 handle_loop: MOVQ ctx+16(FP), CX diff --git a/vendor/modules.txt b/vendor/modules.txt index 8fcaa6663..52f8c103d 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -134,7 +134,7 @@ github.com/containers/ocicrypt/keywrap/pkcs7 github.com/containers/ocicrypt/spec github.com/containers/ocicrypt/utils github.com/containers/ocicrypt/utils/keyprovider -# github.com/containers/storage v1.40.2 +# github.com/containers/storage v1.41.0 ## explicit github.com/containers/storage github.com/containers/storage/drivers @@ -276,7 +276,7 @@ github.com/jinzhu/copier # github.com/json-iterator/go v1.1.12 ## explicit github.com/json-iterator/go -# github.com/klauspost/compress v1.15.2 +# github.com/klauspost/compress v1.15.4 github.com/klauspost/compress github.com/klauspost/compress/flate github.com/klauspost/compress/fse