Update mattermost library (#2152)

* Update mattermost library
* Fix linting
2025-07-13 07:56:28 +00:00 · 2024-05-24 23:08:09 +02:00
parent 65d78e38af
commit d16645c952
1003 changed files with 89451 additions and 114025 deletions
--- a/vendor/github.com/klauspost/compress/.goreleaser.yml
+++ b/vendor/github.com/klauspost/compress/.goreleaser.yml
@ -3,7 +3,6 @@
 before:
  hooks:
    - ./gen.sh
-    - go install mvdan.cc/garble@v0.10.1

 builds:
  -
@ -32,7 +31,6 @@ builds:
      - mips64le
    goarm:
      - 7
-    gobinary: garble
  -
    id: "s2d"
    binary: s2d
@ -59,7 +57,6 @@ builds:
      - mips64le
    goarm:
      - 7
-    gobinary: garble
  -
    id: "s2sx"
    binary: s2sx
@ -87,7 +84,6 @@ builds:
      - mips64le
    goarm:
      - 7
-    gobinary: garble

 archives:
  -
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@ -16,6 +16,46 @@ This package provides various compression algorithms.

 # changelog

+* Feb 5th, 2024 - [v1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6)
+	* zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923
+	* s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925
+  
+* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5)
+	* flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912
+	* zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908
+	* zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913
+	* zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910
+	* s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917
+https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918
+
+* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4)
+	* huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887
+	* huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886
+	* gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892
+	* gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890
+	* gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891
+
+* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3)
+	* fse: Fix max header size https://github.com/klauspost/compress/pull/881
+	* zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877
+	* gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883
+
+* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
+	* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
+
+* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
+	* s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871
+	* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
+	* s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867
+
+* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
+	* Add experimental dictionary builder  https://github.com/klauspost/compress/pull/853
+	* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
+	* flate: Add limited window compression https://github.com/klauspost/compress/pull/843
+	* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
+	* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
+	* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
+   
 * July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
 	* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
 	* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
@ -538,7 +578,7 @@ For direct deflate use, NewStatelessWriter and StatelessDeflate are available. S

 A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:

-```
+```go
 	// replace 'ioutil.Discard' with your output.
 	gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
 	if err != nil {
@ -646,6 +686,7 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
 * [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
 * [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
 * [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
+* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.

 # license

--- a/vendor/github.com/klauspost/compress/fse/compress.go
+++ b/vendor/github.com/klauspost/compress/fse/compress.go
@ -212,7 +212,7 @@ func (s *Scratch) writeCount() error {
 		previous0 bool
 		charnum   uint16

-		maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
+		maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3

 		// Write Table Size
 		bitStream = uint32(tableLog - minTablelog)
--- a/vendor/github.com/klauspost/compress/huff0/bytereader.go
+++ b/vendor/github.com/klauspost/compress/huff0/bytereader.go
@ -1,44 +0,0 @@
-// Copyright 2018 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
-
-package huff0
-
-// byteReader provides a byte reader that reads
-// little endian values from a byte stream.
-// The input stream is manually advanced.
-// The reader performs no bounds checks.
-type byteReader struct {
-	b   []byte
-	off int
-}
-
-// init will initialize the reader and set the input.
-func (b *byteReader) init(in []byte) {
-	b.b = in
-	b.off = 0
-}
-
-// Int32 returns a little endian int32 starting at current offset.
-func (b byteReader) Int32() int32 {
-	v3 := int32(b.b[b.off+3])
-	v2 := int32(b.b[b.off+2])
-	v1 := int32(b.b[b.off+1])
-	v0 := int32(b.b[b.off])
-	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// Uint32 returns a little endian uint32 starting at current offset.
-func (b byteReader) Uint32() uint32 {
-	v3 := uint32(b.b[b.off+3])
-	v2 := uint32(b.b[b.off+2])
-	v1 := uint32(b.b[b.off+1])
-	v0 := uint32(b.b[b.off])
-	return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
-}
-
-// remain will return the number of bytes remaining.
-func (b byteReader) remain() int {
-	return len(b.b) - b.off
-}
--- a/vendor/github.com/klauspost/compress/huff0/compress.go
+++ b/vendor/github.com/klauspost/compress/huff0/compress.go
@ -350,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
 // Does not update s.clearCount.
 func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
 	reuse = true
+	_ = s.count // Assert that s != nil to speed up the following loop.
 	for _, v := range in {
 		s.count[v]++
 	}
@ -415,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool {

 // minTableLog provides the minimum logSize to safely represent a distribution.
 func (s *Scratch) minTableLog() uint8 {
-	minBitsSrc := highBit32(uint32(s.br.remain())) + 1
+	minBitsSrc := highBit32(uint32(s.srcLen)) + 1
 	minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
 	if minBitsSrc < minBitsSymbols {
 		return uint8(minBitsSrc)
@ -427,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 {
 func (s *Scratch) optimalTableLog() {
 	tableLog := s.TableLog
 	minBits := s.minTableLog()
-	maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1
+	maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1
 	if maxBitsSrc < tableLog {
 		// Accuracy can be reduced
 		tableLog = maxBitsSrc
--- a/vendor/github.com/klauspost/compress/huff0/huff0.go
+++ b/vendor/github.com/klauspost/compress/huff0/huff0.go
@ -88,7 +88,7 @@ type Scratch struct {
 	// Decoders will return ErrMaxDecodedSizeExceeded if this limit is exceeded.
 	MaxDecodedSize int

-	br byteReader
+	srcLen int

 	// MaxSymbolValue will override the maximum symbol value of the next block.
 	MaxSymbolValue uint8
@ -170,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) {
 	if s.fse == nil {
 		s.fse = &fse.Scratch{}
 	}
-	s.br.init(in)
+	s.srcLen = len(in)

 	return s, nil
 }
--- a/vendor/github.com/klauspost/compress/s2/.gitignore
+++ b/vendor/github.com/klauspost/compress/s2/.gitignore
@ -1,15 +0,0 @@
-testdata/bench
-
-# These explicitly listed benchmark data files are for an obsolete version of
-# snappy_test.go.
-testdata/alice29.txt
-testdata/asyoulik.txt
-testdata/fireworks.jpeg
-testdata/geo.protodata
-testdata/html
-testdata/html_x_4
-testdata/kppkn.gtb
-testdata/lcet10.txt
-testdata/paper-100k.pdf
-testdata/plrabn12.txt
-testdata/urls.10K
--- a/vendor/github.com/klauspost/compress/s2/LICENSE
+++ b/vendor/github.com/klauspost/compress/s2/LICENSE
@ -1,28 +0,0 @@
-Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
-Copyright (c) 2019 Klaus Post. All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-   * Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-   * Redistributions in binary form must reproduce the above
-copyright notice, this list of conditions and the following disclaimer
-in the documentation and/or other materials provided with the
-distribution.
-   * Neither the name of Google Inc. nor the names of its
-contributors may be used to endorse or promote products derived from
-this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/vendor/github.com/klauspost/compress/s2/README.md
+++ b/vendor/github.com/klauspost/compress/s2/README.md
--- a/vendor/github.com/klauspost/compress/s2/decode.go
+++ b/vendor/github.com/klauspost/compress/s2/decode.go
@ -1,437 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
-	"encoding/binary"
-	"errors"
-	"fmt"
-	"strconv"
-)
-
-var (
-	// ErrCorrupt reports that the input is invalid.
-	ErrCorrupt = errors.New("s2: corrupt input")
-	// ErrCRC reports that the input failed CRC validation (streams only)
-	ErrCRC = errors.New("s2: corrupt input, crc mismatch")
-	// ErrTooLarge reports that the uncompressed length is too large.
-	ErrTooLarge = errors.New("s2: decoded block is too large")
-	// ErrUnsupported reports that the input isn't supported.
-	ErrUnsupported = errors.New("s2: unsupported input")
-)
-
-// DecodedLen returns the length of the decoded block.
-func DecodedLen(src []byte) (int, error) {
-	v, _, err := decodedLen(src)
-	return v, err
-}
-
-// decodedLen returns the length of the decoded block and the number of bytes
-// that the length header occupied.
-func decodedLen(src []byte) (blockLen, headerLen int, err error) {
-	v, n := binary.Uvarint(src)
-	if n <= 0 || v > 0xffffffff {
-		return 0, 0, ErrCorrupt
-	}
-
-	const wordSize = 32 << (^uint(0) >> 32 & 1)
-	if wordSize == 32 && v > 0x7fffffff {
-		return 0, 0, ErrTooLarge
-	}
-	return int(v), n, nil
-}
-
-const (
-	decodeErrCodeCorrupt = 1
-)
-
-// Decode returns the decoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire decoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-func Decode(dst, src []byte) ([]byte, error) {
-	dLen, s, err := decodedLen(src)
-	if err != nil {
-		return nil, err
-	}
-	if dLen <= cap(dst) {
-		dst = dst[:dLen]
-	} else {
-		dst = make([]byte, dLen)
-	}
-	if s2Decode(dst, src[s:]) != 0 {
-		return nil, ErrCorrupt
-	}
-	return dst, nil
-}
-
-// s2DecodeDict writes the decoding of src to dst. It assumes that the varint-encoded
-// length of the decompressed bytes has already been read, and that len(dst)
-// equals that length.
-//
-// It returns 0 on success or a decodeErrCodeXxx error code on failure.
-func s2DecodeDict(dst, src []byte, dict *Dict) int {
-	if dict == nil {
-		return s2Decode(dst, src)
-	}
-	const debug = false
-	const debugErrs = debug
-
-	if debug {
-		fmt.Println("Starting decode, dst len:", len(dst))
-	}
-	var d, s, length int
-	offset := len(dict.dict) - dict.repeat
-
-	// As long as we can read at least 5 bytes...
-	for s < len(src)-5 {
-		// Removing bounds checks is SLOWER when doing
-		// in := src[s:s+5]
-		// Checked on Go 1.18
-		switch src[s] & 0x03 {
-		case tagLiteral:
-			x := uint32(src[s] >> 2)
-			switch {
-			case x < 60:
-				s++
-			case x == 60:
-				s += 2
-				x = uint32(src[s-1])
-			case x == 61:
-				in := src[s : s+3]
-				x = uint32(in[1]) | uint32(in[2])<<8
-				s += 3
-			case x == 62:
-				in := src[s : s+4]
-				// Load as 32 bit and shift down.
-				x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
-				x >>= 8
-				s += 4
-			case x == 63:
-				in := src[s : s+5]
-				x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
-				s += 5
-			}
-			length = int(x) + 1
-			if debug {
-				fmt.Println("literals, length:", length, "d-after:", d+length)
-			}
-			if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
-				if debugErrs {
-					fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
-				}
-				return decodeErrCodeCorrupt
-			}
-
-			copy(dst[d:], src[s:s+length])
-			d += length
-			s += length
-			continue
-
-		case tagCopy1:
-			s += 2
-			toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-			length = int(src[s-2]) >> 2 & 0x7
-			if toffset == 0 {
-				if debug {
-					fmt.Print("(repeat) ")
-				}
-				// keep last offset
-				switch length {
-				case 5:
-					length = int(src[s]) + 4
-					s += 1
-				case 6:
-					in := src[s : s+2]
-					length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
-					s += 2
-				case 7:
-					in := src[s : s+3]
-					length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
-					s += 3
-				default: // 0-> 4
-				}
-			} else {
-				offset = toffset
-			}
-			length += 4
-		case tagCopy2:
-			in := src[s : s+3]
-			offset = int(uint32(in[1]) | uint32(in[2])<<8)
-			length = 1 + int(in[0])>>2
-			s += 3
-
-		case tagCopy4:
-			in := src[s : s+5]
-			offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
-			length = 1 + int(in[0])>>2
-			s += 5
-		}
-
-		if offset <= 0 || length > len(dst)-d {
-			if debugErrs {
-				fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
-			}
-			return decodeErrCodeCorrupt
-		}
-
-		// copy from dict
-		if d < offset {
-			if d > MaxDictSrcOffset {
-				if debugErrs {
-					fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
-				}
-				return decodeErrCodeCorrupt
-			}
-			startOff := len(dict.dict) - offset + d
-			if startOff < 0 || startOff+length > len(dict.dict) {
-				if debugErrs {
-					fmt.Printf("offset (%d) + length (%d) bigger than dict (%d)\n", offset, length, len(dict.dict))
-				}
-				return decodeErrCodeCorrupt
-			}
-			if debug {
-				fmt.Println("dict copy, length:", length, "offset:", offset, "d-after:", d+length, "dict start offset:", startOff)
-			}
-			copy(dst[d:d+length], dict.dict[startOff:])
-			d += length
-			continue
-		}
-
-		if debug {
-			fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
-		}
-
-		// Copy from an earlier sub-slice of dst to a later sub-slice.
-		// If no overlap, use the built-in copy:
-		if offset > length {
-			copy(dst[d:d+length], dst[d-offset:])
-			d += length
-			continue
-		}
-
-		// Unlike the built-in copy function, this byte-by-byte copy always runs
-		// forwards, even if the slices overlap. Conceptually, this is:
-		//
-		// d += forwardCopy(dst[d:d+length], dst[d-offset:])
-		//
-		// We align the slices into a and b and show the compiler they are the same size.
-		// This allows the loop to run without bounds checks.
-		a := dst[d : d+length]
-		b := dst[d-offset:]
-		b = b[:len(a)]
-		for i := range a {
-			a[i] = b[i]
-		}
-		d += length
-	}
-
-	// Remaining with extra checks...
-	for s < len(src) {
-		switch src[s] & 0x03 {
-		case tagLiteral:
-			x := uint32(src[s] >> 2)
-			switch {
-			case x < 60:
-				s++
-			case x == 60:
-				s += 2
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					if debugErrs {
-						fmt.Println("src went oob")
-					}
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-1])
-			case x == 61:
-				s += 3
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					if debugErrs {
-						fmt.Println("src went oob")
-					}
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-2]) | uint32(src[s-1])<<8
-			case x == 62:
-				s += 4
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					if debugErrs {
-						fmt.Println("src went oob")
-					}
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
-			case x == 63:
-				s += 5
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					if debugErrs {
-						fmt.Println("src went oob")
-					}
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
-			}
-			length = int(x) + 1
-			if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
-				if debugErrs {
-					fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
-				}
-				return decodeErrCodeCorrupt
-			}
-			if debug {
-				fmt.Println("literals, length:", length, "d-after:", d+length)
-			}
-
-			copy(dst[d:], src[s:s+length])
-			d += length
-			s += length
-			continue
-
-		case tagCopy1:
-			s += 2
-			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-				if debugErrs {
-					fmt.Println("src went oob")
-				}
-				return decodeErrCodeCorrupt
-			}
-			length = int(src[s-2]) >> 2 & 0x7
-			toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-			if toffset == 0 {
-				if debug {
-					fmt.Print("(repeat) ")
-				}
-				// keep last offset
-				switch length {
-				case 5:
-					s += 1
-					if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-						if debugErrs {
-							fmt.Println("src went oob")
-						}
-						return decodeErrCodeCorrupt
-					}
-					length = int(uint32(src[s-1])) + 4
-				case 6:
-					s += 2
-					if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-						if debugErrs {
-							fmt.Println("src went oob")
-						}
-						return decodeErrCodeCorrupt
-					}
-					length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
-				case 7:
-					s += 3
-					if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-						if debugErrs {
-							fmt.Println("src went oob")
-						}
-						return decodeErrCodeCorrupt
-					}
-					length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
-				default: // 0-> 4
-				}
-			} else {
-				offset = toffset
-			}
-			length += 4
-		case tagCopy2:
-			s += 3
-			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-				if debugErrs {
-					fmt.Println("src went oob")
-				}
-				return decodeErrCodeCorrupt
-			}
-			length = 1 + int(src[s-3])>>2
-			offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-
-		case tagCopy4:
-			s += 5
-			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-				if debugErrs {
-					fmt.Println("src went oob")
-				}
-				return decodeErrCodeCorrupt
-			}
-			length = 1 + int(src[s-5])>>2
-			offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
-		}
-
-		if offset <= 0 || length > len(dst)-d {
-			if debugErrs {
-				fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
-			}
-			return decodeErrCodeCorrupt
-		}
-
-		// copy from dict
-		if d < offset {
-			if d > MaxDictSrcOffset {
-				if debugErrs {
-					fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
-				}
-				return decodeErrCodeCorrupt
-			}
-			rOff := len(dict.dict) - (offset - d)
-			if debug {
-				fmt.Println("starting dict entry from dict offset", len(dict.dict)-rOff)
-			}
-			if rOff+length > len(dict.dict) {
-				if debugErrs {
-					fmt.Println("err: END offset", rOff+length, "bigger than dict", len(dict.dict), "dict offset:", rOff, "length:", length)
-				}
-				return decodeErrCodeCorrupt
-			}
-			if rOff < 0 {
-				if debugErrs {
-					fmt.Println("err: START offset", rOff, "less than 0", len(dict.dict), "dict offset:", rOff, "length:", length)
-				}
-				return decodeErrCodeCorrupt
-			}
-			copy(dst[d:d+length], dict.dict[rOff:])
-			d += length
-			continue
-		}
-
-		if debug {
-			fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
-		}
-
-		// Copy from an earlier sub-slice of dst to a later sub-slice.
-		// If no overlap, use the built-in copy:
-		if offset > length {
-			copy(dst[d:d+length], dst[d-offset:])
-			d += length
-			continue
-		}
-
-		// Unlike the built-in copy function, this byte-by-byte copy always runs
-		// forwards, even if the slices overlap. Conceptually, this is:
-		//
-		// d += forwardCopy(dst[d:d+length], dst[d-offset:])
-		//
-		// We align the slices into a and b and show the compiler they are the same size.
-		// This allows the loop to run without bounds checks.
-		a := dst[d : d+length]
-		b := dst[d-offset:]
-		b = b[:len(a)]
-		for i := range a {
-			a[i] = b[i]
-		}
-		d += length
-	}
-
-	if d != len(dst) {
-		if debugErrs {
-			fmt.Println("wanted length", len(dst), "got", d)
-		}
-		return decodeErrCodeCorrupt
-	}
-	return 0
-}
--- a/vendor/github.com/klauspost/compress/s2/decode_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/decode_amd64.s
@ -1,568 +0,0 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-#define R_TMP0 AX
-#define R_TMP1 BX
-#define R_LEN CX
-#define R_OFF DX
-#define R_SRC SI
-#define R_DST DI
-#define R_DBASE R8
-#define R_DLEN R9
-#define R_DEND R10
-#define R_SBASE R11
-#define R_SLEN R12
-#define R_SEND R13
-#define R_TMP2 R14
-#define R_TMP3 R15
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-//
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-//	- R_TMP0	scratch
-//	- R_TMP1	scratch
-//	- R_LEN	    length or x (shared)
-//	- R_OFF	    offset
-//	- R_SRC	    &src[s]
-//	- R_DST	    &dst[d]
-//	+ R_DBASE	dst_base
-//	+ R_DLEN	dst_len
-//	+ R_DEND	dst_base + dst_len
-//	+ R_SBASE	src_base
-//	+ R_SLEN	src_len
-//	+ R_SEND	src_base + src_len
-//	- R_TMP2	used by doCopy
-//	- R_TMP3	used by doCopy
-//
-// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly R_DST - R_DBASE,  and len(dst)-d is R_DEND - R_DST.
-// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
-TEXT ·s2Decode(SB), NOSPLIT, $48-56
-	// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
-	MOVQ dst_base+0(FP), R_DBASE
-	MOVQ dst_len+8(FP), R_DLEN
-	MOVQ R_DBASE, R_DST
-	MOVQ R_DBASE, R_DEND
-	ADDQ R_DLEN, R_DEND
-	MOVQ src_base+24(FP), R_SBASE
-	MOVQ src_len+32(FP), R_SLEN
-	MOVQ R_SBASE, R_SRC
-	MOVQ R_SBASE, R_SEND
-	ADDQ R_SLEN, R_SEND
-	XORQ R_OFF, R_OFF
-
-loop:
-	// for s < len(src)
-	CMPQ R_SRC, R_SEND
-	JEQ  end
-
-	// R_LEN = uint32(src[s])
-	//
-	// switch src[s] & 0x03
-	MOVBLZX (R_SRC), R_LEN
-	MOVL    R_LEN, R_TMP1
-	ANDL    $3, R_TMP1
-	CMPL    R_TMP1, $1
-	JAE     tagCopy
-
-	// ----------------------------------------
-	// The code below handles literal tags.
-
-	// case tagLiteral:
-	// x := uint32(src[s] >> 2)
-	// switch
-	SHRL $2, R_LEN
-	CMPL R_LEN, $60
-	JAE  tagLit60Plus
-
-	// case x < 60:
-	// s++
-	INCQ R_SRC
-
-doLit:
-	// This is the end of the inner "switch", when we have a literal tag.
-	//
-	// We assume that R_LEN == x and x fits in a uint32, where x is the variable
-	// used in the pure Go decode_other.go code.
-
-	// length = int(x) + 1
-	//
-	// Unlike the pure Go code, we don't need to check if length <= 0 because
-	// R_LEN can hold 64 bits, so the increment cannot overflow.
-	INCQ R_LEN
-
-	// Prepare to check if copying length bytes will run past the end of dst or
-	// src.
-	//
-	// R_TMP0 = len(dst) - d
-	// R_TMP1 = len(src) - s
-	MOVQ R_DEND, R_TMP0
-	SUBQ R_DST, R_TMP0
-	MOVQ R_SEND, R_TMP1
-	SUBQ R_SRC, R_TMP1
-
-	// !!! Try a faster technique for short (16 or fewer bytes) copies.
-	//
-	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
-	//   goto callMemmove // Fall back on calling runtime·memmove.
-	// }
-	//
-	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
-	// against 21 instead of 16, because it cannot assume that all of its input
-	// is contiguous in memory and so it needs to leave enough source bytes to
-	// read the next tag without refilling buffers, but Go's Decode assumes
-	// contiguousness (the src argument is a []byte).
-	CMPQ R_LEN, $16
-	JGT  callMemmove
-	CMPQ R_TMP0, $16
-	JLT  callMemmove
-	CMPQ R_TMP1, $16
-	JLT  callMemmove
-
-	// !!! Implement the copy from src to dst as a 16-byte load and store.
-	// (Decode's documentation says that dst and src must not overlap.)
-	//
-	// This always copies 16 bytes, instead of only length bytes, but that's
-	// OK. If the input is a valid Snappy encoding then subsequent iterations
-	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
-	// non-nil error), so the overrun will be ignored.
-	//
-	// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
-	// 16-byte loads and stores. This technique probably wouldn't be as
-	// effective on architectures that are fussier about alignment.
-	MOVOU 0(R_SRC), X0
-	MOVOU X0, 0(R_DST)
-
-	// d += length
-	// s += length
-	ADDQ R_LEN, R_DST
-	ADDQ R_LEN, R_SRC
-	JMP  loop
-
-callMemmove:
-	// if length > len(dst)-d || length > len(src)-s { etc }
-	CMPQ R_LEN, R_TMP0
-	JGT  errCorrupt
-	CMPQ R_LEN, R_TMP1
-	JGT  errCorrupt
-
-	// copy(dst[d:], src[s:s+length])
-	//
-	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
-	// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
-	// three registers to the stack, to save local variables across the CALL.
-	MOVQ R_DST, 0(SP)
-	MOVQ R_SRC, 8(SP)
-	MOVQ R_LEN, 16(SP)
-	MOVQ R_DST, 24(SP)
-	MOVQ R_SRC, 32(SP)
-	MOVQ R_LEN, 40(SP)
-	MOVQ R_OFF, 48(SP)
-	CALL runtime·memmove(SB)
-
-	// Restore local variables: unspill registers from the stack and
-	// re-calculate R_DBASE-R_SEND.
-	MOVQ 24(SP), R_DST
-	MOVQ 32(SP), R_SRC
-	MOVQ 40(SP), R_LEN
-	MOVQ 48(SP), R_OFF
-	MOVQ dst_base+0(FP), R_DBASE
-	MOVQ dst_len+8(FP), R_DLEN
-	MOVQ R_DBASE, R_DEND
-	ADDQ R_DLEN, R_DEND
-	MOVQ src_base+24(FP), R_SBASE
-	MOVQ src_len+32(FP), R_SLEN
-	MOVQ R_SBASE, R_SEND
-	ADDQ R_SLEN, R_SEND
-
-	// d += length
-	// s += length
-	ADDQ R_LEN, R_DST
-	ADDQ R_LEN, R_SRC
-	JMP  loop
-
-tagLit60Plus:
-	// !!! This fragment does the
-	//
-	// s += x - 58; if uint(s) > uint(len(src)) { etc }
-	//
-	// checks. In the asm version, we code it once instead of once per switch case.
-	ADDQ R_LEN, R_SRC
-	SUBQ $58, R_SRC
-	CMPQ R_SRC, R_SEND
-	JA   errCorrupt
-
-	// case x == 60:
-	CMPL R_LEN, $61
-	JEQ  tagLit61
-	JA   tagLit62Plus
-
-	// x = uint32(src[s-1])
-	MOVBLZX -1(R_SRC), R_LEN
-	JMP     doLit
-
-tagLit61:
-	// case x == 61:
-	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
-	MOVWLZX -2(R_SRC), R_LEN
-	JMP     doLit
-
-tagLit62Plus:
-	CMPL R_LEN, $62
-	JA   tagLit63
-
-	// case x == 62:
-	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
-	// We read one byte, safe to read one back, since we are just reading tag.
-	// x = binary.LittleEndian.Uint32(src[s-1:]) >> 8
-	MOVL -4(R_SRC), R_LEN
-	SHRL $8, R_LEN
-	JMP  doLit
-
-tagLit63:
-	// case x == 63:
-	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
-	MOVL -4(R_SRC), R_LEN
-	JMP  doLit
-
-// The code above handles literal tags.
-// ----------------------------------------
-// The code below handles copy tags.
-
-tagCopy4:
-	// case tagCopy4:
-	// s += 5
-	ADDQ $5, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	CMPQ R_SRC, R_SEND
-	JA   errCorrupt
-
-	// length = 1 + int(src[s-5])>>2
-	SHRQ $2, R_LEN
-	INCQ R_LEN
-
-	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
-	MOVLQZX -4(R_SRC), R_OFF
-	JMP     doCopy
-
-tagCopy2:
-	// case tagCopy2:
-	// s += 3
-	ADDQ $3, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	CMPQ R_SRC, R_SEND
-	JA   errCorrupt
-
-	// length = 1 + int(src[s-3])>>2
-	SHRQ $2, R_LEN
-	INCQ R_LEN
-
-	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-	MOVWQZX -2(R_SRC), R_OFF
-	JMP     doCopy
-
-tagCopy:
-	// We have a copy tag. We assume that:
-	//	- R_TMP1 == src[s] & 0x03
-	//	- R_LEN == src[s]
-	CMPQ R_TMP1, $2
-	JEQ  tagCopy2
-	JA   tagCopy4
-
-	// case tagCopy1:
-	// s += 2
-	ADDQ $2, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	CMPQ R_SRC, R_SEND
-	JA   errCorrupt
-
-	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-	// length = 4 + int(src[s-2])>>2&0x7
-	MOVBQZX -1(R_SRC), R_TMP1
-	MOVQ    R_LEN, R_TMP0
-	SHRQ    $2, R_LEN
-	ANDQ    $0xe0, R_TMP0
-	ANDQ    $7, R_LEN
-	SHLQ    $3, R_TMP0
-	ADDQ    $4, R_LEN
-	ORQ     R_TMP1, R_TMP0
-
-	// check if repeat code, ZF set by ORQ.
-	JZ repeatCode
-
-	// This is a regular copy, transfer our temporary value to R_OFF (length)
-	MOVQ R_TMP0, R_OFF
-	JMP  doCopy
-
-// This is a repeat code.
-repeatCode:
-	// If length < 9, reuse last offset, with the length already calculated.
-	CMPQ R_LEN, $9
-	JL   doCopyRepeat
-
-	// Read additional bytes for length.
-	JE repeatLen1
-
-	// Rare, so the extra branch shouldn't hurt too much.
-	CMPQ R_LEN, $10
-	JE   repeatLen2
-	JMP  repeatLen3
-
-// Read repeat lengths.
-repeatLen1:
-	// s ++
-	ADDQ $1, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	CMPQ R_SRC, R_SEND
-	JA   errCorrupt
-
-	// length = src[s-1] + 8
-	MOVBQZX -1(R_SRC), R_LEN
-	ADDL    $8, R_LEN
-	JMP     doCopyRepeat
-
-repeatLen2:
-	// s +=2
-	ADDQ $2, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	CMPQ R_SRC, R_SEND
-	JA   errCorrupt
-
-	// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + (1 << 8)
-	MOVWQZX -2(R_SRC), R_LEN
-	ADDL    $260, R_LEN
-	JMP     doCopyRepeat
-
-repeatLen3:
-	// s +=3
-	ADDQ $3, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	CMPQ R_SRC, R_SEND
-	JA   errCorrupt
-
-	// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + (1 << 16)
-	// Read one byte further back (just part of the tag, shifted out)
-	MOVL -4(R_SRC), R_LEN
-	SHRL $8, R_LEN
-	ADDL $65540, R_LEN
-	JMP  doCopyRepeat
-
-doCopy:
-	// This is the end of the outer "switch", when we have a copy tag.
-	//
-	// We assume that:
-	//	- R_LEN == length && R_LEN > 0
-	//	- R_OFF == offset
-
-	// if d < offset { etc }
-	MOVQ R_DST, R_TMP1
-	SUBQ R_DBASE, R_TMP1
-	CMPQ R_TMP1, R_OFF
-	JLT  errCorrupt
-
-	// Repeat values can skip the test above, since any offset > 0 will be in dst.
-doCopyRepeat:
-	// if offset <= 0 { etc }
-	CMPQ R_OFF, $0
-	JLE  errCorrupt
-
-	// if length > len(dst)-d { etc }
-	MOVQ R_DEND, R_TMP1
-	SUBQ R_DST, R_TMP1
-	CMPQ R_LEN, R_TMP1
-	JGT  errCorrupt
-
-	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
-	//
-	// Set:
-	//	- R_TMP2 = len(dst)-d
-	//	- R_TMP3 = &dst[d-offset]
-	MOVQ R_DEND, R_TMP2
-	SUBQ R_DST, R_TMP2
-	MOVQ R_DST, R_TMP3
-	SUBQ R_OFF, R_TMP3
-
-	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
-	//
-	// First, try using two 8-byte load/stores, similar to the doLit technique
-	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
-	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
-	// and not one 16-byte load/store, and the first store has to be before the
-	// second load, due to the overlap if offset is in the range [8, 16).
-	//
-	// if length > 16 || offset < 8 || len(dst)-d < 16 {
-	//   goto slowForwardCopy
-	// }
-	// copy 16 bytes
-	// d += length
-	CMPQ R_LEN, $16
-	JGT  slowForwardCopy
-	CMPQ R_OFF, $8
-	JLT  slowForwardCopy
-	CMPQ R_TMP2, $16
-	JLT  slowForwardCopy
-	MOVQ 0(R_TMP3), R_TMP0
-	MOVQ R_TMP0, 0(R_DST)
-	MOVQ 8(R_TMP3), R_TMP1
-	MOVQ R_TMP1, 8(R_DST)
-	ADDQ R_LEN, R_DST
-	JMP  loop
-
-slowForwardCopy:
-	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
-	// can still try 8-byte load stores, provided we can overrun up to 10 extra
-	// bytes. As above, the overrun will be fixed up by subsequent iterations
-	// of the outermost loop.
-	//
-	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
-	// commentary says:
-	//
-	// ----
-	//
-	// The main part of this loop is a simple copy of eight bytes at a time
-	// until we've copied (at least) the requested amount of bytes.  However,
-	// if d and d-offset are less than eight bytes apart (indicating a
-	// repeating pattern of length < 8), we first need to expand the pattern in
-	// order to get the correct results. For instance, if the buffer looks like
-	// this, with the eight-byte <d-offset> and <d> patterns marked as
-	// intervals:
-	//
-	//    abxxxxxxxxxxxx
-	//    [------]           d-offset
-	//      [------]         d
-	//
-	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
-	// once, after which we can move <d> two bytes without moving <d-offset>:
-	//
-	//    ababxxxxxxxxxx
-	//    [------]           d-offset
-	//        [------]       d
-	//
-	// and repeat the exercise until the two no longer overlap.
-	//
-	// This allows us to do very well in the special case of one single byte
-	// repeated many times, without taking a big hit for more general cases.
-	//
-	// The worst case of extra writing past the end of the match occurs when
-	// offset == 1 and length == 1; the last copy will read from byte positions
-	// [0..7] and write to [4..11], whereas it was only supposed to write to
-	// position 1. Thus, ten excess bytes.
-	//
-	// ----
-	//
-	// That "10 byte overrun" worst case is confirmed by Go's
-	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
-	// and finishSlowForwardCopy algorithm.
-	//
-	// if length > len(dst)-d-10 {
-	//   goto verySlowForwardCopy
-	// }
-	SUBQ $10, R_TMP2
-	CMPQ R_LEN, R_TMP2
-	JGT  verySlowForwardCopy
-
-	// We want to keep the offset, so we use R_TMP2 from here.
-	MOVQ R_OFF, R_TMP2
-
-makeOffsetAtLeast8:
-	// !!! As above, expand the pattern so that offset >= 8 and we can use
-	// 8-byte load/stores.
-	//
-	// for offset < 8 {
-	//   copy 8 bytes from dst[d-offset:] to dst[d:]
-	//   length -= offset
-	//   d      += offset
-	//   offset += offset
-	//   // The two previous lines together means that d-offset, and therefore
-	//   // R_TMP3, is unchanged.
-	// }
-	CMPQ R_TMP2, $8
-	JGE  fixUpSlowForwardCopy
-	MOVQ (R_TMP3), R_TMP1
-	MOVQ R_TMP1, (R_DST)
-	SUBQ R_TMP2, R_LEN
-	ADDQ R_TMP2, R_DST
-	ADDQ R_TMP2, R_TMP2
-	JMP  makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
-	// !!! Add length (which might be negative now) to d (implied by R_DST being
-	// &dst[d]) so that d ends up at the right place when we jump back to the
-	// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
-	// length is positive, copying the remaining length bytes will write to the
-	// right place.
-	MOVQ R_DST, R_TMP0
-	ADDQ R_LEN, R_DST
-
-finishSlowForwardCopy:
-	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
-	// length means that we overrun, but as above, that will be fixed up by
-	// subsequent iterations of the outermost loop.
-	CMPQ R_LEN, $0
-	JLE  loop
-	MOVQ (R_TMP3), R_TMP1
-	MOVQ R_TMP1, (R_TMP0)
-	ADDQ $8, R_TMP3
-	ADDQ $8, R_TMP0
-	SUBQ $8, R_LEN
-	JMP  finishSlowForwardCopy
-
-verySlowForwardCopy:
-	// verySlowForwardCopy is a simple implementation of forward copy. In C
-	// parlance, this is a do/while loop instead of a while loop, since we know
-	// that length > 0. In Go syntax:
-	//
-	// for {
-	//   dst[d] = dst[d - offset]
-	//   d++
-	//   length--
-	//   if length == 0 {
-	//     break
-	//   }
-	// }
-	MOVB (R_TMP3), R_TMP1
-	MOVB R_TMP1, (R_DST)
-	INCQ R_TMP3
-	INCQ R_DST
-	DECQ R_LEN
-	JNZ  verySlowForwardCopy
-	JMP  loop
-
-// The code above handles copy tags.
-// ----------------------------------------
-
-end:
-	// This is the end of the "for s < len(src)".
-	//
-	// if d != len(dst) { etc }
-	CMPQ R_DST, R_DEND
-	JNE  errCorrupt
-
-	// return 0
-	MOVQ $0, ret+48(FP)
-	RET
-
-errCorrupt:
-	// return decodeErrCodeCorrupt
-	MOVQ $1, ret+48(FP)
-	RET
--- a/vendor/github.com/klauspost/compress/s2/decode_arm64.s
+++ b/vendor/github.com/klauspost/compress/s2/decode_arm64.s
@ -1,574 +0,0 @@
-// Copyright 2020 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// +build !appengine
-// +build gc
-// +build !noasm
-
-#include "textflag.h"
-
-#define R_TMP0 R2
-#define R_TMP1 R3
-#define R_LEN R4
-#define R_OFF R5
-#define R_SRC R6
-#define R_DST R7
-#define R_DBASE R8
-#define R_DLEN R9
-#define R_DEND R10
-#define R_SBASE R11
-#define R_SLEN R12
-#define R_SEND R13
-#define R_TMP2 R14
-#define R_TMP3 R15
-
-// TEST_SRC will check if R_SRC is <= SRC_END
-#define TEST_SRC() \
-	CMP R_SEND, R_SRC \
-	BGT errCorrupt
-
-// MOVD R_SRC, R_TMP1
-// SUB  R_SBASE, R_TMP1, R_TMP1
-// CMP  R_SLEN, R_TMP1
-// BGT  errCorrupt
-
-// The asm code generally follows the pure Go code in decode_other.go, except
-// where marked with a "!!!".
-
-// func decode(dst, src []byte) int
-//
-// All local variables fit into registers. The non-zero stack size is only to
-// spill registers and push args when issuing a CALL. The register allocation:
-//	- R_TMP0	scratch
-//	- R_TMP1	scratch
-//	- R_LEN	length or x
-//	- R_OFF	offset
-//	- R_SRC	&src[s]
-//	- R_DST	&dst[d]
-//	+ R_DBASE	dst_base
-//	+ R_DLEN	dst_len
-//	+ R_DEND	dst_base + dst_len
-//	+ R_SBASE	src_base
-//	+ R_SLEN	src_len
-//	+ R_SEND	src_base + src_len
-//	- R_TMP2	used by doCopy
-//	- R_TMP3	used by doCopy
-//
-// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
-// function, and after a CALL returns, and are not otherwise modified.
-//
-// The d variable is implicitly R_DST - R_DBASE,  and len(dst)-d is R_DEND - R_DST.
-// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
-TEXT ·s2Decode(SB), NOSPLIT, $56-64
-	// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
-	MOVD dst_base+0(FP), R_DBASE
-	MOVD dst_len+8(FP), R_DLEN
-	MOVD R_DBASE, R_DST
-	MOVD R_DBASE, R_DEND
-	ADD  R_DLEN, R_DEND, R_DEND
-	MOVD src_base+24(FP), R_SBASE
-	MOVD src_len+32(FP), R_SLEN
-	MOVD R_SBASE, R_SRC
-	MOVD R_SBASE, R_SEND
-	ADD  R_SLEN, R_SEND, R_SEND
-	MOVD $0, R_OFF
-
-loop:
-	// for s < len(src)
-	CMP R_SEND, R_SRC
-	BEQ end
-
-	// R_LEN = uint32(src[s])
-	//
-	// switch src[s] & 0x03
-	MOVBU (R_SRC), R_LEN
-	MOVW  R_LEN, R_TMP1
-	ANDW  $3, R_TMP1
-	MOVW  $1, R1
-	CMPW  R1, R_TMP1
-	BGE   tagCopy
-
-	// ----------------------------------------
-	// The code below handles literal tags.
-
-	// case tagLiteral:
-	// x := uint32(src[s] >> 2)
-	// switch
-	MOVW $60, R1
-	LSRW $2, R_LEN, R_LEN
-	CMPW R_LEN, R1
-	BLS  tagLit60Plus
-
-	// case x < 60:
-	// s++
-	ADD $1, R_SRC, R_SRC
-
-doLit:
-	// This is the end of the inner "switch", when we have a literal tag.
-	//
-	// We assume that R_LEN == x and x fits in a uint32, where x is the variable
-	// used in the pure Go decode_other.go code.
-
-	// length = int(x) + 1
-	//
-	// Unlike the pure Go code, we don't need to check if length <= 0 because
-	// R_LEN can hold 64 bits, so the increment cannot overflow.
-	ADD $1, R_LEN, R_LEN
-
-	// Prepare to check if copying length bytes will run past the end of dst or
-	// src.
-	//
-	// R_TMP0 = len(dst) - d
-	// R_TMP1 = len(src) - s
-	MOVD R_DEND, R_TMP0
-	SUB  R_DST, R_TMP0, R_TMP0
-	MOVD R_SEND, R_TMP1
-	SUB  R_SRC, R_TMP1, R_TMP1
-
-	// !!! Try a faster technique for short (16 or fewer bytes) copies.
-	//
-	// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
-	//   goto callMemmove // Fall back on calling runtime·memmove.
-	// }
-	//
-	// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
-	// against 21 instead of 16, because it cannot assume that all of its input
-	// is contiguous in memory and so it needs to leave enough source bytes to
-	// read the next tag without refilling buffers, but Go's Decode assumes
-	// contiguousness (the src argument is a []byte).
-	CMP $16, R_LEN
-	BGT callMemmove
-	CMP $16, R_TMP0
-	BLT callMemmove
-	CMP $16, R_TMP1
-	BLT callMemmove
-
-	// !!! Implement the copy from src to dst as a 16-byte load and store.
-	// (Decode's documentation says that dst and src must not overlap.)
-	//
-	// This always copies 16 bytes, instead of only length bytes, but that's
-	// OK. If the input is a valid Snappy encoding then subsequent iterations
-	// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
-	// non-nil error), so the overrun will be ignored.
-	//
-	// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
-	// 16-byte loads and stores. This technique probably wouldn't be as
-	// effective on architectures that are fussier about alignment.
-	LDP 0(R_SRC), (R_TMP2, R_TMP3)
-	STP (R_TMP2, R_TMP3), 0(R_DST)
-
-	// d += length
-	// s += length
-	ADD R_LEN, R_DST, R_DST
-	ADD R_LEN, R_SRC, R_SRC
-	B   loop
-
-callMemmove:
-	// if length > len(dst)-d || length > len(src)-s { etc }
-	CMP R_TMP0, R_LEN
-	BGT errCorrupt
-	CMP R_TMP1, R_LEN
-	BGT errCorrupt
-
-	// copy(dst[d:], src[s:s+length])
-	//
-	// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
-	// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
-	// three registers to the stack, to save local variables across the CALL.
-	MOVD R_DST, 8(RSP)
-	MOVD R_SRC, 16(RSP)
-	MOVD R_LEN, 24(RSP)
-	MOVD R_DST, 32(RSP)
-	MOVD R_SRC, 40(RSP)
-	MOVD R_LEN, 48(RSP)
-	MOVD R_OFF, 56(RSP)
-	CALL runtime·memmove(SB)
-
-	// Restore local variables: unspill registers from the stack and
-	// re-calculate R_DBASE-R_SEND.
-	MOVD 32(RSP), R_DST
-	MOVD 40(RSP), R_SRC
-	MOVD 48(RSP), R_LEN
-	MOVD 56(RSP), R_OFF
-	MOVD dst_base+0(FP), R_DBASE
-	MOVD dst_len+8(FP), R_DLEN
-	MOVD R_DBASE, R_DEND
-	ADD  R_DLEN, R_DEND, R_DEND
-	MOVD src_base+24(FP), R_SBASE
-	MOVD src_len+32(FP), R_SLEN
-	MOVD R_SBASE, R_SEND
-	ADD  R_SLEN, R_SEND, R_SEND
-
-	// d += length
-	// s += length
-	ADD R_LEN, R_DST, R_DST
-	ADD R_LEN, R_SRC, R_SRC
-	B   loop
-
-tagLit60Plus:
-	// !!! This fragment does the
-	//
-	// s += x - 58; if uint(s) > uint(len(src)) { etc }
-	//
-	// checks. In the asm version, we code it once instead of once per switch case.
-	ADD R_LEN, R_SRC, R_SRC
-	SUB $58, R_SRC, R_SRC
-	TEST_SRC()
-
-	// case x == 60:
-	MOVW $61, R1
-	CMPW R1, R_LEN
-	BEQ  tagLit61
-	BGT  tagLit62Plus
-
-	// x = uint32(src[s-1])
-	MOVBU -1(R_SRC), R_LEN
-	B     doLit
-
-tagLit61:
-	// case x == 61:
-	// x = uint32(src[s-2]) | uint32(src[s-1])<<8
-	MOVHU -2(R_SRC), R_LEN
-	B     doLit
-
-tagLit62Plus:
-	CMPW $62, R_LEN
-	BHI  tagLit63
-
-	// case x == 62:
-	// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
-	MOVHU -3(R_SRC), R_LEN
-	MOVBU -1(R_SRC), R_TMP1
-	ORR   R_TMP1<<16, R_LEN
-	B     doLit
-
-tagLit63:
-	// case x == 63:
-	// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
-	MOVWU -4(R_SRC), R_LEN
-	B     doLit
-
-	// The code above handles literal tags.
-	// ----------------------------------------
-	// The code below handles copy tags.
-
-tagCopy4:
-	// case tagCopy4:
-	// s += 5
-	ADD $5, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	MOVD R_SRC, R_TMP1
-	SUB  R_SBASE, R_TMP1, R_TMP1
-	CMP  R_SLEN, R_TMP1
-	BGT  errCorrupt
-
-	// length = 1 + int(src[s-5])>>2
-	MOVD $1, R1
-	ADD  R_LEN>>2, R1, R_LEN
-
-	// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
-	MOVWU -4(R_SRC), R_OFF
-	B     doCopy
-
-tagCopy2:
-	// case tagCopy2:
-	// s += 3
-	ADD $3, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = 1 + int(src[s-3])>>2
-	MOVD $1, R1
-	ADD  R_LEN>>2, R1, R_LEN
-
-	// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-	MOVHU -2(R_SRC), R_OFF
-	B     doCopy
-
-tagCopy:
-	// We have a copy tag. We assume that:
-	//	- R_TMP1 == src[s] & 0x03
-	//	- R_LEN == src[s]
-	CMP $2, R_TMP1
-	BEQ tagCopy2
-	BGT tagCopy4
-
-	// case tagCopy1:
-	// s += 2
-	ADD $2, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-	// Calculate offset in R_TMP0 in case it is a repeat.
-	MOVD  R_LEN, R_TMP0
-	AND   $0xe0, R_TMP0
-	MOVBU -1(R_SRC), R_TMP1
-	ORR   R_TMP0<<3, R_TMP1, R_TMP0
-
-	// length = 4 + int(src[s-2])>>2&0x7
-	MOVD $7, R1
-	AND  R_LEN>>2, R1, R_LEN
-	ADD  $4, R_LEN, R_LEN
-
-	// check if repeat code with offset 0.
-	CMP $0, R_TMP0
-	BEQ repeatCode
-
-	// This is a regular copy, transfer our temporary value to R_OFF (offset)
-	MOVD R_TMP0, R_OFF
-	B    doCopy
-
-	// This is a repeat code.
-repeatCode:
-	// If length < 9, reuse last offset, with the length already calculated.
-	CMP $9, R_LEN
-	BLT doCopyRepeat
-	BEQ repeatLen1
-	CMP $10, R_LEN
-	BEQ repeatLen2
-
-repeatLen3:
-	// s +=3
-	ADD $3, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + 65540
-	MOVBU -1(R_SRC), R_TMP0
-	MOVHU -3(R_SRC), R_LEN
-	ORR   R_TMP0<<16, R_LEN, R_LEN
-	ADD   $65540, R_LEN, R_LEN
-	B     doCopyRepeat
-
-repeatLen2:
-	// s +=2
-	ADD $2, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + 260
-	MOVHU -2(R_SRC), R_LEN
-	ADD   $260, R_LEN, R_LEN
-	B     doCopyRepeat
-
-repeatLen1:
-	// s +=1
-	ADD $1, R_SRC, R_SRC
-
-	// if uint(s) > uint(len(src)) { etc }
-	TEST_SRC()
-
-	// length = src[s-1] + 8
-	MOVBU -1(R_SRC), R_LEN
-	ADD   $8, R_LEN, R_LEN
-	B     doCopyRepeat
-
-doCopy:
-	// This is the end of the outer "switch", when we have a copy tag.
-	//
-	// We assume that:
-	//	- R_LEN == length && R_LEN > 0
-	//	- R_OFF == offset
-
-	// if d < offset { etc }
-	MOVD R_DST, R_TMP1
-	SUB  R_DBASE, R_TMP1, R_TMP1
-	CMP  R_OFF, R_TMP1
-	BLT  errCorrupt
-
-	// Repeat values can skip the test above, since any offset > 0 will be in dst.
-doCopyRepeat:
-
-	// if offset <= 0 { etc }
-	CMP $0, R_OFF
-	BLE errCorrupt
-
-	// if length > len(dst)-d { etc }
-	MOVD R_DEND, R_TMP1
-	SUB  R_DST, R_TMP1, R_TMP1
-	CMP  R_TMP1, R_LEN
-	BGT  errCorrupt
-
-	// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
-	//
-	// Set:
-	//	- R_TMP2 = len(dst)-d
-	//	- R_TMP3 = &dst[d-offset]
-	MOVD R_DEND, R_TMP2
-	SUB  R_DST, R_TMP2, R_TMP2
-	MOVD R_DST, R_TMP3
-	SUB  R_OFF, R_TMP3, R_TMP3
-
-	// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
-	//
-	// First, try using two 8-byte load/stores, similar to the doLit technique
-	// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
-	// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
-	// and not one 16-byte load/store, and the first store has to be before the
-	// second load, due to the overlap if offset is in the range [8, 16).
-	//
-	// if length > 16 || offset < 8 || len(dst)-d < 16 {
-	//   goto slowForwardCopy
-	// }
-	// copy 16 bytes
-	// d += length
-	CMP  $16, R_LEN
-	BGT  slowForwardCopy
-	CMP  $8, R_OFF
-	BLT  slowForwardCopy
-	CMP  $16, R_TMP2
-	BLT  slowForwardCopy
-	MOVD 0(R_TMP3), R_TMP0
-	MOVD R_TMP0, 0(R_DST)
-	MOVD 8(R_TMP3), R_TMP1
-	MOVD R_TMP1, 8(R_DST)
-	ADD  R_LEN, R_DST, R_DST
-	B    loop
-
-slowForwardCopy:
-	// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
-	// can still try 8-byte load stores, provided we can overrun up to 10 extra
-	// bytes. As above, the overrun will be fixed up by subsequent iterations
-	// of the outermost loop.
-	//
-	// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
-	// commentary says:
-	//
-	// ----
-	//
-	// The main part of this loop is a simple copy of eight bytes at a time
-	// until we've copied (at least) the requested amount of bytes.  However,
-	// if d and d-offset are less than eight bytes apart (indicating a
-	// repeating pattern of length < 8), we first need to expand the pattern in
-	// order to get the correct results. For instance, if the buffer looks like
-	// this, with the eight-byte <d-offset> and <d> patterns marked as
-	// intervals:
-	//
-	//    abxxxxxxxxxxxx
-	//    [------]           d-offset
-	//      [------]         d
-	//
-	// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
-	// once, after which we can move <d> two bytes without moving <d-offset>:
-	//
-	//    ababxxxxxxxxxx
-	//    [------]           d-offset
-	//        [------]       d
-	//
-	// and repeat the exercise until the two no longer overlap.
-	//
-	// This allows us to do very well in the special case of one single byte
-	// repeated many times, without taking a big hit for more general cases.
-	//
-	// The worst case of extra writing past the end of the match occurs when
-	// offset == 1 and length == 1; the last copy will read from byte positions
-	// [0..7] and write to [4..11], whereas it was only supposed to write to
-	// position 1. Thus, ten excess bytes.
-	//
-	// ----
-	//
-	// That "10 byte overrun" worst case is confirmed by Go's
-	// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
-	// and finishSlowForwardCopy algorithm.
-	//
-	// if length > len(dst)-d-10 {
-	//   goto verySlowForwardCopy
-	// }
-	SUB $10, R_TMP2, R_TMP2
-	CMP R_TMP2, R_LEN
-	BGT verySlowForwardCopy
-
-	// We want to keep the offset, so we use R_TMP2 from here.
-	MOVD R_OFF, R_TMP2
-
-makeOffsetAtLeast8:
-	// !!! As above, expand the pattern so that offset >= 8 and we can use
-	// 8-byte load/stores.
-	//
-	// for offset < 8 {
-	//   copy 8 bytes from dst[d-offset:] to dst[d:]
-	//   length -= offset
-	//   d      += offset
-	//   offset += offset
-	//   // The two previous lines together means that d-offset, and therefore
-	//   // R_TMP3, is unchanged.
-	// }
-	CMP  $8, R_TMP2
-	BGE  fixUpSlowForwardCopy
-	MOVD (R_TMP3), R_TMP1
-	MOVD R_TMP1, (R_DST)
-	SUB  R_TMP2, R_LEN, R_LEN
-	ADD  R_TMP2, R_DST, R_DST
-	ADD  R_TMP2, R_TMP2, R_TMP2
-	B    makeOffsetAtLeast8
-
-fixUpSlowForwardCopy:
-	// !!! Add length (which might be negative now) to d (implied by R_DST being
-	// &dst[d]) so that d ends up at the right place when we jump back to the
-	// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
-	// length is positive, copying the remaining length bytes will write to the
-	// right place.
-	MOVD R_DST, R_TMP0
-	ADD  R_LEN, R_DST, R_DST
-
-finishSlowForwardCopy:
-	// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
-	// length means that we overrun, but as above, that will be fixed up by
-	// subsequent iterations of the outermost loop.
-	MOVD $0, R1
-	CMP  R1, R_LEN
-	BLE  loop
-	MOVD (R_TMP3), R_TMP1
-	MOVD R_TMP1, (R_TMP0)
-	ADD  $8, R_TMP3, R_TMP3
-	ADD  $8, R_TMP0, R_TMP0
-	SUB  $8, R_LEN, R_LEN
-	B    finishSlowForwardCopy
-
-verySlowForwardCopy:
-	// verySlowForwardCopy is a simple implementation of forward copy. In C
-	// parlance, this is a do/while loop instead of a while loop, since we know
-	// that length > 0. In Go syntax:
-	//
-	// for {
-	//   dst[d] = dst[d - offset]
-	//   d++
-	//   length--
-	//   if length == 0 {
-	//     break
-	//   }
-	// }
-	MOVB (R_TMP3), R_TMP1
-	MOVB R_TMP1, (R_DST)
-	ADD  $1, R_TMP3, R_TMP3
-	ADD  $1, R_DST, R_DST
-	SUB  $1, R_LEN, R_LEN
-	CBNZ R_LEN, verySlowForwardCopy
-	B    loop
-
-	// The code above handles copy tags.
-	// ----------------------------------------
-
-end:
-	// This is the end of the "for s < len(src)".
-	//
-	// if d != len(dst) { etc }
-	CMP R_DEND, R_DST
-	BNE errCorrupt
-
-	// return 0
-	MOVD $0, ret+48(FP)
-	RET
-
-errCorrupt:
-	// return decodeErrCodeCorrupt
-	MOVD $1, R_TMP0
-	MOVD R_TMP0, ret+48(FP)
-	RET
--- a/vendor/github.com/klauspost/compress/s2/decode_asm.go
+++ b/vendor/github.com/klauspost/compress/s2/decode_asm.go
@ -1,17 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (amd64 || arm64) && !appengine && gc && !noasm
-// +build amd64 arm64
-// +build !appengine
-// +build gc
-// +build !noasm
-
-package s2
-
-// decode has the same semantics as in decode_other.go.
-//
-//go:noescape
-func s2Decode(dst, src []byte) int
--- a/vendor/github.com/klauspost/compress/s2/decode_other.go
+++ b/vendor/github.com/klauspost/compress/s2/decode_other.go
@ -1,292 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-//go:build (!amd64 && !arm64) || appengine || !gc || noasm
-// +build !amd64,!arm64 appengine !gc noasm
-
-package s2
-
-import (
-	"fmt"
-	"strconv"
-)
-
-// decode writes the decoding of src to dst. It assumes that the varint-encoded
-// length of the decompressed bytes has already been read, and that len(dst)
-// equals that length.
-//
-// It returns 0 on success or a decodeErrCodeXxx error code on failure.
-func s2Decode(dst, src []byte) int {
-	const debug = false
-	if debug {
-		fmt.Println("Starting decode, dst len:", len(dst))
-	}
-	var d, s, length int
-	offset := 0
-
-	// As long as we can read at least 5 bytes...
-	for s < len(src)-5 {
-		// Removing bounds checks is SLOWER, when if doing
-		// in := src[s:s+5]
-		// Checked on Go 1.18
-		switch src[s] & 0x03 {
-		case tagLiteral:
-			x := uint32(src[s] >> 2)
-			switch {
-			case x < 60:
-				s++
-			case x == 60:
-				s += 2
-				x = uint32(src[s-1])
-			case x == 61:
-				in := src[s : s+3]
-				x = uint32(in[1]) | uint32(in[2])<<8
-				s += 3
-			case x == 62:
-				in := src[s : s+4]
-				// Load as 32 bit and shift down.
-				x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
-				x >>= 8
-				s += 4
-			case x == 63:
-				in := src[s : s+5]
-				x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
-				s += 5
-			}
-			length = int(x) + 1
-			if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
-				if debug {
-					fmt.Println("corrupt: lit size", length)
-				}
-				return decodeErrCodeCorrupt
-			}
-			if debug {
-				fmt.Println("literals, length:", length, "d-after:", d+length)
-			}
-
-			copy(dst[d:], src[s:s+length])
-			d += length
-			s += length
-			continue
-
-		case tagCopy1:
-			s += 2
-			toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-			length = int(src[s-2]) >> 2 & 0x7
-			if toffset == 0 {
-				if debug {
-					fmt.Print("(repeat) ")
-				}
-				// keep last offset
-				switch length {
-				case 5:
-					length = int(src[s]) + 4
-					s += 1
-				case 6:
-					in := src[s : s+2]
-					length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
-					s += 2
-				case 7:
-					in := src[s : s+3]
-					length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
-					s += 3
-				default: // 0-> 4
-				}
-			} else {
-				offset = toffset
-			}
-			length += 4
-		case tagCopy2:
-			in := src[s : s+3]
-			offset = int(uint32(in[1]) | uint32(in[2])<<8)
-			length = 1 + int(in[0])>>2
-			s += 3
-
-		case tagCopy4:
-			in := src[s : s+5]
-			offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
-			length = 1 + int(in[0])>>2
-			s += 5
-		}
-
-		if offset <= 0 || d < offset || length > len(dst)-d {
-			if debug {
-				fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
-			}
-
-			return decodeErrCodeCorrupt
-		}
-
-		if debug {
-			fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
-		}
-
-		// Copy from an earlier sub-slice of dst to a later sub-slice.
-		// If no overlap, use the built-in copy:
-		if offset > length {
-			copy(dst[d:d+length], dst[d-offset:])
-			d += length
-			continue
-		}
-
-		// Unlike the built-in copy function, this byte-by-byte copy always runs
-		// forwards, even if the slices overlap. Conceptually, this is:
-		//
-		// d += forwardCopy(dst[d:d+length], dst[d-offset:])
-		//
-		// We align the slices into a and b and show the compiler they are the same size.
-		// This allows the loop to run without bounds checks.
-		a := dst[d : d+length]
-		b := dst[d-offset:]
-		b = b[:len(a)]
-		for i := range a {
-			a[i] = b[i]
-		}
-		d += length
-	}
-
-	// Remaining with extra checks...
-	for s < len(src) {
-		switch src[s] & 0x03 {
-		case tagLiteral:
-			x := uint32(src[s] >> 2)
-			switch {
-			case x < 60:
-				s++
-			case x == 60:
-				s += 2
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-1])
-			case x == 61:
-				s += 3
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-2]) | uint32(src[s-1])<<8
-			case x == 62:
-				s += 4
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
-			case x == 63:
-				s += 5
-				if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-					return decodeErrCodeCorrupt
-				}
-				x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
-			}
-			length = int(x) + 1
-			if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
-				if debug {
-					fmt.Println("corrupt: lit size", length)
-				}
-				return decodeErrCodeCorrupt
-			}
-			if debug {
-				fmt.Println("literals, length:", length, "d-after:", d+length)
-			}
-
-			copy(dst[d:], src[s:s+length])
-			d += length
-			s += length
-			continue
-
-		case tagCopy1:
-			s += 2
-			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-				return decodeErrCodeCorrupt
-			}
-			length = int(src[s-2]) >> 2 & 0x7
-			toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
-			if toffset == 0 {
-				if debug {
-					fmt.Print("(repeat) ")
-				}
-				// keep last offset
-				switch length {
-				case 5:
-					s += 1
-					if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-						return decodeErrCodeCorrupt
-					}
-					length = int(uint32(src[s-1])) + 4
-				case 6:
-					s += 2
-					if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-						return decodeErrCodeCorrupt
-					}
-					length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
-				case 7:
-					s += 3
-					if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-						return decodeErrCodeCorrupt
-					}
-					length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
-				default: // 0-> 4
-				}
-			} else {
-				offset = toffset
-			}
-			length += 4
-		case tagCopy2:
-			s += 3
-			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-				return decodeErrCodeCorrupt
-			}
-			length = 1 + int(src[s-3])>>2
-			offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
-
-		case tagCopy4:
-			s += 5
-			if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
-				return decodeErrCodeCorrupt
-			}
-			length = 1 + int(src[s-5])>>2
-			offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
-		}
-
-		if offset <= 0 || d < offset || length > len(dst)-d {
-			if debug {
-				fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
-			}
-			return decodeErrCodeCorrupt
-		}
-
-		if debug {
-			fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
-		}
-
-		// Copy from an earlier sub-slice of dst to a later sub-slice.
-		// If no overlap, use the built-in copy:
-		if offset > length {
-			copy(dst[d:d+length], dst[d-offset:])
-			d += length
-			continue
-		}
-
-		// Unlike the built-in copy function, this byte-by-byte copy always runs
-		// forwards, even if the slices overlap. Conceptually, this is:
-		//
-		// d += forwardCopy(dst[d:d+length], dst[d-offset:])
-		//
-		// We align the slices into a and b and show the compiler they are the same size.
-		// This allows the loop to run without bounds checks.
-		a := dst[d : d+length]
-		b := dst[d-offset:]
-		b = b[:len(a)]
-		for i := range a {
-			a[i] = b[i]
-		}
-		d += length
-	}
-
-	if d != len(dst) {
-		return decodeErrCodeCorrupt
-	}
-	return 0
-}
--- a/vendor/github.com/klauspost/compress/s2/dict.go
+++ b/vendor/github.com/klauspost/compress/s2/dict.go
@ -1,350 +0,0 @@
-// Copyright (c) 2022+ Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
-	"bytes"
-	"encoding/binary"
-	"sync"
-)
-
-const (
-	// MinDictSize is the minimum dictionary size when repeat has been read.
-	MinDictSize = 16
-
-	// MaxDictSize is the maximum dictionary size when repeat has been read.
-	MaxDictSize = 65536
-
-	// MaxDictSrcOffset is the maximum offset where a dictionary entry can start.
-	MaxDictSrcOffset = 65535
-)
-
-// Dict contains a dictionary that can be used for encoding and decoding s2
-type Dict struct {
-	dict   []byte
-	repeat int // Repeat as index of dict
-
-	fast, better, best sync.Once
-	fastTable          *[1 << 14]uint16
-
-	betterTableShort *[1 << 14]uint16
-	betterTableLong  *[1 << 17]uint16
-
-	bestTableShort *[1 << 16]uint32
-	bestTableLong  *[1 << 19]uint32
-}
-
-// NewDict will read a dictionary.
-// It will return nil if the dictionary is invalid.
-func NewDict(dict []byte) *Dict {
-	if len(dict) == 0 {
-		return nil
-	}
-	var d Dict
-	// Repeat is the first value of the dict
-	r, n := binary.Uvarint(dict)
-	if n <= 0 {
-		return nil
-	}
-	dict = dict[n:]
-	d.dict = dict
-	if cap(d.dict) < len(d.dict)+16 {
-		d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
-	}
-	if len(dict) < MinDictSize || len(dict) > MaxDictSize {
-		return nil
-	}
-	d.repeat = int(r)
-	if d.repeat > len(dict) {
-		return nil
-	}
-	return &d
-}
-
-// Bytes will return a serialized version of the dictionary.
-// The output can be sent to NewDict.
-func (d *Dict) Bytes() []byte {
-	dst := make([]byte, binary.MaxVarintLen16+len(d.dict))
-	return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...)
-}
-
-// MakeDict will create a dictionary.
-// 'data' must be at least MinDictSize.
-// If data is longer than MaxDictSize only the last MaxDictSize bytes will be used.
-// If searchStart is set the start repeat value will be set to the last
-// match of this content.
-// If no matches are found, it will attempt to find shorter matches.
-// This content should match the typical start of a block.
-// If at least 4 bytes cannot be matched, repeat is set to start of block.
-func MakeDict(data []byte, searchStart []byte) *Dict {
-	if len(data) == 0 {
-		return nil
-	}
-	if len(data) > MaxDictSize {
-		data = data[len(data)-MaxDictSize:]
-	}
-	var d Dict
-	dict := data
-	d.dict = dict
-	if cap(d.dict) < len(d.dict)+16 {
-		d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
-	}
-	if len(dict) < MinDictSize {
-		return nil
-	}
-
-	// Find the longest match possible, last entry if multiple.
-	for s := len(searchStart); s > 4; s-- {
-		if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 {
-			d.repeat = idx
-			break
-		}
-	}
-
-	return &d
-}
-
-// MakeDictManual will create a dictionary.
-// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.
-// A manual first repeat index into data must be provided.
-// It must be less than len(data)-8.
-func MakeDictManual(data []byte, firstIdx uint16) *Dict {
-	if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
-		return nil
-	}
-	var d Dict
-	dict := data
-	d.dict = dict
-	if cap(d.dict) < len(d.dict)+16 {
-		d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
-	}
-
-	d.repeat = int(firstIdx)
-	return &d
-}
-
-// Encode returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func (d *Dict) Encode(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if cap(dst) < n {
-		dst = make([]byte, n)
-	} else {
-		dst = dst[:n]
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	dstP := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:dstP]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		dstP += emitLiteral(dst[dstP:], src)
-		return dst[:dstP]
-	}
-	n := encodeBlockDictGo(dst[dstP:], src, d)
-	if n > 0 {
-		dstP += n
-		return dst[:dstP]
-	}
-	// Not compressible
-	dstP += emitLiteral(dst[dstP:], src)
-	return dst[:dstP]
-}
-
-// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// EncodeBetter compresses better than Encode but typically with a
-// 10-40% speed decrease on both compression and decompression.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func (d *Dict) EncodeBetter(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if len(dst) < n {
-		dst = make([]byte, n)
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	dstP := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:dstP]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		dstP += emitLiteral(dst[dstP:], src)
-		return dst[:dstP]
-	}
-	n := encodeBlockBetterDict(dst[dstP:], src, d)
-	if n > 0 {
-		dstP += n
-		return dst[:dstP]
-	}
-	// Not compressible
-	dstP += emitLiteral(dst[dstP:], src)
-	return dst[:dstP]
-}
-
-// EncodeBest returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// EncodeBest compresses as good as reasonably possible but with a
-// big speed decrease.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func (d *Dict) EncodeBest(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if len(dst) < n {
-		dst = make([]byte, n)
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	dstP := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:dstP]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		dstP += emitLiteral(dst[dstP:], src)
-		return dst[:dstP]
-	}
-	n := encodeBlockBest(dst[dstP:], src, d)
-	if n > 0 {
-		dstP += n
-		return dst[:dstP]
-	}
-	// Not compressible
-	dstP += emitLiteral(dst[dstP:], src)
-	return dst[:dstP]
-}
-
-// Decode returns the decoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire decoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-func (d *Dict) Decode(dst, src []byte) ([]byte, error) {
-	dLen, s, err := decodedLen(src)
-	if err != nil {
-		return nil, err
-	}
-	if dLen <= cap(dst) {
-		dst = dst[:dLen]
-	} else {
-		dst = make([]byte, dLen)
-	}
-	if s2DecodeDict(dst, src[s:], d) != 0 {
-		return nil, ErrCorrupt
-	}
-	return dst, nil
-}
-
-func (d *Dict) initFast() {
-	d.fast.Do(func() {
-		const (
-			tableBits    = 14
-			maxTableSize = 1 << tableBits
-		)
-
-		var table [maxTableSize]uint16
-		// We stop so any entry of length 8 can always be read.
-		for i := 0; i < len(d.dict)-8-2; i += 3 {
-			x0 := load64(d.dict, i)
-			h0 := hash6(x0, tableBits)
-			h1 := hash6(x0>>8, tableBits)
-			h2 := hash6(x0>>16, tableBits)
-			table[h0] = uint16(i)
-			table[h1] = uint16(i + 1)
-			table[h2] = uint16(i + 2)
-		}
-		d.fastTable = &table
-	})
-}
-
-func (d *Dict) initBetter() {
-	d.better.Do(func() {
-		const (
-			// Long hash matches.
-			lTableBits    = 17
-			maxLTableSize = 1 << lTableBits
-
-			// Short hash matches.
-			sTableBits    = 14
-			maxSTableSize = 1 << sTableBits
-		)
-
-		var lTable [maxLTableSize]uint16
-		var sTable [maxSTableSize]uint16
-
-		// We stop so any entry of length 8 can always be read.
-		for i := 0; i < len(d.dict)-8; i++ {
-			cv := load64(d.dict, i)
-			lTable[hash7(cv, lTableBits)] = uint16(i)
-			sTable[hash4(cv, sTableBits)] = uint16(i)
-		}
-		d.betterTableShort = &sTable
-		d.betterTableLong = &lTable
-	})
-}
-
-func (d *Dict) initBest() {
-	d.best.Do(func() {
-		const (
-			// Long hash matches.
-			lTableBits    = 19
-			maxLTableSize = 1 << lTableBits
-
-			// Short hash matches.
-			sTableBits    = 16
-			maxSTableSize = 1 << sTableBits
-		)
-
-		var lTable [maxLTableSize]uint32
-		var sTable [maxSTableSize]uint32
-
-		// We stop so any entry of length 8 can always be read.
-		for i := 0; i < len(d.dict)-8; i++ {
-			cv := load64(d.dict, i)
-			hashL := hash8(cv, lTableBits)
-			hashS := hash4(cv, sTableBits)
-			candidateL := lTable[hashL]
-			candidateS := sTable[hashS]
-			lTable[hashL] = uint32(i) | candidateL<<16
-			sTable[hashS] = uint32(i) | candidateS<<16
-		}
-		d.bestTableShort = &sTable
-		d.bestTableLong = &lTable
-	})
-}
--- a/vendor/github.com/klauspost/compress/s2/encode.go
+++ b/vendor/github.com/klauspost/compress/s2/encode.go
@ -1,393 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
-	"encoding/binary"
-	"math"
-	"math/bits"
-)
-
-// Encode returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func Encode(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if cap(dst) < n {
-		dst = make([]byte, n)
-	} else {
-		dst = dst[:n]
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	d := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:d]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		d += emitLiteral(dst[d:], src)
-		return dst[:d]
-	}
-	n := encodeBlock(dst[d:], src)
-	if n > 0 {
-		d += n
-		return dst[:d]
-	}
-	// Not compressible
-	d += emitLiteral(dst[d:], src)
-	return dst[:d]
-}
-
-// EstimateBlockSize will perform a very fast compression
-// without outputting the result and return the compressed output size.
-// The function returns -1 if no improvement could be achieved.
-// Using actual compression will most often produce better compression than the estimate.
-func EstimateBlockSize(src []byte) (d int) {
-	if len(src) < 6 || int64(len(src)) > 0xffffffff {
-		return -1
-	}
-	if len(src) <= 1024 {
-		d = calcBlockSizeSmall(src)
-	} else {
-		d = calcBlockSize(src)
-	}
-
-	if d == 0 {
-		return -1
-	}
-	// Size of the varint encoded block size.
-	d += (bits.Len64(uint64(len(src))) + 7) / 7
-
-	if d >= len(src) {
-		return -1
-	}
-	return d
-}
-
-// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// EncodeBetter compresses better than Encode but typically with a
-// 10-40% speed decrease on both compression and decompression.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeBetter(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if len(dst) < n {
-		dst = make([]byte, n)
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	d := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:d]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		d += emitLiteral(dst[d:], src)
-		return dst[:d]
-	}
-	n := encodeBlockBetter(dst[d:], src)
-	if n > 0 {
-		d += n
-		return dst[:d]
-	}
-	// Not compressible
-	d += emitLiteral(dst[d:], src)
-	return dst[:d]
-}
-
-// EncodeBest returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// EncodeBest compresses as good as reasonably possible but with a
-// big speed decrease.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeBest(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if len(dst) < n {
-		dst = make([]byte, n)
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	d := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:d]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		d += emitLiteral(dst[d:], src)
-		return dst[:d]
-	}
-	n := encodeBlockBest(dst[d:], src, nil)
-	if n > 0 {
-		d += n
-		return dst[:d]
-	}
-	// Not compressible
-	d += emitLiteral(dst[d:], src)
-	return dst[:d]
-}
-
-// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The output is Snappy compatible and will likely decompress faster.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeSnappy(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if cap(dst) < n {
-		dst = make([]byte, n)
-	} else {
-		dst = dst[:n]
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	d := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:d]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		d += emitLiteral(dst[d:], src)
-		return dst[:d]
-	}
-
-	n := encodeBlockSnappy(dst[d:], src)
-	if n > 0 {
-		d += n
-		return dst[:d]
-	}
-	// Not compressible
-	d += emitLiteral(dst[d:], src)
-	return dst[:d]
-}
-
-// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The output is Snappy compatible and will likely decompress faster.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeSnappyBetter(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if cap(dst) < n {
-		dst = make([]byte, n)
-	} else {
-		dst = dst[:n]
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	d := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:d]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		d += emitLiteral(dst[d:], src)
-		return dst[:d]
-	}
-
-	n := encodeBlockBetterSnappy(dst[d:], src)
-	if n > 0 {
-		d += n
-		return dst[:d]
-	}
-	// Not compressible
-	d += emitLiteral(dst[d:], src)
-	return dst[:d]
-}
-
-// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
-// slice of dst if dst was large enough to hold the entire encoded block.
-// Otherwise, a newly allocated slice will be returned.
-//
-// The output is Snappy compatible and will likely decompress faster.
-//
-// The dst and src must not overlap. It is valid to pass a nil dst.
-//
-// The blocks will require the same amount of memory to decode as encoding,
-// and does not make for concurrent decoding.
-// Also note that blocks do not contain CRC information, so corruption may be undetected.
-//
-// If you need to encode larger amounts of data, consider using
-// the streaming interface which gives all of these features.
-func EncodeSnappyBest(dst, src []byte) []byte {
-	if n := MaxEncodedLen(len(src)); n < 0 {
-		panic(ErrTooLarge)
-	} else if cap(dst) < n {
-		dst = make([]byte, n)
-	} else {
-		dst = dst[:n]
-	}
-
-	// The block starts with the varint-encoded length of the decompressed bytes.
-	d := binary.PutUvarint(dst, uint64(len(src)))
-
-	if len(src) == 0 {
-		return dst[:d]
-	}
-	if len(src) < minNonLiteralBlockSize {
-		d += emitLiteral(dst[d:], src)
-		return dst[:d]
-	}
-
-	n := encodeBlockBestSnappy(dst[d:], src)
-	if n > 0 {
-		d += n
-		return dst[:d]
-	}
-	// Not compressible
-	d += emitLiteral(dst[d:], src)
-	return dst[:d]
-}
-
-// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
-// If the destination is nil or too small, a new will be allocated.
-// The blocks are not validated, so garbage in = garbage out.
-// dst may not overlap block data.
-// Any data in dst is preserved as is, so it will not be considered a block.
-func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
-	totalSize := uint64(0)
-	compSize := 0
-	for _, b := range blocks {
-		l, hdr, err := decodedLen(b)
-		if err != nil {
-			return nil, err
-		}
-		totalSize += uint64(l)
-		compSize += len(b) - hdr
-	}
-	if totalSize == 0 {
-		dst = append(dst, 0)
-		return dst, nil
-	}
-	if totalSize > math.MaxUint32 {
-		return nil, ErrTooLarge
-	}
-	var tmp [binary.MaxVarintLen32]byte
-	hdrSize := binary.PutUvarint(tmp[:], totalSize)
-	wantSize := hdrSize + compSize
-
-	if cap(dst)-len(dst) < wantSize {
-		dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
-	}
-	dst = append(dst, tmp[:hdrSize]...)
-	for _, b := range blocks {
-		_, hdr, err := decodedLen(b)
-		if err != nil {
-			return nil, err
-		}
-		dst = append(dst, b[hdr:]...)
-	}
-	return dst, nil
-}
-
-// inputMargin is the minimum number of extra input bytes to keep, inside
-// encodeBlock's inner loop. On some architectures, this margin lets us
-// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
-// literals can be implemented as a single load to and store from a 16-byte
-// register. That literal's actual length can be as short as 1 byte, so this
-// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
-// the encoding loop will fix up the copy overrun, and this inputMargin ensures
-// that we don't overrun the dst and src buffers.
-const inputMargin = 8
-
-// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
-// will be accepted by the encoder.
-const minNonLiteralBlockSize = 32
-
-const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)
-
-// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
-// Blocks this big are highly discouraged, though.
-// Half the size on 32 bit systems.
-const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5
-
-// MaxEncodedLen returns the maximum length of a snappy block, given its
-// uncompressed length.
-//
-// It will return a negative value if srcLen is too large to encode.
-// 32 bit platforms will have lower thresholds for rejecting big content.
-func MaxEncodedLen(srcLen int) int {
-	n := uint64(srcLen)
-	if intReduction == 1 {
-		// 32 bits
-		if n > math.MaxInt32 {
-			// Also includes negative.
-			return -1
-		}
-	} else if n > 0xffffffff {
-		// 64 bits
-		// Also includes negative.
-		return -1
-	}
-	// Size of the varint encoded block size.
-	n = n + uint64((bits.Len64(n)+7)/7)
-
-	// Add maximum size of encoding block as literals.
-	n += uint64(literalExtraSize(int64(srcLen)))
-	if intReduction == 1 {
-		// 32 bits
-		if n > math.MaxInt32 {
-			return -1
-		}
-	} else if n > 0xffffffff {
-		// 64 bits
-		// Also includes negative.
-		return -1
-	}
-	return int(n)
-}
--- a/vendor/github.com/klauspost/compress/s2/encode_all.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_all.go
--- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
@ -1,148 +0,0 @@
-//go:build !appengine && !noasm && gc
-// +build !appengine,!noasm,gc
-
-package s2
-
-const hasAmd64Asm = true
-
-// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src)) &&
-//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlock(dst, src []byte) (d int) {
-	const (
-		// Use 12 bit table when less than...
-		limit12B = 16 << 10
-		// Use 10 bit table when less than...
-		limit10B = 4 << 10
-		// Use 8 bit table when less than...
-		limit8B = 512
-	)
-
-	if len(src) >= 4<<20 {
-		return encodeBlockAsm(dst, src)
-	}
-	if len(src) >= limit12B {
-		return encodeBlockAsm4MB(dst, src)
-	}
-	if len(src) >= limit10B {
-		return encodeBlockAsm12B(dst, src)
-	}
-	if len(src) >= limit8B {
-		return encodeBlockAsm10B(dst, src)
-	}
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-	return encodeBlockAsm8B(dst, src)
-}
-
-// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src)) &&
-//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBetter(dst, src []byte) (d int) {
-	const (
-		// Use 12 bit table when less than...
-		limit12B = 16 << 10
-		// Use 10 bit table when less than...
-		limit10B = 4 << 10
-		// Use 8 bit table when less than...
-		limit8B = 512
-	)
-
-	if len(src) > 4<<20 {
-		return encodeBetterBlockAsm(dst, src)
-	}
-	if len(src) >= limit12B {
-		return encodeBetterBlockAsm4MB(dst, src)
-	}
-	if len(src) >= limit10B {
-		return encodeBetterBlockAsm12B(dst, src)
-	}
-	if len(src) >= limit8B {
-		return encodeBetterBlockAsm10B(dst, src)
-	}
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-	return encodeBetterBlockAsm8B(dst, src)
-}
-
-// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src)) &&
-//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockSnappy(dst, src []byte) (d int) {
-	const (
-		// Use 12 bit table when less than...
-		limit12B = 16 << 10
-		// Use 10 bit table when less than...
-		limit10B = 4 << 10
-		// Use 8 bit table when less than...
-		limit8B = 512
-	)
-	if len(src) >= 64<<10 {
-		return encodeSnappyBlockAsm(dst, src)
-	}
-	if len(src) >= limit12B {
-		return encodeSnappyBlockAsm64K(dst, src)
-	}
-	if len(src) >= limit10B {
-		return encodeSnappyBlockAsm12B(dst, src)
-	}
-	if len(src) >= limit8B {
-		return encodeSnappyBlockAsm10B(dst, src)
-	}
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-	return encodeSnappyBlockAsm8B(dst, src)
-}
-
-// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src)) &&
-//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBetterSnappy(dst, src []byte) (d int) {
-	const (
-		// Use 12 bit table when less than...
-		limit12B = 16 << 10
-		// Use 10 bit table when less than...
-		limit10B = 4 << 10
-		// Use 8 bit table when less than...
-		limit8B = 512
-	)
-	if len(src) >= 64<<10 {
-		return encodeSnappyBetterBlockAsm(dst, src)
-	}
-	if len(src) >= limit12B {
-		return encodeSnappyBetterBlockAsm64K(dst, src)
-	}
-	if len(src) >= limit10B {
-		return encodeSnappyBetterBlockAsm12B(dst, src)
-	}
-	if len(src) >= limit8B {
-		return encodeSnappyBetterBlockAsm10B(dst, src)
-	}
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-	return encodeSnappyBetterBlockAsm8B(dst, src)
-}
--- a/vendor/github.com/klauspost/compress/s2/encode_best.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_best.go
@ -1,793 +0,0 @@
-// Copyright 2016 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
-	"fmt"
-	"math"
-	"math/bits"
-)
-
-// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src)) &&
-//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
-	// Initialize the hash tables.
-	const (
-		// Long hash matches.
-		lTableBits    = 19
-		maxLTableSize = 1 << lTableBits
-
-		// Short hash matches.
-		sTableBits    = 16
-		maxSTableSize = 1 << sTableBits
-
-		inputMargin = 8 + 2
-
-		debug = false
-	)
-
-	// sLimit is when to stop looking for offset/length copies. The inputMargin
-	// lets us use a fast path for emitLiteral in the main loop, while we are
-	// looking for copies.
-	sLimit := len(src) - inputMargin
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-	sLimitDict := len(src) - inputMargin
-	if sLimitDict > MaxDictSrcOffset-inputMargin {
-		sLimitDict = MaxDictSrcOffset - inputMargin
-	}
-
-	var lTable [maxLTableSize]uint64
-	var sTable [maxSTableSize]uint64
-
-	// Bail if we can't compress to at least this.
-	dstLimit := len(src) - 5
-
-	// nextEmit is where in src the next emitLiteral should start from.
-	nextEmit := 0
-
-	// The encoded form must start with a literal, as there are no previous
-	// bytes to copy, so we start looking for hash matches at s == 1.
-	s := 1
-	repeat := 1
-	if dict != nil {
-		dict.initBest()
-		s = 0
-		repeat = len(dict.dict) - dict.repeat
-	}
-	cv := load64(src, s)
-
-	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
-	const lowbitMask = 0xffffffff
-	getCur := func(x uint64) int {
-		return int(x & lowbitMask)
-	}
-	getPrev := func(x uint64) int {
-		return int(x >> 32)
-	}
-	const maxSkip = 64
-
-	for {
-		type match struct {
-			offset    int
-			s         int
-			length    int
-			score     int
-			rep, dict bool
-		}
-		var best match
-		for {
-			// Next src position to check
-			nextS := (s-nextEmit)>>8 + 1
-			if nextS > maxSkip {
-				nextS = s + maxSkip
-			} else {
-				nextS += s
-			}
-			if nextS > sLimit {
-				goto emitRemainder
-			}
-			if dict != nil && s >= MaxDictSrcOffset {
-				dict = nil
-				if repeat > s {
-					repeat = math.MinInt32
-				}
-			}
-			hashL := hash8(cv, lTableBits)
-			hashS := hash4(cv, sTableBits)
-			candidateL := lTable[hashL]
-			candidateS := sTable[hashS]
-
-			score := func(m match) int {
-				// Matches that are longer forward are penalized since we must emit it as a literal.
-				score := m.length - m.s
-				if nextEmit == m.s {
-					// If we do not have to emit literals, we save 1 byte
-					score++
-				}
-				offset := m.s - m.offset
-				if m.rep {
-					return score - emitRepeatSize(offset, m.length)
-				}
-				return score - emitCopySize(offset, m.length)
-			}
-
-			matchAt := func(offset, s int, first uint32, rep bool) match {
-				if best.length != 0 && best.s-best.offset == s-offset {
-					// Don't retest if we have the same offset.
-					return match{offset: offset, s: s}
-				}
-				if load32(src, offset) != first {
-					return match{offset: offset, s: s}
-				}
-				m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
-				s += 4
-				for s < len(src) {
-					if len(src)-s < 8 {
-						if src[s] == src[m.length] {
-							m.length++
-							s++
-							continue
-						}
-						break
-					}
-					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
-						m.length += bits.TrailingZeros64(diff) >> 3
-						break
-					}
-					s += 8
-					m.length += 8
-				}
-				m.length -= offset
-				m.score = score(m)
-				if m.score <= -m.s {
-					// Eliminate if no savings, we might find a better one.
-					m.length = 0
-				}
-				return m
-			}
-			matchDict := func(candidate, s int, first uint32, rep bool) match {
-				// Calculate offset as if in continuous array with s
-				offset := -len(dict.dict) + candidate
-				if best.length != 0 && best.s-best.offset == s-offset && !rep {
-					// Don't retest if we have the same offset.
-					return match{offset: offset, s: s}
-				}
-
-				if load32(dict.dict, candidate) != first {
-					return match{offset: offset, s: s}
-				}
-				m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
-				s += 4
-				if !rep {
-					for s < sLimitDict && m.length < len(dict.dict) {
-						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
-							if src[s] == dict.dict[m.length] {
-								m.length++
-								s++
-								continue
-							}
-							break
-						}
-						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
-							m.length += bits.TrailingZeros64(diff) >> 3
-							break
-						}
-						s += 8
-						m.length += 8
-					}
-				} else {
-					for s < len(src) && m.length < len(dict.dict) {
-						if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
-							if src[s] == dict.dict[m.length] {
-								m.length++
-								s++
-								continue
-							}
-							break
-						}
-						if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
-							m.length += bits.TrailingZeros64(diff) >> 3
-							break
-						}
-						s += 8
-						m.length += 8
-					}
-				}
-				m.length -= candidate
-				m.score = score(m)
-				if m.score <= -m.s {
-					// Eliminate if no savings, we might find a better one.
-					m.length = 0
-				}
-				return m
-			}
-
-			bestOf := func(a, b match) match {
-				if b.length == 0 {
-					return a
-				}
-				if a.length == 0 {
-					return b
-				}
-				as := a.score + b.s
-				bs := b.score + a.s
-				if as >= bs {
-					return a
-				}
-				return b
-			}
-
-			if s > 0 {
-				best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
-				best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
-				best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
-			}
-			if dict != nil {
-				candidateL := dict.bestTableLong[hashL]
-				candidateS := dict.bestTableShort[hashS]
-				best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
-				best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
-				best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
-				best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
-			}
-			{
-				if (dict == nil || repeat <= s) && repeat > 0 {
-					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
-				} else if s-repeat < -4 && dict != nil {
-					candidate := len(dict.dict) - (repeat - s)
-					best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
-					candidate++
-					best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
-				}
-
-				if best.length > 0 {
-					hashS := hash4(cv>>8, sTableBits)
-					// s+1
-					nextShort := sTable[hashS]
-					s := s + 1
-					cv := load64(src, s)
-					hashL := hash8(cv, lTableBits)
-					nextLong := lTable[hashL]
-					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
-					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
-					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
-					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
-
-					// Dict at + 1
-					if dict != nil {
-						candidateL := dict.bestTableLong[hashL]
-						candidateS := dict.bestTableShort[hashS]
-
-						best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
-						best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
-					}
-
-					// s+2
-					if true {
-						hashS := hash4(cv>>8, sTableBits)
-
-						nextShort = sTable[hashS]
-						s++
-						cv = load64(src, s)
-						hashL := hash8(cv, lTableBits)
-						nextLong = lTable[hashL]
-
-						if (dict == nil || repeat <= s) && repeat > 0 {
-							// Repeat at + 2
-							best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
-						} else if repeat-s > 4 && dict != nil {
-							candidate := len(dict.dict) - (repeat - s)
-							best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
-						}
-						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
-						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
-						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
-						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
-
-						// Dict at +2
-						// Very small gain
-						if dict != nil {
-							candidateL := dict.bestTableLong[hashL]
-							candidateS := dict.bestTableShort[hashS]
-
-							best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
-							best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
-						}
-					}
-					// Search for a match at best match end, see if that is better.
-					// Allow some bytes at the beginning to mismatch.
-					// Sweet spot is around 1-2 bytes, but depends on input.
-					// The skipped bytes are tested in Extend backwards,
-					// and still picked up as part of the match if they do.
-					const skipBeginning = 2
-					const skipEnd = 1
-					if sAt := best.s + best.length - skipEnd; sAt < sLimit {
-
-						sBack := best.s + skipBeginning - skipEnd
-						backL := best.length - skipBeginning
-						// Load initial values
-						cv = load64(src, sBack)
-
-						// Grab candidates...
-						next := lTable[hash8(load64(src, sAt), lTableBits)]
-
-						if checkAt := getCur(next) - backL; checkAt > 0 {
-							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
-						}
-						if checkAt := getPrev(next) - backL; checkAt > 0 {
-							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
-						}
-						// Disabled: Extremely small gain
-						if false {
-							next = sTable[hash4(load64(src, sAt), sTableBits)]
-							if checkAt := getCur(next) - backL; checkAt > 0 {
-								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
-							}
-							if checkAt := getPrev(next) - backL; checkAt > 0 {
-								best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
-							}
-						}
-					}
-				}
-			}
-
-			// Update table
-			lTable[hashL] = uint64(s) | candidateL<<32
-			sTable[hashS] = uint64(s) | candidateS<<32
-
-			if best.length > 0 {
-				break
-			}
-
-			cv = load64(src, nextS)
-			s = nextS
-		}
-
-		// Extend backwards, not needed for repeats...
-		s = best.s
-		if !best.rep && !best.dict {
-			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
-				best.offset--
-				best.length++
-				s--
-			}
-		}
-		if false && best.offset >= s {
-			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
-		}
-		// Bail if we exceed the maximum size.
-		if d+(s-nextEmit) > dstLimit {
-			return 0
-		}
-
-		base := s
-		offset := s - best.offset
-		s += best.length
-
-		if offset > 65535 && s-base <= 5 && !best.rep {
-			// Bail if the match is equal or worse to the encoding.
-			s = best.s + 1
-			if s >= sLimit {
-				goto emitRemainder
-			}
-			cv = load64(src, s)
-			continue
-		}
-		if debug && nextEmit != base {
-			fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
-		}
-		d += emitLiteral(dst[d:], src[nextEmit:base])
-		if best.rep {
-			if nextEmit > 0 || best.dict {
-				if debug {
-					fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
-				}
-				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
-				d += emitRepeat(dst[d:], offset, best.length)
-			} else {
-				// First match without dict cannot be a repeat.
-				if debug {
-					fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
-				}
-				d += emitCopy(dst[d:], offset, best.length)
-			}
-		} else {
-			if debug {
-				fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
-			}
-			d += emitCopy(dst[d:], offset, best.length)
-		}
-		repeat = offset
-
-		nextEmit = s
-		if s >= sLimit {
-			goto emitRemainder
-		}
-
-		if d > dstLimit {
-			// Do we have space for more, if not bail.
-			return 0
-		}
-		// Fill tables...
-		for i := best.s + 1; i < s; i++ {
-			cv0 := load64(src, i)
-			long0 := hash8(cv0, lTableBits)
-			short0 := hash4(cv0, sTableBits)
-			lTable[long0] = uint64(i) | lTable[long0]<<32
-			sTable[short0] = uint64(i) | sTable[short0]<<32
-		}
-		cv = load64(src, s)
-	}
-
-emitRemainder:
-	if nextEmit < len(src) {
-		// Bail if we exceed the maximum size.
-		if d+len(src)-nextEmit > dstLimit {
-			return 0
-		}
-		if debug && nextEmit != s {
-			fmt.Println("emitted ", len(src)-nextEmit, "literals")
-		}
-		d += emitLiteral(dst[d:], src[nextEmit:])
-	}
-	return d
-}
-
-// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src)) &&
-//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
-func encodeBlockBestSnappy(dst, src []byte) (d int) {
-	// Initialize the hash tables.
-	const (
-		// Long hash matches.
-		lTableBits    = 19
-		maxLTableSize = 1 << lTableBits
-
-		// Short hash matches.
-		sTableBits    = 16
-		maxSTableSize = 1 << sTableBits
-
-		inputMargin = 8 + 2
-	)
-
-	// sLimit is when to stop looking for offset/length copies. The inputMargin
-	// lets us use a fast path for emitLiteral in the main loop, while we are
-	// looking for copies.
-	sLimit := len(src) - inputMargin
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-
-	var lTable [maxLTableSize]uint64
-	var sTable [maxSTableSize]uint64
-
-	// Bail if we can't compress to at least this.
-	dstLimit := len(src) - 5
-
-	// nextEmit is where in src the next emitLiteral should start from.
-	nextEmit := 0
-
-	// The encoded form must start with a literal, as there are no previous
-	// bytes to copy, so we start looking for hash matches at s == 1.
-	s := 1
-	cv := load64(src, s)
-
-	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
-	repeat := 1
-	const lowbitMask = 0xffffffff
-	getCur := func(x uint64) int {
-		return int(x & lowbitMask)
-	}
-	getPrev := func(x uint64) int {
-		return int(x >> 32)
-	}
-	const maxSkip = 64
-
-	for {
-		type match struct {
-			offset int
-			s      int
-			length int
-			score  int
-		}
-		var best match
-		for {
-			// Next src position to check
-			nextS := (s-nextEmit)>>8 + 1
-			if nextS > maxSkip {
-				nextS = s + maxSkip
-			} else {
-				nextS += s
-			}
-			if nextS > sLimit {
-				goto emitRemainder
-			}
-			hashL := hash8(cv, lTableBits)
-			hashS := hash4(cv, sTableBits)
-			candidateL := lTable[hashL]
-			candidateS := sTable[hashS]
-
-			score := func(m match) int {
-				// Matches that are longer forward are penalized since we must emit it as a literal.
-				score := m.length - m.s
-				if nextEmit == m.s {
-					// If we do not have to emit literals, we save 1 byte
-					score++
-				}
-				offset := m.s - m.offset
-
-				return score - emitCopyNoRepeatSize(offset, m.length)
-			}
-
-			matchAt := func(offset, s int, first uint32) match {
-				if best.length != 0 && best.s-best.offset == s-offset {
-					// Don't retest if we have the same offset.
-					return match{offset: offset, s: s}
-				}
-				if load32(src, offset) != first {
-					return match{offset: offset, s: s}
-				}
-				m := match{offset: offset, s: s, length: 4 + offset}
-				s += 4
-				for s <= sLimit {
-					if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
-						m.length += bits.TrailingZeros64(diff) >> 3
-						break
-					}
-					s += 8
-					m.length += 8
-				}
-				m.length -= offset
-				m.score = score(m)
-				if m.score <= -m.s {
-					// Eliminate if no savings, we might find a better one.
-					m.length = 0
-				}
-				return m
-			}
-
-			bestOf := func(a, b match) match {
-				if b.length == 0 {
-					return a
-				}
-				if a.length == 0 {
-					return b
-				}
-				as := a.score + b.s
-				bs := b.score + a.s
-				if as >= bs {
-					return a
-				}
-				return b
-			}
-
-			best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
-			best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
-			best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))
-
-			{
-				best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
-				if best.length > 0 {
-					// s+1
-					nextShort := sTable[hash4(cv>>8, sTableBits)]
-					s := s + 1
-					cv := load64(src, s)
-					nextLong := lTable[hash8(cv, lTableBits)]
-					best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
-					best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
-					best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
-					best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
-					// Repeat at + 2
-					best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
-
-					// s+2
-					if true {
-						nextShort = sTable[hash4(cv>>8, sTableBits)]
-						s++
-						cv = load64(src, s)
-						nextLong = lTable[hash8(cv, lTableBits)]
-						best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
-						best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
-						best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
-						best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
-					}
-					// Search for a match at best match end, see if that is better.
-					if sAt := best.s + best.length; sAt < sLimit {
-						sBack := best.s
-						backL := best.length
-						// Load initial values
-						cv = load64(src, sBack)
-						// Search for mismatch
-						next := lTable[hash8(load64(src, sAt), lTableBits)]
-						//next := sTable[hash4(load64(src, sAt), sTableBits)]
-
-						if checkAt := getCur(next) - backL; checkAt > 0 {
-							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
-						}
-						if checkAt := getPrev(next) - backL; checkAt > 0 {
-							best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
-						}
-					}
-				}
-			}
-
-			// Update table
-			lTable[hashL] = uint64(s) | candidateL<<32
-			sTable[hashS] = uint64(s) | candidateS<<32
-
-			if best.length > 0 {
-				break
-			}
-
-			cv = load64(src, nextS)
-			s = nextS
-		}
-
-		// Extend backwards, not needed for repeats...
-		s = best.s
-		if true {
-			for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
-				best.offset--
-				best.length++
-				s--
-			}
-		}
-		if false && best.offset >= s {
-			panic(fmt.Errorf("t %d >= s %d", best.offset, s))
-		}
-		// Bail if we exceed the maximum size.
-		if d+(s-nextEmit) > dstLimit {
-			return 0
-		}
-
-		base := s
-		offset := s - best.offset
-
-		s += best.length
-
-		if offset > 65535 && s-base <= 5 {
-			// Bail if the match is equal or worse to the encoding.
-			s = best.s + 1
-			if s >= sLimit {
-				goto emitRemainder
-			}
-			cv = load64(src, s)
-			continue
-		}
-		d += emitLiteral(dst[d:], src[nextEmit:base])
-		d += emitCopyNoRepeat(dst[d:], offset, best.length)
-		repeat = offset
-
-		nextEmit = s
-		if s >= sLimit {
-			goto emitRemainder
-		}
-
-		if d > dstLimit {
-			// Do we have space for more, if not bail.
-			return 0
-		}
-		// Fill tables...
-		for i := best.s + 1; i < s; i++ {
-			cv0 := load64(src, i)
-			long0 := hash8(cv0, lTableBits)
-			short0 := hash4(cv0, sTableBits)
-			lTable[long0] = uint64(i) | lTable[long0]<<32
-			sTable[short0] = uint64(i) | sTable[short0]<<32
-		}
-		cv = load64(src, s)
-	}
-
-emitRemainder:
-	if nextEmit < len(src) {
-		// Bail if we exceed the maximum size.
-		if d+len(src)-nextEmit > dstLimit {
-			return 0
-		}
-		d += emitLiteral(dst[d:], src[nextEmit:])
-	}
-	return d
-}
-
-// emitCopySize returns the size to encode the offset+length
-//
-// It assumes that:
-//
-//	1 <= offset && offset <= math.MaxUint32
-//	4 <= length && length <= 1 << 24
-func emitCopySize(offset, length int) int {
-	if offset >= 65536 {
-		i := 0
-		if length > 64 {
-			length -= 64
-			if length >= 4 {
-				// Emit remaining as repeats
-				return 5 + emitRepeatSize(offset, length)
-			}
-			i = 5
-		}
-		if length == 0 {
-			return i
-		}
-		return i + 5
-	}
-
-	// Offset no more than 2 bytes.
-	if length > 64 {
-		if offset < 2048 {
-			// Emit 8 bytes, then rest as repeats...
-			return 2 + emitRepeatSize(offset, length-8)
-		}
-		// Emit remaining as repeats, at least 4 bytes remain.
-		return 3 + emitRepeatSize(offset, length-60)
-	}
-	if length >= 12 || offset >= 2048 {
-		return 3
-	}
-	// Emit the remaining copy, encoded as 2 bytes.
-	return 2
-}
-
-// emitCopyNoRepeatSize returns the size to encode the offset+length
-//
-// It assumes that:
-//
-//	1 <= offset && offset <= math.MaxUint32
-//	4 <= length && length <= 1 << 24
-func emitCopyNoRepeatSize(offset, length int) int {
-	if offset >= 65536 {
-		return 5 + 5*(length/64)
-	}
-
-	// Offset no more than 2 bytes.
-	if length > 64 {
-		// Emit remaining as repeats, at least 4 bytes remain.
-		return 3 + 3*(length/60)
-	}
-	if length >= 12 || offset >= 2048 {
-		return 3
-	}
-	// Emit the remaining copy, encoded as 2 bytes.
-	return 2
-}
-
-// emitRepeatSize returns the number of bytes required to encode a repeat.
-// Length must be at least 4 and < 1<<24
-func emitRepeatSize(offset, length int) int {
-	// Repeat offset, make length cheaper
-	if length <= 4+4 || (length < 8+4 && offset < 2048) {
-		return 2
-	}
-	if length < (1<<8)+4+4 {
-		return 3
-	}
-	if length < (1<<16)+(1<<8)+4 {
-		return 4
-	}
-	const maxRepeat = (1 << 24) - 1
-	length -= (1 << 16) - 4
-	left := 0
-	if length > maxRepeat {
-		left = length - maxRepeat + 4
-	}
-	if left > 0 {
-		return 5 + emitRepeatSize(offset, left)
-	}
-	return 5
-}
--- a/vendor/github.com/klauspost/compress/s2/encode_better.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_better.go
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_go.go
@ -1,727 +0,0 @@
-//go:build !amd64 || appengine || !gc || noasm
-// +build !amd64 appengine !gc noasm
-
-package s2
-
-import (
-	"bytes"
-	"math/bits"
-)
-
-const hasAmd64Asm = false
-
-// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src))
-func encodeBlock(dst, src []byte) (d int) {
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-	return encodeBlockGo(dst, src)
-}
-
-// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src))
-func encodeBlockBetter(dst, src []byte) (d int) {
-	return encodeBlockBetterGo(dst, src)
-}
-
-// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src))
-func encodeBlockBetterSnappy(dst, src []byte) (d int) {
-	return encodeBlockBetterSnappyGo(dst, src)
-}
-
-// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
-// assumes that the varint-encoded length of the decompressed bytes has already
-// been written.
-//
-// It also assumes that:
-//
-//	len(dst) >= MaxEncodedLen(len(src))
-func encodeBlockSnappy(dst, src []byte) (d int) {
-	if len(src) < minNonLiteralBlockSize {
-		return 0
-	}
-	return encodeBlockSnappyGo(dst, src)
-}
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes
-//	0 <= len(lit) && len(lit) <= math.MaxUint32
-func emitLiteral(dst, lit []byte) int {
-	if len(lit) == 0 {
-		return 0
-	}
-	const num = 63<<2 | tagLiteral
-	i, n := 0, uint(len(lit)-1)
-	switch {
-	case n < 60:
-		dst[0] = uint8(n)<<2 | tagLiteral
-		i = 1
-	case n < 1<<8:
-		dst[1] = uint8(n)
-		dst[0] = 60<<2 | tagLiteral
-		i = 2
-	case n < 1<<16:
-		dst[2] = uint8(n >> 8)
-		dst[1] = uint8(n)
-		dst[0] = 61<<2 | tagLiteral
-		i = 3
-	case n < 1<<24:
-		dst[3] = uint8(n >> 16)
-		dst[2] = uint8(n >> 8)
-		dst[1] = uint8(n)
-		dst[0] = 62<<2 | tagLiteral
-		i = 4
-	default:
-		dst[4] = uint8(n >> 24)
-		dst[3] = uint8(n >> 16)
-		dst[2] = uint8(n >> 8)
-		dst[1] = uint8(n)
-		dst[0] = 63<<2 | tagLiteral
-		i = 5
-	}
-	return i + copy(dst[i:], lit)
-}
-
-// emitRepeat writes a repeat chunk and returns the number of bytes written.
-// Length must be at least 4 and < 1<<24
-func emitRepeat(dst []byte, offset, length int) int {
-	// Repeat offset, make length cheaper
-	length -= 4
-	if length <= 4 {
-		dst[0] = uint8(length)<<2 | tagCopy1
-		dst[1] = 0
-		return 2
-	}
-	if length < 8 && offset < 2048 {
-		// Encode WITH offset
-		dst[1] = uint8(offset)
-		dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
-		return 2
-	}
-	if length < (1<<8)+4 {
-		length -= 4
-		dst[2] = uint8(length)
-		dst[1] = 0
-		dst[0] = 5<<2 | tagCopy1
-		return 3
-	}
-	if length < (1<<16)+(1<<8) {
-		length -= 1 << 8
-		dst[3] = uint8(length >> 8)
-		dst[2] = uint8(length >> 0)
-		dst[1] = 0
-		dst[0] = 6<<2 | tagCopy1
-		return 4
-	}
-	const maxRepeat = (1 << 24) - 1
-	length -= 1 << 16
-	left := 0
-	if length > maxRepeat {
-		left = length - maxRepeat + 4
-		length = maxRepeat - 4
-	}
-	dst[4] = uint8(length >> 16)
-	dst[3] = uint8(length >> 8)
-	dst[2] = uint8(length >> 0)
-	dst[1] = 0
-	dst[0] = 7<<2 | tagCopy1
-	if left > 0 {
-		return 5 + emitRepeat(dst[5:], offset, left)
-	}
-	return 5
-}
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes
-//	1 <= offset && offset <= math.MaxUint32
-//	4 <= length && length <= 1 << 24
-func emitCopy(dst []byte, offset, length int) int {
-	if offset >= 65536 {
-		i := 0
-		if length > 64 {
-			// Emit a length 64 copy, encoded as 5 bytes.
-			dst[4] = uint8(offset >> 24)
-			dst[3] = uint8(offset >> 16)
-			dst[2] = uint8(offset >> 8)
-			dst[1] = uint8(offset)
-			dst[0] = 63<<2 | tagCopy4
-			length -= 64
-			if length >= 4 {
-				// Emit remaining as repeats
-				return 5 + emitRepeat(dst[5:], offset, length)
-			}
-			i = 5
-		}
-		if length == 0 {
-			return i
-		}
-		// Emit a copy, offset encoded as 4 bytes.
-		dst[i+0] = uint8(length-1)<<2 | tagCopy4
-		dst[i+1] = uint8(offset)
-		dst[i+2] = uint8(offset >> 8)
-		dst[i+3] = uint8(offset >> 16)
-		dst[i+4] = uint8(offset >> 24)
-		return i + 5
-	}
-
-	// Offset no more than 2 bytes.
-	if length > 64 {
-		off := 3
-		if offset < 2048 {
-			// emit 8 bytes as tagCopy1, rest as repeats.
-			dst[1] = uint8(offset)
-			dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
-			length -= 8
-			off = 2
-		} else {
-			// Emit a length 60 copy, encoded as 3 bytes.
-			// Emit remaining as repeat value (minimum 4 bytes).
-			dst[2] = uint8(offset >> 8)
-			dst[1] = uint8(offset)
-			dst[0] = 59<<2 | tagCopy2
-			length -= 60
-		}
-		// Emit remaining as repeats, at least 4 bytes remain.
-		return off + emitRepeat(dst[off:], offset, length)
-	}
-	if length >= 12 || offset >= 2048 {
-		// Emit the remaining copy, encoded as 3 bytes.
-		dst[2] = uint8(offset >> 8)
-		dst[1] = uint8(offset)
-		dst[0] = uint8(length-1)<<2 | tagCopy2
-		return 3
-	}
-	// Emit the remaining copy, encoded as 2 bytes.
-	dst[1] = uint8(offset)
-	dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
-	return 2
-}
-
-// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes
-//	1 <= offset && offset <= math.MaxUint32
-//	4 <= length && length <= 1 << 24
-func emitCopyNoRepeat(dst []byte, offset, length int) int {
-	if offset >= 65536 {
-		i := 0
-		if length > 64 {
-			// Emit a length 64 copy, encoded as 5 bytes.
-			dst[4] = uint8(offset >> 24)
-			dst[3] = uint8(offset >> 16)
-			dst[2] = uint8(offset >> 8)
-			dst[1] = uint8(offset)
-			dst[0] = 63<<2 | tagCopy4
-			length -= 64
-			if length >= 4 {
-				// Emit remaining as repeats
-				return 5 + emitCopyNoRepeat(dst[5:], offset, length)
-			}
-			i = 5
-		}
-		if length == 0 {
-			return i
-		}
-		// Emit a copy, offset encoded as 4 bytes.
-		dst[i+0] = uint8(length-1)<<2 | tagCopy4
-		dst[i+1] = uint8(offset)
-		dst[i+2] = uint8(offset >> 8)
-		dst[i+3] = uint8(offset >> 16)
-		dst[i+4] = uint8(offset >> 24)
-		return i + 5
-	}
-
-	// Offset no more than 2 bytes.
-	if length > 64 {
-		// Emit a length 60 copy, encoded as 3 bytes.
-		// Emit remaining as repeat value (minimum 4 bytes).
-		dst[2] = uint8(offset >> 8)
-		dst[1] = uint8(offset)
-		dst[0] = 59<<2 | tagCopy2
-		length -= 60
-		// Emit remaining as repeats, at least 4 bytes remain.
-		return 3 + emitCopyNoRepeat(dst[3:], offset, length)
-	}
-	if length >= 12 || offset >= 2048 {
-		// Emit the remaining copy, encoded as 3 bytes.
-		dst[2] = uint8(offset >> 8)
-		dst[1] = uint8(offset)
-		dst[0] = uint8(length-1)<<2 | tagCopy2
-		return 3
-	}
-	// Emit the remaining copy, encoded as 2 bytes.
-	dst[1] = uint8(offset)
-	dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
-	return 2
-}
-
-// matchLen returns how many bytes match in a and b
-//
-// It assumes that:
-//
-//	len(a) <= len(b)
-func matchLen(a []byte, b []byte) int {
-	b = b[:len(a)]
-	var checked int
-	if len(a) > 4 {
-		// Try 4 bytes first
-		if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
-			return bits.TrailingZeros32(diff) >> 3
-		}
-		// Switch to 8 byte matching.
-		checked = 4
-		a = a[4:]
-		b = b[4:]
-		for len(a) >= 8 {
-			b = b[:len(a)]
-			if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
-				return checked + (bits.TrailingZeros64(diff) >> 3)
-			}
-			checked += 8
-			a = a[8:]
-			b = b[8:]
-		}
-	}
-	b = b[:len(a)]
-	for i := range a {
-		if a[i] != b[i] {
-			return int(i) + checked
-		}
-	}
-	return len(a) + checked
-}
-
-func calcBlockSize(src []byte) (d int) { // calcBlockSize returns the sum of the emitLiteralSize/emitCopyNoRepeatSize results for the matches it finds in src, or 0 once that sum would exceed dstLimit.
-	// Initialize the hash table.
-	const (
-		tableBits    = 13
-		maxTableSize = 1 << tableBits
-	)
-
-	var table [maxTableSize]uint32 // Indexed by hash6 values; stores positions in src.
-
-	// sLimit is when to stop looking for offset/length copies. Once the search
-	// position passes it, the rest of src is counted as one literal at
-	// emitRemainder.
-	sLimit := len(src) - inputMargin
-
-	// Bail if we can't compress to at least this.
-	dstLimit := len(src) - len(src)>>5 - 5
-
-	// nextEmit is where in src the next literal (counted via emitLiteralSize) starts.
-	nextEmit := 0
-
-	// The encoded form must start with a literal, as there are no previous
-	// bytes to copy, so we start looking for hash matches at s == 1.
-	s := 1
-	cv := load64(src, s) // NOTE(review): presumably requires src to hold at least 9 bytes — confirm against callers.
-
-	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
-	repeat := 1
-
-	for {
-		candidate := 0
-		for {
-			// Next src position to check
-			nextS := s + (s-nextEmit)>>6 + 4 // The step grows with the number of unmatched bytes since nextEmit.
-			if nextS > sLimit {
-				goto emitRemainder
-			}
-			hash0 := hash6(cv, tableBits)
-			hash1 := hash6(cv>>8, tableBits)
-			candidate = int(table[hash0])
-			candidate2 := int(table[hash1])
-			table[hash0] = uint32(s)
-			table[hash1] = uint32(s + 1)
-			hash2 := hash6(cv>>16, tableBits)
-
-			// Check repeat at offset checkRep.
-			const checkRep = 1
-			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
-				base := s + checkRep
-				// Extend back
-				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
-					i--
-					base--
-				}
-				d += emitLiteralSize(src[nextEmit:base])
-
-				// Extend forward
-				candidate := s - repeat + 4 + checkRep // Shadows the outer candidate for the duration of this branch.
-				s += 4 + checkRep
-				for s <= sLimit {
-					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
-						s += bits.TrailingZeros64(diff) >> 3
-						break
-					}
-					s += 8
-					candidate += 8
-				}
-
-				d += emitCopyNoRepeatSize(repeat, s-base)
-				nextEmit = s
-				if s >= sLimit {
-					goto emitRemainder
-				}
-
-				cv = load64(src, s)
-				continue
-			}
-
-			if uint32(cv) == load32(src, candidate) {
-				break
-			}
-			candidate = int(table[hash2])
-			if uint32(cv>>8) == load32(src, candidate2) {
-				table[hash2] = uint32(s + 2)
-				candidate = candidate2
-				s++
-				break
-			}
-			table[hash2] = uint32(s + 2)
-			if uint32(cv>>16) == load32(src, candidate) {
-				s += 2
-				break
-			}
-
-			cv = load64(src, nextS)
-			s = nextS
-		}
-
-		// Extend backwards
-		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
-			candidate--
-			s--
-		}
-
-		// Bail if we exceed the maximum size.
-		if d+(s-nextEmit) > dstLimit {
-			return 0
-		}
-
-		// A 4-byte match has been found. We'll later see if more than 4 bytes
-		// match. But, prior to the match, src[nextEmit:s] are unmatched. Count
-		// them as literal bytes.
-
-		d += emitLiteralSize(src[nextEmit:s])
-
-		// Count the copy, and then see if another copy could be our next
-		// move. Repeat until we find no match for the input immediately after
-		// what was consumed by the last copy.
-		//
-		// If we exit this loop normally then a literal comes next,
-		// though we don't yet know how big the literal will be. We handle that
-		// by proceeding to the next iteration of the main loop. We also can
-		// exit this loop via goto if we get close to exhausting the input.
-		for {
-			// Invariant: we have a 4-byte match at s, and no need to emit any
-			// literal bytes prior to s.
-			base := s
-			repeat = base - candidate
-
-			// Extend the 4-byte match as long as possible.
-			s += 4
-			candidate += 4
-			for s <= len(src)-8 {
-				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
-					s += bits.TrailingZeros64(diff) >> 3
-					break
-				}
-				s += 8
-				candidate += 8
-			}
-
-			d += emitCopyNoRepeatSize(repeat, s-base)
-			if false { // Disabled self-check; this branch never executes.
-				// Validate match.
-				a := src[base:s]
-				b := src[base-repeat : base-repeat+(s-base)]
-				if !bytes.Equal(a, b) {
-					panic("mismatch")
-				}
-			}
-
-			nextEmit = s
-			if s >= sLimit {
-				goto emitRemainder
-			}
-
-			if d > dstLimit {
-				// Do we have space for more, if not bail.
-				return 0
-			}
-			// Check for an immediate match, otherwise start search at s+1
-			x := load64(src, s-2)
-			m2Hash := hash6(x, tableBits)
-			currHash := hash6(x>>16, tableBits)
-			candidate = int(table[currHash])
-			table[m2Hash] = uint32(s - 2)
-			table[currHash] = uint32(s)
-			if uint32(x>>16) != load32(src, candidate) {
-				cv = load64(src, s+1)
-				s++
-				break
-			}
-		}
-	}
-
-emitRemainder:
-	if nextEmit < len(src) {
-		// Bail if we exceed the maximum size.
-		if d+len(src)-nextEmit > dstLimit {
-			return 0
-		}
-		d += emitLiteralSize(src[nextEmit:]) // Everything after the last match counts as one final literal.
-	}
-	return d
-}
-
-func calcBlockSizeSmall(src []byte) (d int) { // calcBlockSizeSmall is calcBlockSize with a 9-bit hash table: it returns the summed emitLiteralSize/emitCopyNoRepeatSize results for src, or 0 once that sum would exceed dstLimit.
-	// Initialize the hash table.
-	const (
-		tableBits    = 9
-		maxTableSize = 1 << tableBits
-	)
-
-	var table [maxTableSize]uint32 // Indexed by hash6 values; stores positions in src.
-
-	// sLimit is when to stop looking for offset/length copies. Once the search
-	// position passes it, the rest of src is counted as one literal at
-	// emitRemainder.
-	sLimit := len(src) - inputMargin
-
-	// Bail if we can't compress to at least this.
-	dstLimit := len(src) - len(src)>>5 - 5
-
-	// nextEmit is where in src the next literal (counted via emitLiteralSize) starts.
-	nextEmit := 0
-
-	// The encoded form must start with a literal, as there are no previous
-	// bytes to copy, so we start looking for hash matches at s == 1.
-	s := 1
-	cv := load64(src, s) // NOTE(review): presumably requires src to hold at least 9 bytes — confirm against callers.
-
-	// We search for a repeat at -1, but don't output repeats when nextEmit == 0
-	repeat := 1
-
-	for {
-		candidate := 0
-		for {
-			// Next src position to check
-			nextS := s + (s-nextEmit)>>6 + 4 // The step grows with the number of unmatched bytes since nextEmit.
-			if nextS > sLimit {
-				goto emitRemainder
-			}
-			hash0 := hash6(cv, tableBits)
-			hash1 := hash6(cv>>8, tableBits)
-			candidate = int(table[hash0])
-			candidate2 := int(table[hash1])
-			table[hash0] = uint32(s)
-			table[hash1] = uint32(s + 1)
-			hash2 := hash6(cv>>16, tableBits)
-
-			// Check repeat at offset checkRep.
-			const checkRep = 1
-			if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
-				base := s + checkRep
-				// Extend back
-				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
-					i--
-					base--
-				}
-				d += emitLiteralSize(src[nextEmit:base])
-
-				// Extend forward
-				candidate := s - repeat + 4 + checkRep // Shadows the outer candidate for the duration of this branch.
-				s += 4 + checkRep
-				for s <= sLimit {
-					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
-						s += bits.TrailingZeros64(diff) >> 3
-						break
-					}
-					s += 8
-					candidate += 8
-				}
-
-				d += emitCopyNoRepeatSize(repeat, s-base)
-				nextEmit = s
-				if s >= sLimit {
-					goto emitRemainder
-				}
-
-				cv = load64(src, s)
-				continue
-			}
-
-			if uint32(cv) == load32(src, candidate) {
-				break
-			}
-			candidate = int(table[hash2])
-			if uint32(cv>>8) == load32(src, candidate2) {
-				table[hash2] = uint32(s + 2)
-				candidate = candidate2
-				s++
-				break
-			}
-			table[hash2] = uint32(s + 2)
-			if uint32(cv>>16) == load32(src, candidate) {
-				s += 2
-				break
-			}
-
-			cv = load64(src, nextS)
-			s = nextS
-		}
-
-		// Extend backwards
-		for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
-			candidate--
-			s--
-		}
-
-		// Bail if we exceed the maximum size.
-		if d+(s-nextEmit) > dstLimit {
-			return 0
-		}
-
-		// A 4-byte match has been found. We'll later see if more than 4 bytes
-		// match. But, prior to the match, src[nextEmit:s] are unmatched. Count
-		// them as literal bytes.
-
-		d += emitLiteralSize(src[nextEmit:s])
-
-		// Count the copy, and then see if another copy could be our next
-		// move. Repeat until we find no match for the input immediately after
-		// what was consumed by the last copy.
-		//
-		// If we exit this loop normally then a literal comes next,
-		// though we don't yet know how big the literal will be. We handle that
-		// by proceeding to the next iteration of the main loop. We also can
-		// exit this loop via goto if we get close to exhausting the input.
-		for {
-			// Invariant: we have a 4-byte match at s, and no need to emit any
-			// literal bytes prior to s.
-			base := s
-			repeat = base - candidate
-
-			// Extend the 4-byte match as long as possible.
-			s += 4
-			candidate += 4
-			for s <= len(src)-8 {
-				if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
-					s += bits.TrailingZeros64(diff) >> 3
-					break
-				}
-				s += 8
-				candidate += 8
-			}
-
-			d += emitCopyNoRepeatSize(repeat, s-base)
-			if false { // Disabled self-check; this branch never executes.
-				// Validate match.
-				a := src[base:s]
-				b := src[base-repeat : base-repeat+(s-base)]
-				if !bytes.Equal(a, b) {
-					panic("mismatch")
-				}
-			}
-
-			nextEmit = s
-			if s >= sLimit {
-				goto emitRemainder
-			}
-
-			if d > dstLimit {
-				// Do we have space for more, if not bail.
-				return 0
-			}
-			// Check for an immediate match, otherwise start search at s+1
-			x := load64(src, s-2)
-			m2Hash := hash6(x, tableBits)
-			currHash := hash6(x>>16, tableBits)
-			candidate = int(table[currHash])
-			table[m2Hash] = uint32(s - 2)
-			table[currHash] = uint32(s)
-			if uint32(x>>16) != load32(src, candidate) {
-				cv = load64(src, s+1)
-				s++
-				break
-			}
-		}
-	}
-
-emitRemainder:
-	if nextEmit < len(src) {
-		// Bail if we exceed the maximum size.
-		if d+len(src)-nextEmit > dstLimit {
-			return 0
-		}
-		d += emitLiteralSize(src[nextEmit:]) // Everything after the last match counts as one final literal.
-	}
-	return d
-}
-
-// emitLiteralSize returns the number of bytes a literal chunk holding lit
-// occupies: the literal bytes themselves plus a header of 1 to 5 bytes that
-// is selected by len(lit). Nothing is written anywhere.
-//
-// An empty literal occupies no bytes at all.
-func emitLiteralSize(lit []byte) int {
-	n := len(lit)
-	if n == 0 {
-		return 0
-	}
-	// Pick the header size from the literal length; 5 is the fallback for
-	// anything longer than 1<<24 bytes.
-	header := 5
-	if n <= 60 {
-		header = 1
-	} else if n <= 1<<8 {
-		header = 2
-	} else if n <= 1<<16 {
-		header = 3
-	} else if n <= 1<<24 {
-		header = 4
-	}
-	return n + header
-}
-
-func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { // Placeholder that always panics; the message states it should be unreachable.
-	panic("cvtLZ4BlockAsm should be unreachable")
-}
-
-func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { // Placeholder that always panics; the message states it should be unreachable.
-	panic("cvtLZ4BlockSnappyAsm should be unreachable")
-}
-
-func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { // Placeholder that always panics; the message states it should be unreachable.
-	panic("cvtLZ4sBlockAsm should be unreachable")
-}
-
-func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) { // Placeholder that always panics; the message states it should be unreachable.
-	panic("cvtLZ4sBlockSnappyAsm should be unreachable")
-}
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
@ -1,228 +0,0 @@
-// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
-
-//go:build !appengine && !noasm && gc && !noasm
-
-package s2
-
-func _dummy_()
-
-// encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm(dst []byte, src []byte) int
-
-// encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4194304 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm4MB(dst []byte, src []byte) int
-
-// encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm12B(dst []byte, src []byte) int
-
-// encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm10B(dst []byte, src []byte) int
-
-// encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBlockAsm8B(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4194304 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm12B(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm10B(dst []byte, src []byte) int
-
-// encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeBetterBlockAsm8B(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 65535 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
-
-// encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4294967295 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 65535 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 16383 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 4095 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
-
-// encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
-// Maximum input 511 bytes.
-// It assumes that the varint-encoded length of the decompressed bytes has already been written.
-//
-//go:noescape
-func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
-
-// calcBlockSize returns the encoded size of a non-empty src without producing output; it takes no dst.
-// Maximum input 4294967295 bytes.
-// The varint-encoded length of the decompressed bytes is presumably not part of the returned size.
-//
-//go:noescape
-func calcBlockSize(src []byte) int
-
-// calcBlockSizeSmall returns the encoded size of a non-empty src without producing output; it takes no dst.
-// Maximum input 1024 bytes.
-// The varint-encoded length of the decompressed bytes is presumably not part of the returned size.
-//
-//go:noescape
-func calcBlockSizeSmall(src []byte) int
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes with margin of 0 bytes
-//	0 <= len(lit) && len(lit) <= math.MaxUint32
-//
-//go:noescape
-func emitLiteral(dst []byte, lit []byte) int
-
-// emitRepeat writes a repeat chunk and returns the number of bytes written.
-// Length must be at least 4 and < 1<<32
-//
-//go:noescape
-func emitRepeat(dst []byte, offset int, length int) int
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes
-//	1 <= offset && offset <= math.MaxUint32
-//	4 <= length && length <= 1 << 24
-//
-//go:noescape
-func emitCopy(dst []byte, offset int, length int) int
-
-// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes
-//	1 <= offset && offset <= math.MaxUint32
-//	4 <= length && length <= 1 << 24
-//
-//go:noescape
-func emitCopyNoRepeat(dst []byte, offset int, length int) int
-
-// matchLen returns how many bytes match in a and b
-//
-// It assumes that:
-//
-//	len(a) <= len(b)
-//
-//go:noescape
-func matchLen(a []byte, b []byte) int
-
-// cvtLZ4BlockAsm converts an LZ4 block to S2
-//
-//go:noescape
-func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
-
-// cvtLZ4sBlockAsm converts an LZ4s block to S2
-//
-//go:noescape
-func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
-
-// cvtLZ4BlockSnappyAsm converts an LZ4 block to Snappy
-//
-//go:noescape
-func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
-
-// cvtLZ4sBlockSnappyAsm converts an LZ4s block to Snappy
-//
-//go:noescape
-func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
--- a/vendor/github.com/klauspost/compress/s2/index.go
+++ b/vendor/github.com/klauspost/compress/s2/index.go
@ -1,596 +0,0 @@
-// Copyright (c) 2022+ Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
-	"bytes"
-	"encoding/binary"
-	"encoding/json"
-	"fmt"
-	"io"
-	"sort"
-)
-
-const (
-	S2IndexHeader   = "s2idx\x00" // Marker written directly after the 4-byte chunk header of a serialized index.
-	S2IndexTrailer  = "\x00xdi2s" // Marker written as the final bytes of a serialized index.
-	maxIndexEntries = 1 << 16 // Largest entry count accepted by allocInfos and Load.
-)
-
-// Index represents an S2/Snappy index: a list of compressed/uncompressed offset pairs plus stream totals.
-type Index struct {
-	TotalUncompressed int64 // Total Uncompressed size if known. Will be -1 if unknown.
-	TotalCompressed   int64 // Total Compressed size if known. Will be -1 if unknown.
-	info              []struct { // Offset pairs; add rejects entries whose offsets go backwards.
-		compressedOffset   int64
-		uncompressedOffset int64
-	}
-	estBlockUncomp int64 // Estimated uncompressed size per entry; used to predict offsets when (de)serializing.
-}
-
-// reset prepares the index for reuse. It stores maxBlock as the estimated
-// uncompressed block size, marks both totals as unknown (-1) and drops all
-// entries while keeping the backing storage of the entry slice.
-func (i *Index) reset(maxBlock int) {
-	i.TotalUncompressed, i.TotalCompressed = -1, -1
-	i.estBlockUncomp = int64(maxBlock)
-	// Truncating leaves a nil or already empty slice unchanged.
-	i.info = i.info[:0]
-}
-
-// allocInfos will allocate an empty slice of infos.
-func (i *Index) allocInfos(n int) {
-	if n > maxIndexEntries {
-		panic("n > maxIndexEntries")
-	}
-	i.info = make([]struct {
-		compressedOffset   int64
-		uncompressedOffset int64
-	}, 0, n)
-}
-
-// add records a compressed/uncompressed offset pair.
-// Entries must be sent in order: the uncompressed offset must never be lower
-// than that of the most recent entry, and for a new entry the compressed
-// offset must not be lower either; otherwise an error is returned.
-// When the uncompressed offset equals that of the most recent entry, no entry
-// is added; the compressed offset of the existing entry is replaced instead.
-// Calling add on a nil *Index does nothing and returns nil.
-func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
-	if i == nil {
-		return nil
-	}
-	lastIdx := len(i.info) - 1
-	if lastIdx >= 0 {
-		latest := i.info[lastIdx]
-		if latest.uncompressedOffset == uncompressedOffset {
-			// Uncompressed didn't change, don't add entry,
-			// but update start index.
-			latest.compressedOffset = compressedOffset
-			i.info[lastIdx] = latest
-			return nil
-		}
-		if latest.uncompressedOffset > uncompressedOffset {
-			return fmt.Errorf("internal error: Earlier uncompressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
-		}
-		if latest.compressedOffset > compressedOffset {
-			// Report the compressed offsets, which are the values that failed this check.
-			return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.compressedOffset, compressedOffset)
-		}
-	}
-	i.info = append(i.info, struct {
-		compressedOffset   int64
-		uncompressedOffset int64
-	}{compressedOffset: compressedOffset, uncompressedOffset: uncompressedOffset})
-	return nil
-}
-
-// Find the offset at or before the wanted (uncompressed) offset.
-// If offset is 0 or positive it is the offset from the beginning of the file.
-// The total uncompressed size must be known: if TotalUncompressed is negative,
-// ErrCorrupt is returned regardless of the requested offset.
-// If the offset is negative, it is interpreted as the distance from the end of the file,
-// where -1 represents the last byte.
-// If the resolved offset lies before the start of the file or is greater than
-// TotalUncompressed, io.ErrUnexpectedEOF is returned.
-func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err error) {
-	if i.TotalUncompressed < 0 {
-		return 0, 0, ErrCorrupt
-	}
-	if offset < 0 {
-		offset = i.TotalUncompressed + offset // Convert an end-relative offset into an absolute one.
-		if offset < 0 {
-			return 0, 0, io.ErrUnexpectedEOF
-		}
-	}
-	if offset > i.TotalUncompressed {
-		return 0, 0, io.ErrUnexpectedEOF
-	}
-	if len(i.info) > 200 { // Larger indexes use a binary search instead of the linear scan below.
-		n := sort.Search(len(i.info), func(n int) bool {
-			return i.info[n].uncompressedOffset > offset
-		})
-		if n == 0 { // Even the first entry starts after offset; fall back to the first entry.
-			n = 1
-		}
-		return i.info[n-1].compressedOffset, i.info[n-1].uncompressedOffset, nil
-	}
-	for _, info := range i.info { // Keep the last entry that does not start after offset; both results stay 0 if there is none.
-		if info.uncompressedOffset > offset {
-			break
-		}
-		compressedOff = info.compressedOffset
-		uncompressedOff = info.uncompressedOffset
-	}
-	return compressedOff, uncompressedOff, nil
-}
-
-// reduce thins out the entries to stay below maxIndexEntries and to aim for entries that each cover at least 1MB (by estimate).
-func (i *Index) reduce() {
-	if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 { // Few enough entries and blocks already large: nothing to do.
-		return
-	}
-
-	// Algorithm, keep 1, remove removeN entries...
-	removeN := (len(i.info) + 1) / maxIndexEntries
-	src := i.info // Same backing array as i.info; compaction below is in place and safe because j never passes idx.
-	j := 0
-
-	// Each block should be at least 1MB, but don't reduce below 1000 entries.
-	for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 {
-		removeN++
-	}
-	for idx := 0; idx < len(src); idx++ {
-		i.info[j] = src[idx]
-		j++
-		idx += removeN // Skip the removeN entries that follow each kept entry.
-	}
-	i.info = i.info[:j]
-	// Update maxblock estimate: each kept entry now spans removeN+1 original blocks.
-	i.estBlockUncomp += i.estBlockUncomp * int64(removeN)
-}
-func (i *Index) appendTo(b []byte, uncompTotal, compTotal int64) []byte { // appendTo reduces the index, appends its serialized form (a ChunkTypeIndex chunk) to b and returns the extended slice.
-	i.reduce()
-	var tmp [binary.MaxVarintLen64]byte // Scratch space for varint and fixed-size encoding.
-
-	initSize := len(b) // Where the chunk starts inside b; needed to patch the length afterwards.
-	// We make the start a skippable header+size. The three size bytes are filled in at the end.
-	b = append(b, ChunkTypeIndex, 0, 0, 0)
-	b = append(b, []byte(S2IndexHeader)...)
-	// Total Uncompressed size
-	n := binary.PutVarint(tmp[:], uncompTotal)
-	b = append(b, tmp[:n]...)
-	// Total Compressed size
-	n = binary.PutVarint(tmp[:], compTotal)
-	b = append(b, tmp[:n]...)
-	// Put EstBlockUncomp size
-	n = binary.PutVarint(tmp[:], i.estBlockUncomp)
-	b = append(b, tmp[:n]...)
-	// Put length
-	n = binary.PutVarint(tmp[:], int64(len(i.info)))
-	b = append(b, tmp[:n]...)
-
-	// Check if we should add uncompressed offsets. They can be left out when the
-	// first is 0 and every later one is exactly estBlockUncomp after its predecessor.
-	var hasUncompressed byte
-	for idx, info := range i.info {
-		if idx == 0 {
-			if info.uncompressedOffset != 0 {
-				hasUncompressed = 1
-				break
-			}
-			continue
-		}
-		if info.uncompressedOffset != i.info[idx-1].uncompressedOffset+i.estBlockUncomp {
-			hasUncompressed = 1
-			break
-		}
-	}
-	b = append(b, hasUncompressed)
-
-	// Add each entry
-	if hasUncompressed == 1 {
-		for idx, info := range i.info {
-			uOff := info.uncompressedOffset
-			if idx > 0 {
-				prev := i.info[idx-1]
-				uOff -= prev.uncompressedOffset + (i.estBlockUncomp) // Store only the deviation from the predicted offset.
-			}
-			n = binary.PutVarint(tmp[:], uOff)
-			b = append(b, tmp[:n]...)
-		}
-	}
-
-	// Initial compressed size estimate.
-	cPredict := i.estBlockUncomp / 2
-
-	for idx, info := range i.info {
-		cOff := info.compressedOffset
-		if idx > 0 {
-			prev := i.info[idx-1]
-			cOff -= prev.compressedOffset + cPredict
-			// Update compressed size prediction, with half the error.
-			cPredict += cOff / 2
-		}
-		n = binary.PutVarint(tmp[:], cOff)
-		b = append(b, tmp[:n]...)
-	}
-
-	// Add Total Size.
-	// Stored as fixed size for easier reading.
-	binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)-initSize+4+len(S2IndexTrailer)))
-	b = append(b, tmp[:4]...)
-	// Trailer
-	b = append(b, []byte(S2IndexTrailer)...)
-
-	// Update size
-	chunkLen := len(b) - initSize - skippableFrameHeader
-	b[initSize+1] = uint8(chunkLen >> 0)
-	b[initSize+2] = uint8(chunkLen >> 8)
-	b[initSize+3] = uint8(chunkLen >> 16)
-	//fmt.Printf("chunklen: 0x%x Uncomp:%d, Comp:%d\n", chunkLen, uncompTotal, compTotal)
-	return b
-}
-
-// Load a binary index from b. On success the bytes following the index are returned; on failure the not yet consumed part of b is returned with io.ErrUnexpectedEOF, ErrCorrupt or ErrUnsupported.
-// A zero value Index can be used or a previous one can be reused.
-func (i *Index) Load(b []byte) ([]byte, error) {
-	if len(b) <= 4+len(S2IndexHeader)+len(S2IndexTrailer) {
-		return b, io.ErrUnexpectedEOF
-	}
-	if b[0] != ChunkTypeIndex {
-		return b, ErrCorrupt
-	}
-	chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16 // 24-bit little-endian chunk length.
-	b = b[4:]
-
-	// Validate we have enough...
-	if len(b) < chunkLen {
-		return b, io.ErrUnexpectedEOF
-	}
-	if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
-		return b, ErrUnsupported
-	}
-	b = b[len(S2IndexHeader):]
-
-	// Total Uncompressed
-	if v, n := binary.Varint(b); n <= 0 || v < 0 {
-		return b, ErrCorrupt
-	} else {
-		i.TotalUncompressed = v
-		b = b[n:]
-	}
-
-	// Total Compressed
-	if v, n := binary.Varint(b); n <= 0 {
-		return b, ErrCorrupt
-	} else {
-		i.TotalCompressed = v
-		b = b[n:]
-	}
-
-	// Read EstBlockUncomp
-	if v, n := binary.Varint(b); n <= 0 {
-		return b, ErrCorrupt
-	} else {
-		if v < 0 {
-			return b, ErrCorrupt
-		}
-		i.estBlockUncomp = v
-		b = b[n:]
-	}
-
-	var entries int
-	if v, n := binary.Varint(b); n <= 0 {
-		return b, ErrCorrupt
-	} else {
-		if v < 0 || v > maxIndexEntries {
-			return b, ErrCorrupt
-		}
-		entries = int(v)
-		b = b[n:]
-	}
-	if cap(i.info) < entries { // Reuse the existing storage when it is large enough.
-		i.allocInfos(entries)
-	}
-	i.info = i.info[:entries]
-
-	if len(b) < 1 {
-		return b, io.ErrUnexpectedEOF
-	}
-	hasUncompressed := b[0]
-	b = b[1:]
-	if hasUncompressed&1 != hasUncompressed { // Only the values 0 and 1 are accepted.
-		return b, ErrCorrupt
-	}
-
-	// Add each uncompressed entry
-	for idx := range i.info {
-		var uOff int64
-		if hasUncompressed != 0 {
-			// Load delta
-			if v, n := binary.Varint(b); n <= 0 {
-				return b, ErrCorrupt
-			} else {
-				uOff = v
-				b = b[n:]
-			}
-		}
-
-		if idx > 0 {
-			prev := i.info[idx-1].uncompressedOffset
-			uOff += prev + (i.estBlockUncomp) // The stored value is the deviation from the predicted offset.
-			if uOff <= prev { // Offsets must be strictly increasing.
-				return b, ErrCorrupt
-			}
-		}
-		if uOff < 0 {
-			return b, ErrCorrupt
-		}
-		i.info[idx].uncompressedOffset = uOff
-	}
-
-	// Initial compressed size estimate.
-	cPredict := i.estBlockUncomp / 2
-
-	// Add each compressed entry
-	for idx := range i.info {
-		var cOff int64
-		if v, n := binary.Varint(b); n <= 0 {
-			return b, ErrCorrupt
-		} else {
-			cOff = v
-			b = b[n:]
-		}
-
-		if idx > 0 {
-			// Update compressed size prediction, with half the error.
-			cPredictNew := cPredict + cOff/2
-
-			prev := i.info[idx-1].compressedOffset
-			cOff += prev + cPredict
-			if cOff <= prev { // Offsets must be strictly increasing.
-				return b, ErrCorrupt
-			}
-			cPredict = cPredictNew
-		}
-		if cOff < 0 {
-			return b, ErrCorrupt
-		}
-		i.info[idx].compressedOffset = cOff
-	}
-	if len(b) < 4+len(S2IndexTrailer) {
-		return b, io.ErrUnexpectedEOF
-	}
-	// Skip size... (the 4-byte total size is not checked here)
-	b = b[4:]
-
-	// Check trailer...
-	if !bytes.Equal(b[:len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
-		return b, ErrCorrupt
-	}
-	return b[len(S2IndexTrailer):], nil
-}
-
-// LoadStream will load an index from the end of the supplied stream.
-// ErrUnsupported will be returned if the signature cannot be found.
-// ErrCorrupt will be returned if unexpected values are found.
-// io.ErrUnexpectedEOF is returned if there are too few bytes.
-// IO errors are returned as-is.
-func (i *Index) LoadStream(rs io.ReadSeeker) error {
-	// Go to end: the last 10 bytes are read as a 4-byte size followed by the trailer.
-	_, err := rs.Seek(-10, io.SeekEnd)
-	if err != nil {
-		return err
-	}
-	var tmp [10]byte
-	_, err = io.ReadFull(rs, tmp[:])
-	if err != nil {
-		return err
-	}
-	// Check trailer...
-	if !bytes.Equal(tmp[4:4+len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
-		return ErrUnsupported
-	}
-	sz := binary.LittleEndian.Uint32(tmp[:4]) // Size of the whole index; used below as the distance to seek back from the end.
-	if sz > maxChunkSize+skippableFrameHeader {
-		return ErrCorrupt
-	}
-	_, err = rs.Seek(-int64(sz), io.SeekEnd)
-	if err != nil {
-		return err
-	}
-
-	// Read index.
-	buf := make([]byte, sz)
-	_, err = io.ReadFull(rs, buf)
-	if err != nil {
-		return err
-	}
-	_, err = i.Load(buf) // Any bytes Load reports as remaining are ignored.
-	return err
-}
-
-// IndexStream will return an index for a stream.
-// The stream structure will be checked, but
-// data within blocks is not verified.
-// The returned index can either be appended to the end of the stream
-// or stored separately.
-func IndexStream(r io.Reader) ([]byte, error) {
-	var i Index // Zero value: both totals start at 0 and grow as chunks are read.
-	var buf [maxChunkSize]byte
-	var readHeader bool
-	for {
-		_, err := io.ReadFull(r, buf[:4])
-		if err != nil {
-			if err == io.EOF { // Clean end of stream between chunks: serialize what was collected.
-				return i.appendTo(nil, i.TotalUncompressed, i.TotalCompressed), nil
-			}
-			return nil, err
-		}
-		// Start of this chunk.
-		startChunk := i.TotalCompressed
-		i.TotalCompressed += 4
-
-		chunkType := buf[0]
-		if !readHeader { // The very first chunk must be the stream identifier.
-			if chunkType != chunkTypeStreamIdentifier {
-				return nil, ErrCorrupt
-			}
-			readHeader = true
-		}
-		chunkLen := int(buf[1]) | int(buf[2])<<8 | int(buf[3])<<16
-		if chunkLen < checksumSize {
-			return nil, ErrCorrupt
-		}
-
-		i.TotalCompressed += int64(chunkLen)
-		_, err = io.ReadFull(r, buf[:chunkLen])
-		if err != nil {
-			return nil, io.ErrUnexpectedEOF
-		}
-		// The chunk types are specified at
-		// https://github.com/google/snappy/blob/master/framing_format.txt
-		switch chunkType {
-		case chunkTypeCompressedData:
-			// Section 4.2. Compressed data (chunk type 0x00).
-			// Skip checksum.
-			dLen, err := DecodedLen(buf[checksumSize:]) // NOTE(review): the slice is not bounded by chunkLen, so bytes left over from an earlier chunk may be parsed — confirm.
-			if err != nil {
-				return nil, err
-			}
-			if dLen > maxBlockSize {
-				return nil, ErrCorrupt
-			}
-			if i.estBlockUncomp == 0 {
-				// Use first block for estimate...
-				i.estBlockUncomp = int64(dLen)
-			}
-			err = i.add(startChunk, i.TotalUncompressed)
-			if err != nil {
-				return nil, err
-			}
-			i.TotalUncompressed += int64(dLen)
-			continue
-		case chunkTypeUncompressedData:
-			n2 := chunkLen - checksumSize // Payload size without the checksum.
-			if n2 > maxBlockSize {
-				return nil, ErrCorrupt
-			}
-			if i.estBlockUncomp == 0 {
-				// Use first block for estimate...
-				i.estBlockUncomp = int64(n2)
-			}
-			err = i.add(startChunk, i.TotalUncompressed)
-			if err != nil {
-				return nil, err
-			}
-			i.TotalUncompressed += int64(n2)
-			continue
-		case chunkTypeStreamIdentifier:
-			// Section 4.1. Stream identifier (chunk type 0xff).
-			if chunkLen != len(magicBody) {
-				return nil, ErrCorrupt
-			}
-
-			if string(buf[:len(magicBody)]) != magicBody { // Either magic body is accepted.
-				if string(buf[:len(magicBody)]) != magicBodySnappy {
-					return nil, ErrCorrupt
-				}
-			}
-
-			continue
-		}
-
-		if chunkType <= 0x7f {
-			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
-			return nil, ErrUnsupported
-		}
-		if chunkLen > maxChunkSize {
-			return nil, ErrUnsupported
-		}
-		// Section 4.4 Padding (chunk type 0xfe).
-		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
-	}
-}
-
-// JSON renders the index as indented JSON text containing both totals, the
-// list of offset pairs and the estimated uncompressed block size.
-// An error from the JSON encoder is discarded.
-func (i *Index) JSON() []byte {
-	type offset struct {
-		CompressedOffset   int64 `json:"compressed"`
-		UncompressedOffset int64 `json:"uncompressed"`
-	}
-	type document struct {
-		TotalUncompressed int64    `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
-		TotalCompressed   int64    `json:"total_compressed"`   // Total Compressed size if known. Will be -1 if unknown.
-		Offsets           []offset `json:"offsets"`
-		EstBlockUncomp    int64    `json:"est_block_uncompressed"`
-	}
-
-	// The slice stays nil when there are no entries.
-	var offsets []offset
-	for idx := range i.info {
-		entry := i.info[idx]
-		offsets = append(offsets, offset{
-			CompressedOffset:   entry.compressedOffset,
-			UncompressedOffset: entry.uncompressedOffset,
-		})
-	}
-
-	doc := document{
-		TotalUncompressed: i.TotalUncompressed,
-		TotalCompressed:   i.TotalCompressed,
-		Offsets:           offsets,
-		EstBlockUncomp:    i.estBlockUncomp,
-	}
-	out, _ := json.MarshalIndent(doc, "", "  ")
-	return out
-}
-
-// RemoveIndexHeaders will trim all headers and trailers from a given index.
-// This is expected to save 20 bytes.
-// These can be restored using RestoreIndexHeaders.
-// This removes a layer of security, but is the most compact representation.
-// Returns nil if headers contains errors, including a chunk length that is
-// too short to hold the index header.
-// The returned slice references the provided slice.
-func RemoveIndexHeaders(b []byte) []byte {
-	// save is the number of bytes removed: chunk header, index header,
-	// index trailer and the fixed 4-byte total size.
-	const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4
-	if len(b) <= save {
-		return nil
-	}
-	if b[0] != ChunkTypeIndex {
-		return nil
-	}
-	chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
-	b = b[4:]
-
-	// Validate we have enough...
-	if len(b) < chunkLen {
-		return nil
-	}
-	b = b[:chunkLen]
-
-	// HasPrefix respects len(b). Slicing b[:len(S2IndexHeader)] instead could
-	// reach past a short chunk into the slice's spare capacity, after which
-	// removing the header would panic.
-	if !bytes.HasPrefix(b, []byte(S2IndexHeader)) {
-		return nil
-	}
-	b = b[len(S2IndexHeader):]
-	if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) {
-		return nil
-	}
-	b = bytes.TrimSuffix(b, []byte(S2IndexTrailer))
-
-	// The last 4 bytes before the trailer hold the total size.
-	if len(b) < 4 {
-		return nil
-	}
-	return b[:len(b)-4]
-}
-
-// RestoreIndexHeaders will restore index headers removed by RemoveIndexHeaders.
-// No error checking is performed on the input.
-// If a 0 length slice is sent, it is returned without modification.
-func RestoreIndexHeaders(in []byte) []byte {
-	if len(in) == 0 {
-		return in
-	}
-	b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4) // Sized for the complete result up front.
-	b = append(b, ChunkTypeIndex, 0, 0, 0) // Chunk type plus three length bytes that are filled in below.
-	b = append(b, []byte(S2IndexHeader)...)
-	b = append(b, in...)
-
-	var tmp [4]byte
-	binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)+4+len(S2IndexTrailer))) // Total size, counting this field and the trailer.
-	b = append(b, tmp[:4]...)
-	// Trailer
-	b = append(b, []byte(S2IndexTrailer)...)
-
-	chunkLen := len(b) - skippableFrameHeader
-	b[1] = uint8(chunkLen >> 0)
-	b[2] = uint8(chunkLen >> 8)
-	b[3] = uint8(chunkLen >> 16)
-	return b
-}
--- a/vendor/github.com/klauspost/compress/s2/lz4convert.go
+++ b/vendor/github.com/klauspost/compress/s2/lz4convert.go
@ -1,585 +0,0 @@
-// Copyright (c) 2022 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
-	"encoding/binary"
-	"errors"
-	"fmt"
-)
-
-// LZ4Converter provides conversion from LZ4 blocks as defined here:
-// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
-type LZ4Converter struct {
-}
-
-// ErrDstTooSmall is returned when provided destination is too small.
-var ErrDstTooSmall = errors.New("s2: destination too small")
-
-// ConvertBlock will convert an LZ4 block and append it as an S2
-// block without block length to dst.
-// The uncompressed size is returned as well.
-// dst must have capacity to contain the entire compressed block.
-func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
-	if len(src) == 0 {
-		return dst, 0, nil
-	}
-	const debug = false
-	const inline = true
-	const lz4MinMatch = 4
-
-	s, d := 0, len(dst)
-	dst = dst[:cap(dst)]
-	if !debug && hasAmd64Asm {
-		res, sz := cvtLZ4BlockAsm(dst[d:], src)
-		if res < 0 {
-			const (
-				errCorrupt     = -1
-				errDstTooSmall = -2
-			)
-			switch res {
-			case errCorrupt:
-				return nil, 0, ErrCorrupt
-			case errDstTooSmall:
-				return nil, 0, ErrDstTooSmall
-			default:
-				return nil, 0, fmt.Errorf("unexpected result: %d", res)
-			}
-		}
-		if d+sz > len(dst) {
-			return nil, 0, ErrDstTooSmall
-		}
-		return dst[:d+sz], res, nil
-	}
-
-	dLimit := len(dst) - 10
-	var lastOffset uint16
-	var uncompressed int
-	if debug {
-		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
-	}
-
-	for {
-		if s >= len(src) {
-			return dst[:d], 0, ErrCorrupt
-		}
-		// Read literal info
-		token := src[s]
-		ll := int(token >> 4)
-		ml := int(lz4MinMatch + (token & 0xf))
-
-		// If upper nibble is 15, literal length is extended
-		if token >= 0xf0 {
-			for {
-				s++
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return dst[:d], 0, ErrCorrupt
-				}
-				val := src[s]
-				ll += int(val)
-				if val != 255 {
-					break
-				}
-			}
-		}
-		// Skip past token
-		if s+ll >= len(src) {
-			if debug {
-				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
-			}
-			return nil, 0, ErrCorrupt
-		}
-		s++
-		if ll > 0 {
-			if d+ll > dLimit {
-				return nil, 0, ErrDstTooSmall
-			}
-			if debug {
-				fmt.Printf("emit %d literals\n", ll)
-			}
-			d += emitLiteralGo(dst[d:], src[s:s+ll])
-			s += ll
-			uncompressed += ll
-		}
-
-		// Check if we are done...
-		if s == len(src) && ml == lz4MinMatch {
-			break
-		}
-		// 2 byte offset
-		if s >= len(src)-2 {
-			if debug {
-				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		offset := binary.LittleEndian.Uint16(src[s:])
-		s += 2
-		if offset == 0 {
-			if debug {
-				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		if int(offset) > uncompressed {
-			if debug {
-				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
-			}
-			return nil, 0, ErrCorrupt
-		}
-
-		if ml == lz4MinMatch+15 {
-			for {
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return nil, 0, ErrCorrupt
-				}
-				val := src[s]
-				s++
-				ml += int(val)
-				if val != 255 {
-					if s >= len(src) {
-						if debug {
-							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-						}
-						return nil, 0, ErrCorrupt
-					}
-					break
-				}
-			}
-		}
-		if offset == lastOffset {
-			if debug {
-				fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
-			}
-			if !inline {
-				d += emitRepeat16(dst[d:], offset, ml)
-			} else {
-				length := ml
-				dst := dst[d:]
-				for len(dst) > 5 {
-					// Repeat offset, make length cheaper
-					length -= 4
-					if length <= 4 {
-						dst[0] = uint8(length)<<2 | tagCopy1
-						dst[1] = 0
-						d += 2
-						break
-					}
-					if length < 8 && offset < 2048 {
-						// Encode WITH offset
-						dst[1] = uint8(offset)
-						dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
-						d += 2
-						break
-					}
-					if length < (1<<8)+4 {
-						length -= 4
-						dst[2] = uint8(length)
-						dst[1] = 0
-						dst[0] = 5<<2 | tagCopy1
-						d += 3
-						break
-					}
-					if length < (1<<16)+(1<<8) {
-						length -= 1 << 8
-						dst[3] = uint8(length >> 8)
-						dst[2] = uint8(length >> 0)
-						dst[1] = 0
-						dst[0] = 6<<2 | tagCopy1
-						d += 4
-						break
-					}
-					const maxRepeat = (1 << 24) - 1
-					length -= 1 << 16
-					left := 0
-					if length > maxRepeat {
-						left = length - maxRepeat + 4
-						length = maxRepeat - 4
-					}
-					dst[4] = uint8(length >> 16)
-					dst[3] = uint8(length >> 8)
-					dst[2] = uint8(length >> 0)
-					dst[1] = 0
-					dst[0] = 7<<2 | tagCopy1
-					if left > 0 {
-						d += 5 + emitRepeat16(dst[5:], offset, left)
-						break
-					}
-					d += 5
-					break
-				}
-			}
-		} else {
-			if debug {
-				fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
-			}
-			if !inline {
-				d += emitCopy16(dst[d:], offset, ml)
-			} else {
-				length := ml
-				dst := dst[d:]
-				for len(dst) > 5 {
-					// Offset no more than 2 bytes.
-					if length > 64 {
-						off := 3
-						if offset < 2048 {
-							// emit 8 bytes as tagCopy1, rest as repeats.
-							dst[1] = uint8(offset)
-							dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
-							length -= 8
-							off = 2
-						} else {
-							// Emit a length 60 copy, encoded as 3 bytes.
-							// Emit remaining as repeat value (minimum 4 bytes).
-							dst[2] = uint8(offset >> 8)
-							dst[1] = uint8(offset)
-							dst[0] = 59<<2 | tagCopy2
-							length -= 60
-						}
-						// Emit remaining as repeats, at least 4 bytes remain.
-						d += off + emitRepeat16(dst[off:], offset, length)
-						break
-					}
-					if length >= 12 || offset >= 2048 {
-						// Emit the remaining copy, encoded as 3 bytes.
-						dst[2] = uint8(offset >> 8)
-						dst[1] = uint8(offset)
-						dst[0] = uint8(length-1)<<2 | tagCopy2
-						d += 3
-						break
-					}
-					// Emit the remaining copy, encoded as 2 bytes.
-					dst[1] = uint8(offset)
-					dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
-					d += 2
-					break
-				}
-			}
-			lastOffset = offset
-		}
-		uncompressed += ml
-		if d > dLimit {
-			return nil, 0, ErrDstTooSmall
-		}
-	}
-
-	return dst[:d], uncompressed, nil
-}
-
-// ConvertBlockSnappy will convert an LZ4 block and append it
-// as a Snappy block without block length to dst.
-// The uncompressed size is returned as well.
-// dst must have capacity to contain the entire compressed block.
-func (l *LZ4Converter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
-	if len(src) == 0 {
-		return dst, 0, nil
-	}
-	const debug = false
-	const lz4MinMatch = 4
-
-	s, d := 0, len(dst)
-	dst = dst[:cap(dst)]
-	// Use assembly when possible
-	if !debug && hasAmd64Asm {
-		res, sz := cvtLZ4BlockSnappyAsm(dst[d:], src)
-		if res < 0 {
-			const (
-				errCorrupt     = -1
-				errDstTooSmall = -2
-			)
-			switch res {
-			case errCorrupt:
-				return nil, 0, ErrCorrupt
-			case errDstTooSmall:
-				return nil, 0, ErrDstTooSmall
-			default:
-				return nil, 0, fmt.Errorf("unexpected result: %d", res)
-			}
-		}
-		if d+sz > len(dst) {
-			return nil, 0, ErrDstTooSmall
-		}
-		return dst[:d+sz], res, nil
-	}
-
-	dLimit := len(dst) - 10
-	var uncompressed int
-	if debug {
-		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
-	}
-
-	for {
-		if s >= len(src) {
-			return nil, 0, ErrCorrupt
-		}
-		// Read literal info
-		token := src[s]
-		ll := int(token >> 4)
-		ml := int(lz4MinMatch + (token & 0xf))
-
-		// If upper nibble is 15, literal length is extended
-		if token >= 0xf0 {
-			for {
-				s++
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return nil, 0, ErrCorrupt
-				}
-				val := src[s]
-				ll += int(val)
-				if val != 255 {
-					break
-				}
-			}
-		}
-		// Skip past token
-		if s+ll >= len(src) {
-			if debug {
-				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
-			}
-			return nil, 0, ErrCorrupt
-		}
-		s++
-		if ll > 0 {
-			if d+ll > dLimit {
-				return nil, 0, ErrDstTooSmall
-			}
-			if debug {
-				fmt.Printf("emit %d literals\n", ll)
-			}
-			d += emitLiteralGo(dst[d:], src[s:s+ll])
-			s += ll
-			uncompressed += ll
-		}
-
-		// Check if we are done...
-		if s == len(src) && ml == lz4MinMatch {
-			break
-		}
-		// 2 byte offset
-		if s >= len(src)-2 {
-			if debug {
-				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		offset := binary.LittleEndian.Uint16(src[s:])
-		s += 2
-		if offset == 0 {
-			if debug {
-				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		if int(offset) > uncompressed {
-			if debug {
-				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
-			}
-			return nil, 0, ErrCorrupt
-		}
-
-		if ml == lz4MinMatch+15 {
-			for {
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return nil, 0, ErrCorrupt
-				}
-				val := src[s]
-				s++
-				ml += int(val)
-				if val != 255 {
-					if s >= len(src) {
-						if debug {
-							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-						}
-						return nil, 0, ErrCorrupt
-					}
-					break
-				}
-			}
-		}
-		if debug {
-			fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
-		}
-		length := ml
-		// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
-		for length > 0 {
-			if d >= dLimit {
-				return nil, 0, ErrDstTooSmall
-			}
-
-			// Offset no more than 2 bytes.
-			if length > 64 {
-				// Emit a length 64 copy, encoded as 3 bytes.
-				dst[d+2] = uint8(offset >> 8)
-				dst[d+1] = uint8(offset)
-				dst[d+0] = 63<<2 | tagCopy2
-				length -= 64
-				d += 3
-				continue
-			}
-			if length >= 12 || offset >= 2048 || length < 4 {
-				// Emit the remaining copy, encoded as 3 bytes.
-				dst[d+2] = uint8(offset >> 8)
-				dst[d+1] = uint8(offset)
-				dst[d+0] = uint8(length-1)<<2 | tagCopy2
-				d += 3
-				break
-			}
-			// Emit the remaining copy, encoded as 2 bytes.
-			dst[d+1] = uint8(offset)
-			dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
-			d += 2
-			break
-		}
-		uncompressed += ml
-		if d > dLimit {
-			return nil, 0, ErrDstTooSmall
-		}
-	}
-
-	return dst[:d], uncompressed, nil
-}
-
-// emitRepeat writes a repeat chunk and returns the number of bytes written.
-// Length must be at least 4 and < 1<<24
-func emitRepeat16(dst []byte, offset uint16, length int) int {
-	// Repeat offset, make length cheaper
-	length -= 4
-	if length <= 4 {
-		dst[0] = uint8(length)<<2 | tagCopy1
-		dst[1] = 0
-		return 2
-	}
-	if length < 8 && offset < 2048 {
-		// Encode WITH offset
-		dst[1] = uint8(offset)
-		dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
-		return 2
-	}
-	if length < (1<<8)+4 {
-		length -= 4
-		dst[2] = uint8(length)
-		dst[1] = 0
-		dst[0] = 5<<2 | tagCopy1
-		return 3
-	}
-	if length < (1<<16)+(1<<8) {
-		length -= 1 << 8
-		dst[3] = uint8(length >> 8)
-		dst[2] = uint8(length >> 0)
-		dst[1] = 0
-		dst[0] = 6<<2 | tagCopy1
-		return 4
-	}
-	const maxRepeat = (1 << 24) - 1
-	length -= 1 << 16
-	left := 0
-	if length > maxRepeat {
-		left = length - maxRepeat + 4
-		length = maxRepeat - 4
-	}
-	dst[4] = uint8(length >> 16)
-	dst[3] = uint8(length >> 8)
-	dst[2] = uint8(length >> 0)
-	dst[1] = 0
-	dst[0] = 7<<2 | tagCopy1
-	if left > 0 {
-		return 5 + emitRepeat16(dst[5:], offset, left)
-	}
-	return 5
-}
-
-// emitCopy writes a copy chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes
-//	1 <= offset && offset <= math.MaxUint16
-//	4 <= length && length <= math.MaxUint32
-func emitCopy16(dst []byte, offset uint16, length int) int {
-	// Offset no more than 2 bytes.
-	if length > 64 {
-		off := 3
-		if offset < 2048 {
-			// emit 8 bytes as tagCopy1, rest as repeats.
-			dst[1] = uint8(offset)
-			dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
-			length -= 8
-			off = 2
-		} else {
-			// Emit a length 60 copy, encoded as 3 bytes.
-			// Emit remaining as repeat value (minimum 4 bytes).
-			dst[2] = uint8(offset >> 8)
-			dst[1] = uint8(offset)
-			dst[0] = 59<<2 | tagCopy2
-			length -= 60
-		}
-		// Emit remaining as repeats, at least 4 bytes remain.
-		return off + emitRepeat16(dst[off:], offset, length)
-	}
-	if length >= 12 || offset >= 2048 {
-		// Emit the remaining copy, encoded as 3 bytes.
-		dst[2] = uint8(offset >> 8)
-		dst[1] = uint8(offset)
-		dst[0] = uint8(length-1)<<2 | tagCopy2
-		return 3
-	}
-	// Emit the remaining copy, encoded as 2 bytes.
-	dst[1] = uint8(offset)
-	dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
-	return 2
-}
-
-// emitLiteral writes a literal chunk and returns the number of bytes written.
-//
-// It assumes that:
-//
-//	dst is long enough to hold the encoded bytes
-//	0 <= len(lit) && len(lit) <= math.MaxUint32
-func emitLiteralGo(dst, lit []byte) int {
-	if len(lit) == 0 {
-		return 0
-	}
-	i, n := 0, uint(len(lit)-1)
-	switch {
-	case n < 60:
-		dst[0] = uint8(n)<<2 | tagLiteral
-		i = 1
-	case n < 1<<8:
-		dst[1] = uint8(n)
-		dst[0] = 60<<2 | tagLiteral
-		i = 2
-	case n < 1<<16:
-		dst[2] = uint8(n >> 8)
-		dst[1] = uint8(n)
-		dst[0] = 61<<2 | tagLiteral
-		i = 3
-	case n < 1<<24:
-		dst[3] = uint8(n >> 16)
-		dst[2] = uint8(n >> 8)
-		dst[1] = uint8(n)
-		dst[0] = 62<<2 | tagLiteral
-		i = 4
-	default:
-		dst[4] = uint8(n >> 24)
-		dst[3] = uint8(n >> 16)
-		dst[2] = uint8(n >> 8)
-		dst[1] = uint8(n)
-		dst[0] = 63<<2 | tagLiteral
-		i = 5
-	}
-	return i + copy(dst[i:], lit)
-}
--- a/vendor/github.com/klauspost/compress/s2/lz4sconvert.go
+++ b/vendor/github.com/klauspost/compress/s2/lz4sconvert.go
@ -1,467 +0,0 @@
-// Copyright (c) 2022 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-package s2
-
-import (
-	"encoding/binary"
-	"fmt"
-)
-
-// LZ4sConverter provides conversion from LZ4s.
-// (Intel modified LZ4 Blocks)
-// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf
-// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format.
-// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData.
-// The LZ4s block returned by the Intel® QAT hardware can be used by an external
-// software post-processing to generate other compressed data formats.
-// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses
-// the same high-level formatting as LZ4 block format with the following encoding changes:
-// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte.
-// ONLY "Min match of 4 bytes" is supported.
-type LZ4sConverter struct {
-}
-
-// ConvertBlock will convert an LZ4s block and append it as an S2
-// block without block length to dst.
-// The uncompressed size is returned as well.
-// dst must have capacity to contain the entire compressed block.
-func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
-	if len(src) == 0 {
-		return dst, 0, nil
-	}
-	const debug = false
-	const inline = true
-	const lz4MinMatch = 3
-
-	s, d := 0, len(dst)
-	dst = dst[:cap(dst)]
-	if !debug && hasAmd64Asm {
-		res, sz := cvtLZ4sBlockAsm(dst[d:], src)
-		if res < 0 {
-			const (
-				errCorrupt     = -1
-				errDstTooSmall = -2
-			)
-			switch res {
-			case errCorrupt:
-				return nil, 0, ErrCorrupt
-			case errDstTooSmall:
-				return nil, 0, ErrDstTooSmall
-			default:
-				return nil, 0, fmt.Errorf("unexpected result: %d", res)
-			}
-		}
-		if d+sz > len(dst) {
-			return nil, 0, ErrDstTooSmall
-		}
-		return dst[:d+sz], res, nil
-	}
-
-	dLimit := len(dst) - 10
-	var lastOffset uint16
-	var uncompressed int
-	if debug {
-		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
-	}
-
-	for {
-		if s >= len(src) {
-			return dst[:d], 0, ErrCorrupt
-		}
-		// Read literal info
-		token := src[s]
-		ll := int(token >> 4)
-		ml := int(lz4MinMatch + (token & 0xf))
-
-		// If upper nibble is 15, literal length is extended
-		if token >= 0xf0 {
-			for {
-				s++
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return dst[:d], 0, ErrCorrupt
-				}
-				val := src[s]
-				ll += int(val)
-				if val != 255 {
-					break
-				}
-			}
-		}
-		// Skip past token
-		if s+ll >= len(src) {
-			if debug {
-				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
-			}
-			return nil, 0, ErrCorrupt
-		}
-		s++
-		if ll > 0 {
-			if d+ll > dLimit {
-				return nil, 0, ErrDstTooSmall
-			}
-			if debug {
-				fmt.Printf("emit %d literals\n", ll)
-			}
-			d += emitLiteralGo(dst[d:], src[s:s+ll])
-			s += ll
-			uncompressed += ll
-		}
-
-		// Check if we are done...
-		if ml == lz4MinMatch {
-			if s == len(src) {
-				break
-			}
-			// 0 bytes.
-			continue
-		}
-		// 2 byte offset
-		if s >= len(src)-2 {
-			if debug {
-				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		offset := binary.LittleEndian.Uint16(src[s:])
-		s += 2
-		if offset == 0 {
-			if debug {
-				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		if int(offset) > uncompressed {
-			if debug {
-				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
-			}
-			return nil, 0, ErrCorrupt
-		}
-
-		if ml == lz4MinMatch+15 {
-			for {
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return nil, 0, ErrCorrupt
-				}
-				val := src[s]
-				s++
-				ml += int(val)
-				if val != 255 {
-					if s >= len(src) {
-						if debug {
-							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-						}
-						return nil, 0, ErrCorrupt
-					}
-					break
-				}
-			}
-		}
-		if offset == lastOffset {
-			if debug {
-				fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
-			}
-			if !inline {
-				d += emitRepeat16(dst[d:], offset, ml)
-			} else {
-				length := ml
-				dst := dst[d:]
-				for len(dst) > 5 {
-					// Repeat offset, make length cheaper
-					length -= 4
-					if length <= 4 {
-						dst[0] = uint8(length)<<2 | tagCopy1
-						dst[1] = 0
-						d += 2
-						break
-					}
-					if length < 8 && offset < 2048 {
-						// Encode WITH offset
-						dst[1] = uint8(offset)
-						dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
-						d += 2
-						break
-					}
-					if length < (1<<8)+4 {
-						length -= 4
-						dst[2] = uint8(length)
-						dst[1] = 0
-						dst[0] = 5<<2 | tagCopy1
-						d += 3
-						break
-					}
-					if length < (1<<16)+(1<<8) {
-						length -= 1 << 8
-						dst[3] = uint8(length >> 8)
-						dst[2] = uint8(length >> 0)
-						dst[1] = 0
-						dst[0] = 6<<2 | tagCopy1
-						d += 4
-						break
-					}
-					const maxRepeat = (1 << 24) - 1
-					length -= 1 << 16
-					left := 0
-					if length > maxRepeat {
-						left = length - maxRepeat + 4
-						length = maxRepeat - 4
-					}
-					dst[4] = uint8(length >> 16)
-					dst[3] = uint8(length >> 8)
-					dst[2] = uint8(length >> 0)
-					dst[1] = 0
-					dst[0] = 7<<2 | tagCopy1
-					if left > 0 {
-						d += 5 + emitRepeat16(dst[5:], offset, left)
-						break
-					}
-					d += 5
-					break
-				}
-			}
-		} else {
-			if debug {
-				fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
-			}
-			if !inline {
-				d += emitCopy16(dst[d:], offset, ml)
-			} else {
-				length := ml
-				dst := dst[d:]
-				for len(dst) > 5 {
-					// Offset no more than 2 bytes.
-					if length > 64 {
-						off := 3
-						if offset < 2048 {
-							// emit 8 bytes as tagCopy1, rest as repeats.
-							dst[1] = uint8(offset)
-							dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
-							length -= 8
-							off = 2
-						} else {
-							// Emit a length 60 copy, encoded as 3 bytes.
-							// Emit remaining as repeat value (minimum 4 bytes).
-							dst[2] = uint8(offset >> 8)
-							dst[1] = uint8(offset)
-							dst[0] = 59<<2 | tagCopy2
-							length -= 60
-						}
-						// Emit remaining as repeats, at least 4 bytes remain.
-						d += off + emitRepeat16(dst[off:], offset, length)
-						break
-					}
-					if length >= 12 || offset >= 2048 {
-						// Emit the remaining copy, encoded as 3 bytes.
-						dst[2] = uint8(offset >> 8)
-						dst[1] = uint8(offset)
-						dst[0] = uint8(length-1)<<2 | tagCopy2
-						d += 3
-						break
-					}
-					// Emit the remaining copy, encoded as 2 bytes.
-					dst[1] = uint8(offset)
-					dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
-					d += 2
-					break
-				}
-			}
-			lastOffset = offset
-		}
-		uncompressed += ml
-		if d > dLimit {
-			return nil, 0, ErrDstTooSmall
-		}
-	}
-
-	return dst[:d], uncompressed, nil
-}
-
-// ConvertBlockSnappy will convert an LZ4s block and append it
-// as a Snappy block without block length to dst.
-// The uncompressed size is returned as well.
-// dst must have capacity to contain the entire compressed block.
-func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
-	if len(src) == 0 {
-		return dst, 0, nil
-	}
-	const debug = false
-	const lz4MinMatch = 3
-
-	s, d := 0, len(dst)
-	dst = dst[:cap(dst)]
-	// Use assembly when possible
-	if !debug && hasAmd64Asm {
-		res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src)
-		if res < 0 {
-			const (
-				errCorrupt     = -1
-				errDstTooSmall = -2
-			)
-			switch res {
-			case errCorrupt:
-				return nil, 0, ErrCorrupt
-			case errDstTooSmall:
-				return nil, 0, ErrDstTooSmall
-			default:
-				return nil, 0, fmt.Errorf("unexpected result: %d", res)
-			}
-		}
-		if d+sz > len(dst) {
-			return nil, 0, ErrDstTooSmall
-		}
-		return dst[:d+sz], res, nil
-	}
-
-	dLimit := len(dst) - 10
-	var uncompressed int
-	if debug {
-		fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
-	}
-
-	for {
-		if s >= len(src) {
-			return nil, 0, ErrCorrupt
-		}
-		// Read literal info
-		token := src[s]
-		ll := int(token >> 4)
-		ml := int(lz4MinMatch + (token & 0xf))
-
-		// If upper nibble is 15, literal length is extended
-		if token >= 0xf0 {
-			for {
-				s++
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return nil, 0, ErrCorrupt
-				}
-				val := src[s]
-				ll += int(val)
-				if val != 255 {
-					break
-				}
-			}
-		}
-		// Skip past token
-		if s+ll >= len(src) {
-			if debug {
-				fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
-			}
-			return nil, 0, ErrCorrupt
-		}
-		s++
-		if ll > 0 {
-			if d+ll > dLimit {
-				return nil, 0, ErrDstTooSmall
-			}
-			if debug {
-				fmt.Printf("emit %d literals\n", ll)
-			}
-			d += emitLiteralGo(dst[d:], src[s:s+ll])
-			s += ll
-			uncompressed += ll
-		}
-
-		// Check if we are done...
-		if ml == lz4MinMatch {
-			if s == len(src) {
-				break
-			}
-			// 0 bytes.
-			continue
-		}
-		// 2 byte offset
-		if s >= len(src)-2 {
-			if debug {
-				fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		offset := binary.LittleEndian.Uint16(src[s:])
-		s += 2
-		if offset == 0 {
-			if debug {
-				fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
-			}
-			return nil, 0, ErrCorrupt
-		}
-		if int(offset) > uncompressed {
-			if debug {
-				fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
-			}
-			return nil, 0, ErrCorrupt
-		}
-
-		if ml == lz4MinMatch+15 {
-			for {
-				if s >= len(src) {
-					if debug {
-						fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-					}
-					return nil, 0, ErrCorrupt
-				}
-				val := src[s]
-				s++
-				ml += int(val)
-				if val != 255 {
-					if s >= len(src) {
-						if debug {
-							fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
-						}
-						return nil, 0, ErrCorrupt
-					}
-					break
-				}
-			}
-		}
-		if debug {
-			fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
-		}
-		length := ml
-		// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
-		for length > 0 {
-			if d >= dLimit {
-				return nil, 0, ErrDstTooSmall
-			}
-
-			// Offset no more than 2 bytes.
-			if length > 64 {
-				// Emit a length 64 copy, encoded as 3 bytes.
-				dst[d+2] = uint8(offset >> 8)
-				dst[d+1] = uint8(offset)
-				dst[d+0] = 63<<2 | tagCopy2
-				length -= 64
-				d += 3
-				continue
-			}
-			if length >= 12 || offset >= 2048 || length < 4 {
-				// Emit the remaining copy, encoded as 3 bytes.
-				dst[d+2] = uint8(offset >> 8)
-				dst[d+1] = uint8(offset)
-				dst[d+0] = uint8(length-1)<<2 | tagCopy2
-				d += 3
-				break
-			}
-			// Emit the remaining copy, encoded as 2 bytes.
-			dst[d+1] = uint8(offset)
-			dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
-			d += 2
-			break
-		}
-		uncompressed += ml
-		if d > dLimit {
-			return nil, 0, ErrDstTooSmall
-		}
-	}
-
-	return dst[:d], uncompressed, nil
-}
--- a/vendor/github.com/klauspost/compress/s2/reader.go
+++ b/vendor/github.com/klauspost/compress/s2/reader.go
--- a/vendor/github.com/klauspost/compress/s2/s2.go
+++ b/vendor/github.com/klauspost/compress/s2/s2.go
@ -1,143 +0,0 @@
-// Copyright 2011 The Snappy-Go Authors. All rights reserved.
-// Copyright (c) 2019 Klaus Post. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// Package s2 implements the S2 compression format.
-//
-// S2 is an extension of Snappy. Similar to Snappy S2 is aimed for high throughput,
-// which is why it features concurrent compression for bigger payloads.
-//
-// Decoding is compatible with Snappy compressed content,
-// but content compressed with S2 cannot be decompressed by Snappy.
-//
-// For more information on Snappy/S2 differences see README in: https://github.com/klauspost/compress/tree/master/s2
-//
-// There are actually two S2 formats: block and stream. They are related,
-// but different: trying to decompress block-compressed data as a S2 stream
-// will fail, and vice versa. The block format is the Decode and Encode
-// functions and the stream format is the Reader and Writer types.
-//
-// A "better" compression option is available. This will trade some compression
-// speed
-//
-// The block format, the more common case, is used when the complete size (the
-// number of bytes) of the original data is known upfront, at the time
-// compression starts. The stream format, also known as the framing format, is
-// for when that isn't always true.
-//
-// Blocks to not offer much data protection, so it is up to you to
-// add data validation of decompressed blocks.
-//
-// Streams perform CRC validation of the decompressed data.
-// Stream compression will also be performed on multiple CPU cores concurrently
-// significantly improving throughput.
-package s2
-
-import (
-	"bytes"
-	"hash/crc32"
-)
-
-/*
-Each encoded block begins with the varint-encoded length of the decoded data,
-followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
-first byte of each chunk is broken into its 2 least and 6 most significant bits
-called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
-Zero means a literal tag. All other values mean a copy tag.
-
-For literal tags:
-  - If m < 60, the next 1 + m bytes are literal bytes.
-  - Otherwise, let n be the little-endian unsigned integer denoted by the next
-    m - 59 bytes. The next 1 + n bytes after that are literal bytes.
-
-For copy tags, length bytes are copied from offset bytes ago, in the style of
-Lempel-Ziv compression algorithms. In particular:
-  - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
-    The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
-    of the offset. The next byte is bits 0-7 of the offset.
-  - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
-    The length is 1 + m. The offset is the little-endian unsigned integer
-    denoted by the next 2 bytes.
-  - For l == 3, the offset ranges in [0, 1<<32) and the length in
-    [1, 65). The length is 1 + m. The offset is the little-endian unsigned
-    integer denoted by the next 4 bytes.
-*/
-const (
-	tagLiteral = 0x00
-	tagCopy1   = 0x01
-	tagCopy2   = 0x02
-	tagCopy4   = 0x03
-)
-
-const (
-	checksumSize     = 4
-	chunkHeaderSize  = 4
-	magicChunk       = "\xff\x06\x00\x00" + magicBody
-	magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy
-	magicBodySnappy  = "sNaPpY"
-	magicBody        = "S2sTwO"
-
-	// maxBlockSize is the maximum size of the input to encodeBlock.
-	//
-	// For the framing format (Writer type instead of Encode function),
-	// this is the maximum uncompressed size of a block.
-	maxBlockSize = 4 << 20
-
-	// minBlockSize is the minimum size of block setting when creating a writer.
-	minBlockSize = 4 << 10
-
-	skippableFrameHeader = 4
-	maxChunkSize         = 1<<24 - 1 // 16777215
-
-	// Default block size
-	defaultBlockSize = 1 << 20
-
-	// maxSnappyBlockSize is the maximum snappy block size.
-	maxSnappyBlockSize = 1 << 16
-
-	obufHeaderLen = checksumSize + chunkHeaderSize
-)
-
-const (
-	chunkTypeCompressedData   = 0x00
-	chunkTypeUncompressedData = 0x01
-	ChunkTypeIndex            = 0x99
-	chunkTypePadding          = 0xfe
-	chunkTypeStreamIdentifier = 0xff
-)
-
-var crcTable = crc32.MakeTable(crc32.Castagnoli)
-
-// crc implements the checksum specified in section 3 of
-// https://github.com/google/snappy/blob/master/framing_format.txt
-func crc(b []byte) uint32 {
-	c := crc32.Update(0, crcTable, b)
-	return c>>15 | c<<17 + 0xa282ead8
-}
-
-// literalExtraSize returns the extra size of encoding n literals.
-// n should be >= 0 and <= math.MaxUint32.
-func literalExtraSize(n int64) int64 {
-	if n == 0 {
-		return 0
-	}
-	switch {
-	case n < 60:
-		return 1
-	case n < 1<<8:
-		return 2
-	case n < 1<<16:
-		return 3
-	case n < 1<<24:
-		return 4
-	default:
-		return 5
-	}
-}
-
-type byter interface {
-	Bytes() []byte
-}
-
-var _ byter = &bytes.Buffer{}
--- a/vendor/github.com/klauspost/compress/s2/writer.go
+++ b/vendor/github.com/klauspost/compress/s2/writer.go
--- a/vendor/github.com/klauspost/compress/s2sx.mod
+++ b/vendor/github.com/klauspost/compress/s2sx.mod
@ -1,4 +1,4 @@
 module github.com/klauspost/compress

-go 1.16
+go 1.19

--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv   gzkp    1   3325605752  922273214   13929   227.68

 ## Decompressor

-Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
+Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.

 This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
 kindly supplied by [fuzzit.dev](https://fuzzit.dev/). 
--- a/vendor/github.com/klauspost/compress/zstd/decodeheader.go
+++ b/vendor/github.com/klauspost/compress/zstd/decodeheader.go
@ -95,42 +95,54 @@ type Header struct {
 // If there isn't enough input, io.ErrUnexpectedEOF is returned.
 // The FirstBlock.OK will indicate if enough information was available to decode the first block header.
 func (h *Header) Decode(in []byte) error {
+	_, err := h.DecodeAndStrip(in)
+	return err
+}
+
+// DecodeAndStrip will decode the header from the beginning of the stream
+// and on success return the remaining bytes.
+// This will decode the frame header and the first block header if enough bytes are provided.
+// It is recommended to provide at least HeaderMaxSize bytes.
+// If the frame header cannot be read an error will be returned.
+// If there isn't enough input, io.ErrUnexpectedEOF is returned.
+// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
+func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
 	*h = Header{}
 	if len(in) < 4 {
-		return io.ErrUnexpectedEOF
+		return nil, io.ErrUnexpectedEOF
 	}
 	h.HeaderSize += 4
 	b, in := in[:4], in[4:]
 	if string(b) != frameMagic {
 		if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
-			return ErrMagicMismatch
+			return nil, ErrMagicMismatch
 		}
 		if len(in) < 4 {
-			return io.ErrUnexpectedEOF
+			return nil, io.ErrUnexpectedEOF
 		}
 		h.HeaderSize += 4
 		h.Skippable = true
 		h.SkippableID = int(b[0] & 0xf)
 		h.SkippableSize = binary.LittleEndian.Uint32(in)
-		return nil
+		return in[4:], nil
 	}

 	// Read Window_Descriptor
 	// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
 	if len(in) < 1 {
-		return io.ErrUnexpectedEOF
+		return nil, io.ErrUnexpectedEOF
 	}
 	fhd, in := in[0], in[1:]
 	h.HeaderSize++
 	h.SingleSegment = fhd&(1<<5) != 0
 	h.HasCheckSum = fhd&(1<<2) != 0
 	if fhd&(1<<3) != 0 {
-		return errors.New("reserved bit set on frame header")
+		return nil, errors.New("reserved bit set on frame header")
 	}

 	if !h.SingleSegment {
 		if len(in) < 1 {
-			return io.ErrUnexpectedEOF
+			return nil, io.ErrUnexpectedEOF
 		}
 		var wd byte
 		wd, in = in[0], in[1:]
@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error {
 			size = 4
 		}
 		if len(in) < int(size) {
-			return io.ErrUnexpectedEOF
+			return nil, io.ErrUnexpectedEOF
 		}
 		b, in = in[:size], in[size:]
 		h.HeaderSize += int(size)
@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error {
 	if fcsSize > 0 {
 		h.HasFCS = true
 		if len(in) < fcsSize {
-			return io.ErrUnexpectedEOF
+			return nil, io.ErrUnexpectedEOF
 		}
 		b, in = in[:fcsSize], in[fcsSize:]
 		h.HeaderSize += int(fcsSize)
@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error {

 	// Frame Header done, we will not fail from now on.
 	if len(in) < 3 {
-		return nil
+		return in, nil
 	}
 	tmp := in[:3]
 	bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error {
 	cSize := int(bh >> 3)
 	switch blockType {
 	case blockTypeReserved:
-		return nil
+		return in, nil
 	case blockTypeRLE:
 		h.FirstBlock.Compressed = true
 		h.FirstBlock.DecompressedSize = cSize
@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error {
 	}

 	h.FirstBlock.OK = true
-	return nil
+	return in, nil
+}
+
+// AppendTo will append the encoded header to the dst slice.
+// There is no error checking performed on the header values.
+func (h *Header) AppendTo(dst []byte) ([]byte, error) {
+	if h.Skippable {
+		magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
+		magic[0] |= byte(h.SkippableID & 0xf)
+		dst = append(dst, magic[:]...)
+		f := h.SkippableSize
+		return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
+	}
+	f := frameHeader{
+		ContentSize:   h.FrameContentSize,
+		WindowSize:    uint32(h.WindowSize),
+		SingleSegment: h.SingleSegment,
+		Checksum:      h.HasCheckSum,
+		DictID:        h.DictionaryID,
+	}
+	return f.appendTo(dst), nil
 }
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
 	if m.rep < 0 {
 		ofc = ofCode(uint32(m.s-m.offset) + 3)
 	} else {
-		ofc = ofCode(uint32(m.rep))
+		ofc = ofCode(uint32(m.rep) & 3)
 	}
 	// Cost, excluding
 	ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@ -197,17 +197,10 @@ encodeLoop:

 		// Set m to a match at offset if it looks like that will improve compression.
 		improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
-			if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
+			delta := s - offset
+			if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
 				return
 			}
-			if debugAsserts {
-				if offset <= 0 {
-					panic(offset)
-				}
-				if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
-					panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
-				}
-			}
 			// Try to quick reject if we already have a long match.
 			if m.length > 16 {
 				left := len(src) - int(m.s+m.length)
@ -226,8 +219,10 @@ encodeLoop:
 				}
 			}
 			l := 4 + e.matchlen(s+4, offset+4, src)
-			if rep < 0 {
+			if m.rep <= 0 {
 				// Extend candidate match backwards as far as possible.
+				// Do not extend repeats as we can assume they are optimal
+				// and offsets change if s == nextEmit.
 				tMin := s - e.maxMatchOff
 				if tMin < 0 {
 					tMin = 0
@ -238,7 +233,14 @@ encodeLoop:
 					l++
 				}
 			}
-
+			if debugAsserts {
+				if offset >= s {
+					panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
+				}
+				if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
+					panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
+				}
+			}
 			cand := match{offset: offset, s: s, length: l, rep: rep}
 			cand.estBits(bitsPerByte)
 			if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
@ -281,6 +283,7 @@ encodeLoop:
 		// Load next and check...
 		e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
 		e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
+		index0 := s + 1

 		// Look far ahead, unless we have a really long match already...
 		if best.length < goodEnough {
@ -334,41 +337,45 @@ encodeLoop:
 		}

 		if debugAsserts {
+			if best.offset >= best.s {
+				panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
+			}
+			if best.s < nextEmit {
+				panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
+			}
+			if best.offset < s-e.maxMatchOff {
+				panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
+			}
 			if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
 				panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
 			}
 		}

 		// We have a match, we can store the forward value
+		s = best.s
 		if best.rep > 0 {
 			var seq seq
 			seq.matchLen = uint32(best.length - zstdMinMatch)
-			if debugAsserts && s <= nextEmit {
-				panic("s <= nextEmit")
-			}
 			addLiterals(&seq, best.s)

 			// Repeat. If bit 4 is set, this is a non-lit repeat.
 			seq.offset = uint32(best.rep & 3)
 			if debugSequences {
-				println("repeat sequence", seq, "next s:", s)
+				println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
 			}
 			blk.sequences = append(blk.sequences, seq)

 			// Index old s + 1 -> s - 1
-			index0 := s + 1
 			s = best.s + best.length
-
 			nextEmit = s
-			if s >= sLimit {
-				if debugEncoder {
-					println("repeat ended", s, best.length)
-				}
-				break encodeLoop
-			}
+
 			// Index skipped...
+			end := s
+			if s > sLimit+4 {
+				end = sLimit + 4
+			}
 			off := index0 + e.cur
-			for index0 < s {
+			for index0 < end {
 				cv0 := load6432(src, index0)
 				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@ -377,6 +384,7 @@ encodeLoop:
 				off++
 				index0++
 			}
+
 			switch best.rep {
 			case 2, 4 | 1:
 				offset1, offset2 = offset2, offset1
@ -385,13 +393,17 @@ encodeLoop:
 			case 4 | 3:
 				offset1, offset2, offset3 = offset1-1, offset1, offset2
 			}
+			if s >= sLimit {
+				if debugEncoder {
+					println("repeat ended", s, best.length)
+				}
+				break encodeLoop
+			}
 			continue
 		}

 		// A 4-byte match has been found. Update recent offsets.
 		// We'll later see if more than 4 bytes.
-		index0 := s + 1
-		s = best.s
 		t := best.offset
 		offset1, offset2, offset3 = s-t, offset1, offset2

@ -418,19 +430,25 @@ encodeLoop:
 		}
 		blk.sequences = append(blk.sequences, seq)
 		nextEmit = s
-		if s >= sLimit {
-			break encodeLoop
+
+		// Index old s + 1 -> s - 1 or sLimit
+		end := s
+		if s > sLimit-4 {
+			end = sLimit - 4
 		}

-		// Index old s + 1 -> s - 1
-		for index0 < s {
+		off := index0 + e.cur
+		for index0 < end {
 			cv0 := load6432(src, index0)
 			h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
 			h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
 			index0++
+			off++
+		}
+		if s >= sLimit {
+			break encodeLoop
 		}
 	}

--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@ -145,7 +145,7 @@ encodeLoop:
 		var t int32
 		// We allow the encoder to optionally turn off repeat offsets across blocks
 		canRepeat := len(blk.sequences) > 2
-		var matched int32
+		var matched, index0 int32

 		for {
 			if debugAsserts && canRepeat && offset1 == 0 {
@ -162,6 +162,7 @@ encodeLoop:
 			off := s + e.cur
 			e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
+			index0 = s + 1

 			if canRepeat {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -258,7 +259,6 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)

-					index0 := s + repOff2
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
@ -498,15 +498,15 @@ encodeLoop:
 		}

 		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
+		off := index0 + e.cur
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
 			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			index0 += 2
+			off += 2
 		}

 		cv = load6432(src, s)
@ -672,7 +672,7 @@ encodeLoop:
 		var t int32
 		// We allow the encoder to optionally turn off repeat offsets across blocks
 		canRepeat := len(blk.sequences) > 2
-		var matched int32
+		var matched, index0 int32

 		for {
 			if debugAsserts && canRepeat && offset1 == 0 {
@ -691,6 +691,7 @@ encodeLoop:
 			e.markLongShardDirty(nextHashL)
 			e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
 			e.markShortShardDirty(nextHashS)
+			index0 = s + 1

 			if canRepeat {
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -726,7 +727,6 @@ encodeLoop:
 					blk.sequences = append(blk.sequences, seq)

 					// Index match start+1 (long) -> s - 1
-					index0 := s + repOff
 					s += lenght + repOff

 					nextEmit = s
@ -790,7 +790,6 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)

-					index0 := s + repOff2
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
@ -1024,18 +1023,18 @@ encodeLoop:
 		}

 		// Index match start+1 (long) -> s - 1
-		index0 := s - l + 1
+		off := index0 + e.cur
 		for index0 < s-1 {
 			cv0 := load6432(src, index0)
 			cv1 := cv0 >> 8
 			h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
-			off := index0 + e.cur
 			e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 			e.markLongShardDirty(h0)
 			h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 			e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 			e.markShortShardDirty(h1)
 			index0 += 2
+			off += 2
 		}

 		cv = load6432(src, s)
--- a/vendor/github.com/klauspost/compress/zstd/encoder_options.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go
@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption {
 // The value must be a power of two between MinWindowSize and MaxWindowSize.
 // A larger value will enable better compression but allocate more memory and,
 // for above-default values, take considerably longer.
-// The default value is determined by the compression level.
+// The default value is determined by the compression level and max 8MB.
 func WithWindowSize(n int) EOption {
 	return func(o *encoderOptions) error {
 		switch {
@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
 			case SpeedDefault:
 				o.windowSize = 8 << 20
 			case SpeedBetterCompression:
-				o.windowSize = 16 << 20
+				o.windowSize = 8 << 20
 			case SpeedBestCompression:
-				o.windowSize = 32 << 20
+				o.windowSize = 8 << 20
 			}
 		}
 		if !o.customALEntropy {
--- a/vendor/github.com/klauspost/compress/zstd/frameenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go
@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) []byte {
 		if f.SingleSegment {
 			dst = append(dst, uint8(f.ContentSize))
 		}
-		// Unless SingleSegment is set, framessizes < 256 are nto stored.
+		// Unless SingleSegment is set, framessizes < 256 are not stored.
 	case 1:
 		f.ContentSize -= 256
 		dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))
--- a/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error {
 			if v == -1 {
 				s.dt[highThreshold].setAddBits(uint8(i))
 				highThreshold--
-				symbolNext[i] = 1
-			} else {
-				symbolNext[i] = uint16(v)
+				v = 1
 			}
+			symbolNext[i] = uint16(v)
 		}
 	}

@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error {
 		for ss, v := range s.norm[:s.symbolLen] {
 			for i := 0; i < int(v); i++ {
 				s.dt[position].setAddBits(uint8(ss))
-				position = (position + step) & tableMask
-				for position > highThreshold {
+				for {
 					// lowprob area
 					position = (position + step) & tableMask
+					if position <= highThreshold {
+						break
+					}
 				}
 			}
 		}
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero:

 	// Update Literal Length State
 	MOVBQZX DI, R14
-	SHRQ    $0x10, DI
-	MOVWQZX DI, DI
+	SHRL    $0x10, DI
 	LEAQ    (BX)(R14*1), CX
 	MOVQ    DX, R15
 	MOVQ    CX, BX
@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero:

 	// Update Match Length State
 	MOVBQZX R8, R14
-	SHRQ    $0x10, R8
-	MOVWQZX R8, R8
+	SHRL    $0x10, R8
 	LEAQ    (BX)(R14*1), CX
 	MOVQ    DX, R15
 	MOVQ    CX, BX
@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero:

 	// Update Offset State
 	MOVBQZX R9, R14
-	SHRQ    $0x10, R9
-	MOVWQZX R9, R9
+	SHRL    $0x10, R9
 	LEAQ    (BX)(R14*1), CX
 	MOVQ    DX, R15
 	MOVQ    CX, BX
@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:

 	// Update Literal Length State
 	MOVBQZX DI, R14
-	SHRQ    $0x10, DI
-	MOVWQZX DI, DI
+	SHRL    $0x10, DI
 	LEAQ    (BX)(R14*1), CX
 	MOVQ    DX, R15
 	MOVQ    CX, BX
@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:

 	// Update Match Length State
 	MOVBQZX R8, R14
-	SHRQ    $0x10, R8
-	MOVWQZX R8, R8
+	SHRL    $0x10, R8
 	LEAQ    (BX)(R14*1), CX
 	MOVQ    DX, R15
 	MOVQ    CX, BX
@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:

 	// Update Offset State
 	MOVBQZX R9, R14
-	SHRQ    $0x10, R9
-	MOVWQZX R9, R9
+	SHRL    $0x10, R9
 	LEAQ    (BX)(R14*1), CX
 	MOVQ    DX, R15
 	MOVQ    CX, BX
@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
 	BZHIQ   R14, R15, R15

 	// Update Offset State
-	BZHIQ  R8, R15, CX
-	SHRXQ  R8, R15, R15
-	MOVQ   $0x00001010, R14
-	BEXTRQ R14, R8, R8
-	ADDQ   CX, R8
+	BZHIQ R8, R15, CX
+	SHRXQ R8, R15, R15
+	SHRL  $0x10, R8
+	ADDQ  CX, R8

 	// Load ctx.ofTable
 	MOVQ ctx+16(FP), CX
@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
 	MOVQ (CX)(R8*8), R8

 	// Update Match Length State
-	BZHIQ  DI, R15, CX
-	SHRXQ  DI, R15, R15
-	MOVQ   $0x00001010, R14
-	BEXTRQ R14, DI, DI
-	ADDQ   CX, DI
+	BZHIQ DI, R15, CX
+	SHRXQ DI, R15, R15
+	SHRL  $0x10, DI
+	ADDQ  CX, DI

 	// Load ctx.mlTable
 	MOVQ ctx+16(FP), CX
@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end:
 	MOVQ (CX)(DI*8), DI

 	// Update Literal Length State
-	BZHIQ  SI, R15, CX
-	MOVQ   $0x00001010, R14
-	BEXTRQ R14, SI, SI
-	ADDQ   CX, SI
+	BZHIQ SI, R15, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI

 	// Load ctx.llTable
 	MOVQ ctx+16(FP), CX
@ -1032,11 +1023,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
 	BZHIQ   R14, R15, R15

 	// Update Offset State
-	BZHIQ  R8, R15, CX
-	SHRXQ  R8, R15, R15
-	MOVQ   $0x00001010, R14
-	BEXTRQ R14, R8, R8
-	ADDQ   CX, R8
+	BZHIQ R8, R15, CX
+	SHRXQ R8, R15, R15
+	SHRL  $0x10, R8
+	ADDQ  CX, R8

 	// Load ctx.ofTable
 	MOVQ ctx+16(FP), CX
@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
 	MOVQ (CX)(R8*8), R8

 	// Update Match Length State
-	BZHIQ  DI, R15, CX
-	SHRXQ  DI, R15, R15
-	MOVQ   $0x00001010, R14
-	BEXTRQ R14, DI, DI
-	ADDQ   CX, DI
+	BZHIQ DI, R15, CX
+	SHRXQ DI, R15, R15
+	SHRL  $0x10, DI
+	ADDQ  CX, DI

 	// Load ctx.mlTable
 	MOVQ ctx+16(FP), CX
@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end:
 	MOVQ (CX)(DI*8), DI

 	// Update Literal Length State
-	BZHIQ  SI, R15, CX
-	MOVQ   $0x00001010, R14
-	BEXTRQ R14, SI, SI
-	ADDQ   CX, SI
+	BZHIQ SI, R15, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI

 	// Load ctx.llTable
 	MOVQ ctx+16(FP), CX
@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:

 	// Update Literal Length State
 	MOVBQZX DI, R13
-	SHRQ    $0x10, DI
-	MOVWQZX DI, DI
+	SHRL    $0x10, DI
 	LEAQ    (BX)(R13*1), CX
 	MOVQ    DX, R14
 	MOVQ    CX, BX
@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:

 	// Update Match Length State
 	MOVBQZX R8, R13
-	SHRQ    $0x10, R8
-	MOVWQZX R8, R8
+	SHRL    $0x10, R8
 	LEAQ    (BX)(R13*1), CX
 	MOVQ    DX, R14
 	MOVQ    CX, BX
@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:

 	// Update Offset State
 	MOVBQZX R9, R13
-	SHRQ    $0x10, R9
-	MOVWQZX R9, R9
+	SHRL    $0x10, R9
 	LEAQ    (BX)(R13*1), CX
 	MOVQ    DX, R14
 	MOVQ    CX, BX
@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
 	BZHIQ   R13, R14, R14

 	// Update Offset State
-	BZHIQ  R8, R14, CX
-	SHRXQ  R8, R14, R14
-	MOVQ   $0x00001010, R13
-	BEXTRQ R13, R8, R8
-	ADDQ   CX, R8
+	BZHIQ R8, R14, CX
+	SHRXQ R8, R14, R14
+	SHRL  $0x10, R8
+	ADDQ  CX, R8

 	// Load ctx.ofTable
 	MOVQ ctx+16(FP), CX
@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
 	MOVQ (CX)(R8*8), R8

 	// Update Match Length State
-	BZHIQ  DI, R14, CX
-	SHRXQ  DI, R14, R14
-	MOVQ   $0x00001010, R13
-	BEXTRQ R13, DI, DI
-	ADDQ   CX, DI
+	BZHIQ DI, R14, CX
+	SHRXQ DI, R14, R14
+	SHRL  $0x10, DI
+	ADDQ  CX, DI

 	// Load ctx.mlTable
 	MOVQ ctx+16(FP), CX
@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
 	MOVQ (CX)(DI*8), DI

 	// Update Literal Length State
-	BZHIQ  SI, R14, CX
-	MOVQ   $0x00001010, R13
-	BEXTRQ R13, SI, SI
-	ADDQ   CX, SI
+	BZHIQ SI, R14, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI

 	// Load ctx.llTable
 	MOVQ ctx+16(FP), CX
@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:

 	// Update Literal Length State
 	MOVBQZX DI, R13
-	SHRQ    $0x10, DI
-	MOVWQZX DI, DI
+	SHRL    $0x10, DI
 	LEAQ    (BX)(R13*1), CX
 	MOVQ    DX, R14
 	MOVQ    CX, BX
@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:

 	// Update Match Length State
 	MOVBQZX R8, R13
-	SHRQ    $0x10, R8
-	MOVWQZX R8, R8
+	SHRL    $0x10, R8
 	LEAQ    (BX)(R13*1), CX
 	MOVQ    DX, R14
 	MOVQ    CX, BX
@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:

 	// Update Offset State
 	MOVBQZX R9, R13
-	SHRQ    $0x10, R9
-	MOVWQZX R9, R9
+	SHRL    $0x10, R9
 	LEAQ    (BX)(R13*1), CX
 	MOVQ    DX, R14
 	MOVQ    CX, BX
@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
 	BZHIQ   R13, R14, R14

 	// Update Offset State
-	BZHIQ  R8, R14, CX
-	SHRXQ  R8, R14, R14
-	MOVQ   $0x00001010, R13
-	BEXTRQ R13, R8, R8
-	ADDQ   CX, R8
+	BZHIQ R8, R14, CX
+	SHRXQ R8, R14, R14
+	SHRL  $0x10, R8
+	ADDQ  CX, R8

 	// Load ctx.ofTable
 	MOVQ ctx+16(FP), CX
@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
 	MOVQ (CX)(R8*8), R8

 	// Update Match Length State
-	BZHIQ  DI, R14, CX
-	SHRXQ  DI, R14, R14
-	MOVQ   $0x00001010, R13
-	BEXTRQ R13, DI, DI
-	ADDQ   CX, DI
+	BZHIQ DI, R14, CX
+	SHRXQ DI, R14, R14
+	SHRL  $0x10, DI
+	ADDQ  CX, DI

 	// Load ctx.mlTable
 	MOVQ ctx+16(FP), CX
@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
 	MOVQ (CX)(DI*8), DI

 	// Update Literal Length State
-	BZHIQ  SI, R14, CX
-	MOVQ   $0x00001010, R13
-	BEXTRQ R13, SI, SI
-	ADDQ   CX, SI
+	BZHIQ SI, R14, CX
+	SHRL  $0x10, SI
+	ADDQ  CX, SI

 	// Load ctx.llTable
 	MOVQ ctx+16(FP), CX
--- a/vendor/github.com/klauspost/cpuid/v2/.gitignore
+++ b/vendor/github.com/klauspost/cpuid/v2/.gitignore
@ -1,24 +0,0 @@
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
--- a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
+++ b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
@ -1,74 +0,0 @@
-# This is an example goreleaser.yaml file with some sane defaults.
-# Make sure to check the documentation at http://goreleaser.com
-
-builds:
-  -
-    id: "cpuid"
-    binary: cpuid
-    main: ./cmd/cpuid/main.go
-    env:
-      - CGO_ENABLED=0
-    flags:
-      - -ldflags=-s -w
-    goos:
-      - aix
-      - linux
-      - freebsd
-      - netbsd
-      - windows
-      - darwin
-    goarch:
-      - 386
-      - amd64
-      - arm64
-    goarm:
-      - 7
-
-archives:
-  -
-    id: cpuid
-    name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
-    replacements:
-      aix: AIX
-      darwin: OSX
-      linux: Linux
-      windows: Windows
-      386: i386
-      amd64: x86_64
-      freebsd: FreeBSD
-      netbsd: NetBSD
-    format_overrides:
-      - goos: windows
-        format: zip
-    files:
-      - LICENSE
-checksum:
-  name_template: 'checksums.txt'
-snapshot:
-  name_template: "{{ .Tag }}-next"
-changelog:
-  sort: asc
-  filters:
-    exclude:
-    - '^doc:'
-    - '^docs:'
-    - '^test:'
-    - '^tests:'
-    - '^Update\sREADME.md'
-
-nfpms:
-  -
-    file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
-    vendor: Klaus Post
-    homepage: https://github.com/klauspost/cpuid
-    maintainer: Klaus Post <klauspost@gmail.com>
-    description: CPUID Tool
-    license: BSD 3-Clause
-    formats:
-      - deb
-      - rpm
-    replacements:
-      darwin: Darwin
-      linux: Linux
-      freebsd: FreeBSD
-      amd64: x86_64
--- a/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
+++ b/vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
@ -1,35 +0,0 @@
-Developer Certificate of Origin
-Version 1.1
-
-Copyright (C) 2015- Klaus Post & Contributors.
-Email: klauspost@gmail.com
-
-Everyone is permitted to copy and distribute verbatim copies of this
-license document, but changing it is not allowed.
-
-
-Developer's Certificate of Origin 1.1
-
-By making a contribution to this project, I certify that:
-
-(a) The contribution was created in whole or in part by me and I
-    have the right to submit it under the open source license
-    indicated in the file; or
-
-(b) The contribution is based upon previous work that, to the best
-    of my knowledge, is covered under an appropriate open source
-    license and I have the right under that license to submit that
-    work with modifications, whether created in whole or in part
-    by me, under the same open source license (unless I am
-    permitted to submit under a different license), as indicated
-    in the file; or
-
-(c) The contribution was provided directly to me by some other
-    person who certified (a), (b) or (c) and I have not modified
-    it.
-
-(d) I understand and agree that this project and the contribution
-    are public and that a record of the contribution (including all
-    personal information I submit with it, including my sign-off) is
-    maintained indefinitely and may be redistributed consistent with
-    this project or the open source license(s) involved.
--- a/vendor/github.com/klauspost/cpuid/v2/LICENSE
+++ b/vendor/github.com/klauspost/cpuid/v2/LICENSE
@ -1,22 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Klaus Post
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
--- a/vendor/github.com/klauspost/cpuid/v2/README.md
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@ -1,497 +0,0 @@
-# cpuid
-Package cpuid provides information about the CPU running the current program.
-
-CPU features are detected on startup, and kept for fast access through the life of the application.
-Currently x86 / x64 (AMD64/i386) and ARM (ARM64) are supported, and no external C (cgo) code is used, which should make the library very easy to use.
-
-You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-
-Package home: https://github.com/klauspost/cpuid
-
-[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
-[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
-
-## installing
-
-`go get -u github.com/klauspost/cpuid/v2` using modules.
-Drop `v2` for others.
-
-Installing binary:
-
-`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
-
-Or download binaries from release page: https://github.com/klauspost/cpuid/releases
-
-### Homebrew
-
-For macOS/Linux users, you can install via [brew](https://brew.sh/)
-
-```sh
-$ brew install cpuid
-```
-
-## example
-
-```Go
-package main
-
-import (
-	"fmt"
-	"strings"
-
-	. "github.com/klauspost/cpuid/v2"
-)
-
-func main() {
-	// Print basic CPU information:
-	fmt.Println("Name:", CPU.BrandName)
-	fmt.Println("PhysicalCores:", CPU.PhysicalCores)
-	fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
-	fmt.Println("LogicalCores:", CPU.LogicalCores)
-	fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
-	fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
-	fmt.Println("Cacheline bytes:", CPU.CacheLine)
-	fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
-	fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
-	fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
-	fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
-	fmt.Println("Frequency", CPU.Hz, "hz")
-
-	// Test if we have these specific features:
-	if CPU.Supports(SSE, SSE2) {
-		fmt.Println("We have Streaming SIMD 2 Extensions")
-	}
-}
-```
-
-Sample output:
-```
->go run main.go
-Name: AMD Ryzen 9 3950X 16-Core Processor
-PhysicalCores: 16
-ThreadsPerCore: 2
-LogicalCores: 32
-Family 23 Model: 113 Vendor ID: AMD
-Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
-Cacheline bytes: 64
-L1 Data Cache: 32768 bytes
-L1 Instruction Cache: 32768 bytes
-L2 Cache: 524288 bytes
-L3 Cache: 16777216 bytes
-Frequency 0 hz
-We have Streaming SIMD 2 Extensions
-```
-
-# usage
-
-The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
-A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.  
-
-To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc.
-This can be used with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported.
-
-Note that for some cpu/os combinations some features will not be detected.
-`amd64` has rather good support and should work reliably on all platforms.
-
-Note that hypervisors may not pass all CPU features through to the guest OS,
-so even if your host supports a feature it may not be visible on guests.
-
-## arm64 feature detection
-
-Not all operating systems provide ARM features directly 
-and there is no safe way to do so for the rest.
-
-Currently `arm64/linux` and `arm64/freebsd` should be quite reliable. 
-`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
-
-A `DetectARM()` can be used if you are able to control your deployment,
-it will detect CPU features, but may crash if the OS doesn't intercept the calls.
-A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
- 
-Note that currently only features are detected on ARM, 
-no additional information is currently available. 
-
-## flags
-
-It is possible to add flags that affect cpu detection.
-
-For this the `Flags()` command is provided.
-
-This must be called *before* `flag.Parse()`, and `Detect()` must be called after the flags have been parsed.
-
-This means that any detection used in `init()` functions will not contain these flags.
-
-Example:
-
-```Go
-package main
-
-import (
-	"flag"
-	"fmt"
-	"strings"
-
-	"github.com/klauspost/cpuid/v2"
-)
-
-func main() {
-	cpuid.Flags()
-	flag.Parse()
-	cpuid.Detect()
-
-	// Test if we have these specific features:
-	if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
-		fmt.Println("We have Streaming SIMD 2 Extensions")
-	}
-}
-```
-
-## commandline
-
-Download as binary from: https://github.com/klauspost/cpuid/releases
-
-Install from source:
-
-`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
-
-### Example
-
-```
-λ cpuid
-Name: AMD Ryzen 9 3950X 16-Core Processor
-Vendor String: AuthenticAMD
-Vendor ID: AMD
-PhysicalCores: 16
-Threads Per Core: 2
-Logical Cores: 32
-CPU Family 23 Model: 113
-Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
-Microarchitecture level: 3
-Cacheline bytes: 64
-L1 Instruction Cache: 32768 bytes
-L1 Data Cache: 32768 bytes
-L2 Cache: 524288 bytes
-L3 Cache: 16777216 bytes
-
-```
-### JSON Output:
-
-```
-λ cpuid --json
-{
-  "BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
-  "VendorID": 2,
-  "VendorString": "AuthenticAMD",
-  "PhysicalCores": 16,
-  "ThreadsPerCore": 2,
-  "LogicalCores": 32,
-  "Family": 23,
-  "Model": 113,
-  "CacheLine": 64,
-  "Hz": 0,
-  "BoostFreq": 0,
-  "Cache": {
-    "L1I": 32768,
-    "L1D": 32768,
-    "L2": 524288,
-    "L3": 16777216
-  },
-  "SGX": {
-    "Available": false,
-    "LaunchControl": false,
-    "SGX1Supported": false,
-    "SGX2Supported": false,
-    "MaxEnclaveSizeNot64": 0,
-    "MaxEnclaveSize64": 0,
-    "EPCSections": null
-  },
-  "Features": [
-    "ADX",
-    "AESNI",
-    "AVX",
-    "AVX2",
-    "BMI1",
-    "BMI2",
-    "CLMUL",
-    "CLZERO",
-    "CMOV",
-    "CMPXCHG8",
-    "CPBOOST",
-    "CX16",
-    "F16C",
-    "FMA3",
-    "FXSR",
-    "FXSROPT",
-    "HTT",
-    "HYPERVISOR",
-    "LAHF",
-    "LZCNT",
-    "MCAOVERFLOW",
-    "MMX",
-    "MMXEXT",
-    "MOVBE",
-    "NX",
-    "OSXSAVE",
-    "POPCNT",
-    "RDRAND",
-    "RDSEED",
-    "RDTSCP",
-    "SCE",
-    "SHA",
-    "SSE",
-    "SSE2",
-    "SSE3",
-    "SSE4",
-    "SSE42",
-    "SSE4A",
-    "SSSE3",
-    "SUCCOR",
-    "X87",
-    "XSAVE"
-  ],
-  "X64Level": 3
-}
-```
-
-### Check CPU microarch level
-
-```
-λ cpuid --check-level=3
-2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
-2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
-Exit Code 0
-
-λ cpuid --check-level=4
-2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
-2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
-Exit Code 1
-```
-
-
-## Available flags
-
-### x86 & amd64 
-
-| Feature Flag       | Description                                                                                                                                                                        |
-|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| ADX                | Intel ADX (Multi-Precision Add-Carry Instruction Extensions)                                                                                                                       |
-| AESNI              | Advanced Encryption Standard New Instructions                                                                                                                                      |
-| AMD3DNOW           | AMD 3DNOW                                                                                                                                                                          |
-| AMD3DNOWEXT        | AMD 3DNowExt                                                                                                                                                                       |
-| AMXBF16            | Tile computational operations on BFLOAT16 numbers                                                                                                                                  |
-| AMXINT8            | Tile computational operations on 8-bit integers                                                                                                                                    |
-| AMXFP16            | Tile computational operations on FP16 numbers                                                                                                                                      |
-| AMXTILE            | Tile architecture                                                                                                                                                                  |
-| APX_F              | Intel APX                                                                                                                                                                          |
-| AVX                | AVX functions                                                                                                                                                                      |
-| AVX10              | If set the Intel AVX10 Converged Vector ISA is supported                                                                                                                           |
-| AVX10_128          | If set indicates that AVX10 128-bit vector support is present                                                                                                                      |
-| AVX10_256          | If set indicates that AVX10 256-bit vector support is present                                                                                                                      |
-| AVX10_512          | If set indicates that AVX10 512-bit vector support is present                                                                                                                      |
-| AVX2               | AVX2 functions                                                                                                                                                                     |
-| AVX512BF16         | AVX-512 BFLOAT16 Instructions                                                                                                                                                      |
-| AVX512BITALG       | AVX-512 Bit Algorithms                                                                                                                                                             |
-| AVX512BW           | AVX-512 Byte and Word Instructions                                                                                                                                                 |
-| AVX512CD           | AVX-512 Conflict Detection Instructions                                                                                                                                            |
-| AVX512DQ           | AVX-512 Doubleword and Quadword Instructions                                                                                                                                       |
-| AVX512ER           | AVX-512 Exponential and Reciprocal Instructions                                                                                                                                    |
-| AVX512F            | AVX-512 Foundation                                                                                                                                                                 |
-| AVX512FP16         | AVX-512 FP16 Instructions                                                                                                                                                          |
-| AVX512IFMA         | AVX-512 Integer Fused Multiply-Add Instructions                                                                                                                                    |
-| AVX512PF           | AVX-512 Prefetch Instructions                                                                                                                                                      |
-| AVX512VBMI         | AVX-512 Vector Bit Manipulation Instructions                                                                                                                                       |
-| AVX512VBMI2        | AVX-512 Vector Bit Manipulation Instructions, Version 2                                                                                                                            |
-| AVX512VL           | AVX-512 Vector Length Extensions                                                                                                                                                   |
-| AVX512VNNI         | AVX-512 Vector Neural Network Instructions                                                                                                                                         |
-| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q                                                                                                                                                          |
-| AVX512VPOPCNTDQ    | AVX-512 Vector Population Count Doubleword and Quadword                                                                                                                            |
-| AVXIFMA            | AVX-IFMA instructions                                                                                                                                                              |
-| AVXNECONVERT       | AVX-NE-CONVERT instructions                                                                                                                                                        |
-| AVXSLOW            | Indicates the CPU performs 2 128 bit operations instead of one                                                                                                                     |
-| AVXVNNI            | AVX (VEX encoded) VNNI neural network instructions                                                                                                                                 |
-| AVXVNNIINT8        | AVX-VNNI-INT8 instructions                                                                                                                                                         |
-| BHI_CTRL           | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598                                                                    |
-| BMI1               | Bit Manipulation Instruction Set 1                                                                                                                                                 |
-| BMI2               | Bit Manipulation Instruction Set 2                                                                                                                                                 |
-| CETIBT             | Intel CET Indirect Branch Tracking                                                                                                                                                 |
-| CETSS              | Intel CET Shadow Stack                                                                                                                                                             |
-| CLDEMOTE           | Cache Line Demote                                                                                                                                                                  |
-| CLMUL              | Carry-less Multiplication                                                                                                                                                          |
-| CLZERO             | CLZERO instruction supported                                                                                                                                                       |
-| CMOV               | i686 CMOV                                                                                                                                                                          |
-| CMPCCXADD          | CMPCCXADD instructions                                                                                                                                                             |
-| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB                                                                                                                                                         |
-| CMPXCHG8           | CMPXCHG8 instruction                                                                                                                                                               |
-| CPBOOST            | Core Performance Boost                                                                                                                                                             |
-| CPPC               | AMD: Collaborative Processor Performance Control                                                                                                                                   |
-| CX16               | CMPXCHG16B Instruction                                                                                                                                                             |
-| EFER_LMSLE_UNS     | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ                                                                                                                        |
-| ENQCMD             | Enqueue Command                                                                                                                                                                    |
-| ERMS               | Enhanced REP MOVSB/STOSB                                                                                                                                                           |
-| F16C               | Half-precision floating-point conversion                                                                                                                                           |
-| FLUSH_L1D          | Flush L1D cache                                                                                                                                                                    |
-| FMA3               | Intel FMA 3. Does not imply AVX.                                                                                                                                                   |
-| FMA4               | Bulldozer FMA4 functions                                                                                                                                                           |
-| FP128              | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide                                                                                                            |
-| FP256              | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide                                                                                                            |
-| FSRM               | Fast Short Rep Mov                                                                                                                                                                 |
-| FXSR               | FXSAVE, FXRSTOR instructions, CR4 bit 9                                                                                                                                            |
-| FXSROPT            | FXSAVE/FXRSTOR optimizations                                                                                                                                                       |
-| GFNI               | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.                                                                                  |
-| HLE                | Hardware Lock Elision                                                                                                                                                              |
-| HRESET             | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR                                                                                                                   |
-| HTT                | Hyperthreading (enabled)                                                                                                                                                           |
-| HWA                | Hardware assert supported. Indicates support for MSRC001_10                                                                                                                        |
-| HYBRID_CPU         | This part has CPUs of more than one type.                                                                                                                                          |
-| HYPERVISOR         | This bit has been reserved by Intel & AMD for use by hypervisors                                                                                                                   |
-| IA32_ARCH_CAP      | IA32_ARCH_CAPABILITIES MSR (Intel)                                                                                                                                                 |
-| IA32_CORE_CAP      | IA32_CORE_CAPABILITIES MSR                                                                                                                                                         |
-| IBPB               | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)                                                                                         |
-| IBRS               | AMD: Indirect Branch Restricted Speculation                                                                                                                                        |
-| IBRS_PREFERRED     | AMD: IBRS is preferred over software solution                                                                                                                                      |
-| IBRS_PROVIDES_SMP  | AMD: IBRS provides Same Mode Protection                                                                                                                                            |
-| IBS                | Instruction Based Sampling (AMD)                                                                                                                                                   |
-| IBSBRNTRGT         | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBSFETCHSAM        | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBSFFV             | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBSOPCNT           | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBSOPCNTEXT        | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBSOPSAM           | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBSRDWROPCNT       | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBSRIPINVALIDCHK   | Instruction Based Sampling Feature (AMD)                                                                                                                                           |
-| IBS_FETCH_CTLX     | AMD: IBS fetch control extended MSR supported                                                                                                                                      |
-| IBS_OPDATA4        | AMD: IBS op data 4 MSR supported                                                                                                                                                   |
-| IBS_OPFUSE         | AMD: Indicates support for IbsOpFuse                                                                                                                                               |
-| IBS_PREVENTHOST    | Disallowing IBS use by the host supported                                                                                                                                          |
-| IBS_ZEN4           | Fetch and Op IBS support IBS extensions added with Zen4                                                                                                                            |
-| IDPRED_CTRL        | IPRED_DIS                                                                                                                                                                          |
-| INT_WBINVD         | WBINVD/WBNOINVD are interruptible.                                                                                                                                                 |
-| INVLPGB            | INVLPGB and TLBSYNC instructions supported                                                                                                                                         |
-| KEYLOCKER          | Key locker                                                                                                                                                                         |
-| KEYLOCKERW         | Key locker wide                                                                                                                                                                    |
-| LAHF               | LAHF/SAHF in long mode                                                                                                                                                             |
-| LAM                | If set, CPU supports Linear Address Masking                                                                                                                                        |
-| LBRVIRT            | LBR virtualization                                                                                                                                                                 |
-| LZCNT              | LZCNT instruction                                                                                                                                                                  |
-| MCAOVERFLOW        | MCA overflow recovery support.                                                                                                                                                     |
-| MCDT_NO            | Processor does not exhibit MXCSR Configuration Dependent Timing behavior and does not need to mitigate it.                                                                         |
-| MCOMMIT            | MCOMMIT instruction supported                                                                                                                                                      |
-| MD_CLEAR           | VERW clears CPU buffers                                                                                                                                                            |
-| MMX                | standard MMX                                                                                                                                                                       |
-| MMXEXT             | SSE integer functions or AMD MMX ext                                                                                                                                               |
-| MOVBE              | MOVBE instruction (big-endian)                                                                                                                                                     |
-| MOVDIR64B          | Move 64 Bytes as Direct Store                                                                                                                                                      |
-| MOVDIRI            | Move Doubleword as Direct Store                                                                                                                                                    |
-| MOVSB_ZL           | Fast Zero-Length MOVSB                                                                                                                                                             |
-| MPX                | Intel MPX (Memory Protection Extensions)                                                                                                                                           |
-| MOVU               | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD        |
-| MSRIRC             | Instruction Retired Counter MSR available                                                                                                                                          |
-| MSRLIST            | Read/Write List of Model Specific Registers                                                                                                                                        |
-| MSR_PAGEFLUSH      | Page Flush MSR available                                                                                                                                                           |
-| NRIPS              | Indicates support for NRIP save on VMEXIT                                                                                                                                          |
-| NX                 | NX (No-Execute) bit                                                                                                                                                                |
-| OSXSAVE            | XSAVE enabled by OS                                                                                                                                                                |
-| PCONFIG            | PCONFIG for Intel Multi-Key Total Memory Encryption                                                                                                                                |
-| POPCNT             | POPCNT instruction                                                                                                                                                                 |
-| PPIN               | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled                                            |
-| PREFETCHI          | PREFETCHIT0/1 instructions                                                                                                                                                         |
-| PSFD               | Predictive Store Forward Disable                                                                                                                                                   |
-| RDPRU              | RDPRU instruction supported                                                                                                                                                        |
-| RDRAND             | RDRAND instruction is available                                                                                                                                                    |
-| RDSEED             | RDSEED instruction is available                                                                                                                                                    |
-| RDTSCP             | RDTSCP Instruction                                                                                                                                                                 |
-| RRSBA_CTRL         | Restricted RSB Alternate                                                                                                                                                           |
-| RTM                | Restricted Transactional Memory                                                                                                                                                    |
-| RTM_ALWAYS_ABORT   | Indicates that the loaded microcode is forcing RTM abort.                                                                                                                          |
-| SERIALIZE          | Serialize Instruction Execution                                                                                                                                                    |
-| SEV                | AMD Secure Encrypted Virtualization supported                                                                                                                                      |
-| SEV_64BIT          | AMD SEV guest execution only allowed from a 64-bit host                                                                                                                            |
-| SEV_ALTERNATIVE    | AMD SEV Alternate Injection supported                                                                                                                                              |
-| SEV_DEBUGSWAP      | Full debug state swap supported for SEV-ES guests                                                                                                                                  |
-| SEV_ES             | AMD SEV Encrypted State supported                                                                                                                                                  |
-| SEV_RESTRICTED     | AMD SEV Restricted Injection supported                                                                                                                                             |
-| SEV_SNP            | AMD SEV Secure Nested Paging supported                                                                                                                                             |
-| SGX                | Software Guard Extensions                                                                                                                                                          |
-| SGXLC              | Software Guard Extensions Launch Control                                                                                                                                           |
-| SHA                | Intel SHA Extensions                                                                                                                                                               |
-| SME                | AMD Secure Memory Encryption supported                                                                                                                                             |
-| SME_COHERENT       | AMD Hardware cache coherency across encryption domains enforced                                                                                                                    |
-| SPEC_CTRL_SSBD     | Speculative Store Bypass Disable                                                                                                                                                   |
-| SRBDS_CTRL         | SRBDS mitigation MSR available                                                                                                                                                     |
-| SSE                | SSE functions                                                                                                                                                                      |
-| SSE2               | P4 SSE functions                                                                                                                                                                   |
-| SSE3               | Prescott SSE3 functions                                                                                                                                                            |
-| SSE4               | Penryn SSE4.1 functions                                                                                                                                                            |
-| SSE42              | Nehalem SSE4.2 functions                                                                                                                                                           |
-| SSE4A              | AMD Barcelona microarchitecture SSE4a instructions                                                                                                                                 |
-| SSSE3              | Conroe SSSE3 functions                                                                                                                                                             |
-| STIBP              | Single Thread Indirect Branch Predictors                                                                                                                                           |
-| STIBP_ALWAYSON     | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On                                                                              |
-| STOSB_SHORT        | Fast short STOSB                                                                                                                                                                   |
-| SUCCOR             | Software uncorrectable error containment and recovery capability.                                                                                                                  |
-| SVM                | AMD Secure Virtual Machine                                                                                                                                                         |
-| SVMDA              | Indicates support for the SVM decode assists.                                                                                                                                      |
-| SVMFBASID          | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control |
-| SVML               | AMD SVM lock. Indicates support for SVM-Lock.                                                                                                                                      |
-| SVMNP              | AMD SVM nested paging                                                                                                                                                              |
-| SVMPF              | SVM pause intercept filter. Indicates support for the pause intercept filter                                                                                                       |
-| SVMPFT             | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold                                                                                           |
-| SYSCALL            | System-Call Extension (SCE): SYSCALL and SYSRET instructions.                                                                                                                      |
-| SYSEE              | SYSENTER and SYSEXIT instructions                                                                                                                                                  |
-| TBM                | AMD Trailing Bit Manipulation                                                                                                                                                      |
-| TDX_GUEST          | Intel Trust Domain Extensions Guest                                                                                                                                                |
-| TLB_FLUSH_NESTED   | AMD: Flushing includes all the nested translations for guest translations                                                                                                          |
-| TME                | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.                         |
-| TOPEXT             | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.                                                         |
-| TSCRATEMSR         | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104                                                                                                       |
-| TSXLDTRK           | Intel TSX Suspend Load Address Tracking                                                                                                                                            |
-| VAES               | Vector AES. AVX(512) versions require additional checks.                                                                                                                           |
-| VMCBCLEAN          | VMCB clean bits. Indicates support for VMCB clean bits.                                                                                                                            |
-| VMPL               | AMD VM Permission Levels supported                                                                                                                                                 |
-| VMSA_REGPROT       | AMD VMSA Register Protection supported                                                                                                                                             |
-| VMX                | Virtual Machine Extensions                                                                                                                                                         |
-| VPCLMULQDQ         | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.                                                                                                          |
-| VTE                | AMD Virtual Transparent Encryption supported                                                                                                                                       |
-| WAITPKG            | TPAUSE, UMONITOR, UMWAIT                                                                                                                                                           |
-| WBNOINVD           | Write Back and Do Not Invalidate Cache                                                                                                                                             |
-| WRMSRNS            | Non-Serializing Write to Model Specific Register                                                                                                                                   |
-| X87                | FPU                                                                                                                                                                                |
-| XGETBV1            | Supports XGETBV with ECX = 1                                                                                                                                                       |
-| XOP                | Bulldozer XOP functions                                                                                                                                                            |
-| XSAVE              | XSAVE, XRESTOR, XSETBV, XGETBV                                                                                                                                                     |
-| XSAVEC             | Supports XSAVEC and the compacted form of XRSTOR.                                                                                                                                  |
-| XSAVEOPT           | XSAVEOPT available                                                                                                                                                                 |
-| XSAVES             | Supports XSAVES/XRSTORS and IA32_XSS                                                                                                                                               |
-
-# ARM features:
-
-| Feature Flag | Description                                                      |
-|--------------|------------------------------------------------------------------|
-| AESARM       | AES instructions                                                 |
-| ARMCPUID     | Some CPU ID registers readable at user-level                     |
-| ASIMD        | Advanced SIMD                                                    |
-| ASIMDDP      | SIMD Dot Product                                                 |
-| ASIMDHP      | Advanced SIMD half-precision floating point                      |
-| ASIMDRDM     | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) |
-| ATOMICS      | Large System Extensions (LSE)                                    |
-| CRC32        | CRC32/CRC32C instructions                                        |
-| DCPOP        | Data cache clean to Point of Persistence (DC CVAP)               |
-| EVTSTRM      | Generic timer                                                    |
-| FCMA         | Floating-point complex number addition and multiplication        |
-| FP           | Single-precision and double-precision floating point             |
-| FPHP         | Half-precision floating point                                    |
-| GPA          | Generic Pointer Authentication                                   |
-| JSCVT        | Javascript-style double->int convert (FJCVTZS)                   |
-| LRCPC        | Weaker release consistency (LDAPR, etc)                          |
-| PMULL        | Polynomial Multiply instructions (PMULL/PMULL2)                  |
-| SHA1         | SHA-1 instructions (SHA1C, etc)                                  |
-| SHA2         | SHA-2 instructions (SHA256H, etc)                                |
-| SHA3         | SHA-3 instructions (EOR3, RAXI, XAR, BCAX)                       |
-| SHA512       | SHA512 instructions                                              |
-| SM3          | SM3 instructions                                                 |
-| SM4          | SM4 instructions                                                 |
-| SVE          | Scalable Vector Extension                                        |
-
-# license
-
-This code is published under an MIT license. See LICENSE file for more information.
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
@ -1,47 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build 386,!gccgo,!noasm,!appengine
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0 // executes CPUID with AX=op and CX=0, returning AX, BX, CX and DX
-	XORL CX, CX // zero CX; asmCpuidex is the variant that loads a caller-supplied value here
-	MOVL op+0(FP), AX // first argument selects the CPUID function
-	CPUID
-	MOVL AX, eax+4(FP) // store the four output registers into the result slots
-	MOVL BX, ebx+8(FP)
-	MOVL CX, ecx+12(FP)
-	MOVL DX, edx+16(FP)
-	RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0 // executes CPUID with AX=op and CX=op2, returning AX, BX, CX and DX
-	MOVL op+0(FP), AX // first argument goes in AX
-	MOVL op2+4(FP), CX // second argument goes in CX
-	CPUID
-	MOVL AX, eax+8(FP) // results start after the two 4-byte arguments
-	MOVL BX, ebx+12(FP)
-	MOVL CX, ecx+16(FP)
-	MOVL DX, edx+20(FP)
-	RET
-
-// func asmXgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0 // executes XGETBV with CX=index and returns AX and DX
-	MOVL index+0(FP), CX // argument goes in CX
-	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV, emitted as raw opcode bytes
-	MOVL AX, eax+4(FP) // store AX and DX into the two result slots
-	MOVL DX, edx+8(FP)
-	RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0 // executes RDTSCP and returns AX, BX, CX and DX as left afterwards
-	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP, emitted as raw opcode bytes
-	MOVL AX, eax+0(FP) // no arguments, so results start at offset 0
-	MOVL BX, ebx+4(FP) // NOTE(review): BX is not set in this routine before being stored; confirm callers ignore ebx
-	MOVL CX, ecx+8(FP)
-	MOVL DX, edx+12(FP)
-	RET
-
-// func asmDarwinHasAVX512() bool
-TEXT ·asmDarwinHasAVX512(SB), 7, $0 // 386 build: always reports false
-	MOVL $0, eax+0(FP) // NOTE(review): 32-bit store to a bool result named eax; the amd64 version uses MOVB to ret+0(FP)
-	RET
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
@ -1,72 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build amd64,!gccgo,!noasm,!appengine
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0 // executes CPUID with AX=op and CX=0, returning AX, BX, CX and DX
-	XORQ CX, CX // zero CX; asmCpuidex is the variant that loads a caller-supplied value here
-	MOVL op+0(FP), AX // first argument selects the CPUID function
-	CPUID
-	MOVL AX, eax+8(FP) // results start at offset 8 in this frame layout
-	MOVL BX, ebx+12(FP)
-	MOVL CX, ecx+16(FP)
-	MOVL DX, edx+20(FP)
-	RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0 // executes CPUID with AX=op and CX=op2, returning AX, BX, CX and DX
-	MOVL op+0(FP), AX // first argument goes in AX
-	MOVL op2+4(FP), CX // second argument goes in CX
-	CPUID
-	MOVL AX, eax+8(FP) // store the four output registers into the result slots
-	MOVL BX, ebx+12(FP)
-	MOVL CX, ecx+16(FP)
-	MOVL DX, edx+20(FP)
-	RET
-
-// func asmXgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0 // executes XGETBV with CX=index and returns AX and DX
-	MOVL index+0(FP), CX // argument goes in CX
-	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV, emitted as raw opcode bytes
-	MOVL AX, eax+8(FP) // store AX and DX into the two result slots
-	MOVL DX, edx+12(FP)
-	RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0 // executes RDTSCP and returns AX, BX, CX and DX as left afterwards
-	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP, emitted as raw opcode bytes
-	MOVL AX, eax+0(FP) // no arguments, so results start at offset 0
-	MOVL BX, ebx+4(FP) // NOTE(review): BX is not set in this routine before being stored; confirm callers ignore ebx
-	MOVL CX, ecx+8(FP)
-	MOVL DX, edx+12(FP)
-	RET
-
-// From https://go-review.googlesource.com/c/sys/+/285572/
-// func asmDarwinHasAVX512() bool
-TEXT ·asmDarwinHasAVX512(SB), 7, $0-1 // reports the hasAVX512F capability bit from the Darwin commpage; false on other targets
-	MOVB $0, ret+0(FP) // default to false
-
-#ifdef GOOS_darwin // return if not darwin
-#ifdef GOARCH_amd64 // return if not amd64
-// These values from:
-// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
-#define commpage64_base_address         0x00007fffffe00000
-#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
-#define commpage64_version              (commpage64_base_address+0x01E)
-#define hasAVX512F                      0x0000004000000000
-	MOVQ $commpage64_version, BX // address of the commpage version field
-	MOVW (BX), AX // load the 16-bit version
-	CMPW AX, $13                            // versions < 13 do not support AVX512
-	JL   no_avx512
-	MOVQ $commpage64_cpu_capabilities64, BX // address of the 64-bit capabilities word
-	MOVQ (BX), AX // load the capabilities
-	MOVQ $hasAVX512F, CX
-	ANDQ CX, AX // isolate the hasAVX512F bit
-	JZ   no_avx512 // bit clear: keep the default false
-	MOVB $1, ret+0(FP) // bit set: report true
-
-no_avx512:
-#endif
-#endif
-	RET
-
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
@ -1,26 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build arm64,!gccgo,!noasm,!appengine
-
-// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
-
-// func getMidr() (midr uint64)
-TEXT ·getMidr(SB), 7, $0 // returns the raw value read by the MRS below
-	WORD $0xd5380000    // mrs x0, midr_el1         /* Main ID Register */
-	MOVD R0, midr+0(FP) // store the register value as the result
-	RET
-
-// func getProcFeatures() (procFeatures uint64)
-TEXT ·getProcFeatures(SB), 7, $0 // returns the raw value read by the MRS below
-	WORD $0xd5380400            // mrs x0, id_aa64pfr0_el1  /* Processor Feature Register 0 */
-	MOVD R0, procFeatures+0(FP) // store the register value as the result
-	RET
-
-// func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
-TEXT ·getInstAttributes(SB), 7, $0 // returns the raw values read by the two MRS instructions below
-	WORD $0xd5380600            // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
-	WORD $0xd5380621            // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
-	MOVD R0, instAttrReg0+0(FP) // first result: value read into R0
-	MOVD R1, instAttrReg1+8(FP) // second result: value read into R1
-	RET
-
--- a/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
@ -1,247 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build arm64 && !gccgo && !noasm && !appengine
-// +build arm64,!gccgo,!noasm,!appengine
-
-package cpuid
-
-import "runtime"
-
-func getMidr() (midr uint64) // body in cpuid_arm64.s: reads midr_el1
-func getProcFeatures() (procFeatures uint64) // body in cpuid_arm64.s: reads id_aa64pfr0_el1
-func getInstAttributes() (instAttrReg0, instAttrReg1 uint64) // body in cpuid_arm64.s: reads id_aa64isar0_el1 and id_aa64isar1_el1
-
-// initCPU points the cpuid, cpuidex, xgetbv and rdtscpAsm hooks at stubs
-// that ignore their arguments and return zero for every result.
-func initCPU() {
-	rdtscpAsm = func() (eax, ebx, ecx, edx uint32) { return }
-	xgetbv = func(_ uint32) (eax, edx uint32) { return }
-	cpuidex = func(_, _ uint32) (eax, ebx, ecx, edx uint32) { return }
-	cpuid = func(_ uint32) (eax, ebx, ecx, edx uint32) { return }
-}
-
-// addInfo fills c with ARM64 details: a fixed cache line size, whatever
-// detectOS reports, the vendor, family and model decoded from MIDR_EL1, and
-// feature flags decoded from ID_AA64PFR0_EL1, ID_AA64ISAR0_EL1 and
-// ID_AA64ISAR1_EL1.
-//
-// When safe is true the ID registers are only read if ARMCPUID is already
-// reported as supported or the OS is FreeBSD; otherwise addInfo returns right
-// after detectOS.
-func addInfo(c *CPUInfo, safe bool) {
-	// Seems to be safe to assume on ARM64
-	c.CacheLine = 64
-	detectOS(c)
-
-	// Reading the ID registers may crash if the access is not intercepted by
-	// the OS, so in safe mode skip it unless the OS has advertised support.
-	if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
-		return
-	}
-	midr := getMidr()
-
-	// MIDR_EL1 - Main ID Register
-	// https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
-	//  x--------------------------------------------------x
-	//  | Name                         |  bits   | visible |
-	//  |--------------------------------------------------|
-	//  | Implementer                  | [31-24] |    y    |
-	//  |--------------------------------------------------|
-	//  | Variant                      | [23-20] |    y    |
-	//  |--------------------------------------------------|
-	//  | Architecture                 | [19-16] |    y    |
-	//  |--------------------------------------------------|
-	//  | PartNum                      | [15-4]  |    y    |
-	//  |--------------------------------------------------|
-	//  | Revision                     | [3-0]   |    y    |
-	//  x--------------------------------------------------x
-
-	// Implementer, bits [31:24]. Unknown codes leave the vendor fields untouched.
-	switch (midr >> 24) & 0xff {
-	case 0xC0:
-		c.VendorString = "Ampere Computing"
-		c.VendorID = Ampere
-	case 0x41:
-		c.VendorString = "Arm Limited"
-		c.VendorID = ARM
-	case 0x42:
-		c.VendorString = "Broadcom Corporation"
-		c.VendorID = Broadcom
-	case 0x43:
-		c.VendorString = "Cavium Inc"
-		c.VendorID = Cavium
-	case 0x44:
-		c.VendorString = "Digital Equipment Corporation"
-		c.VendorID = DEC
-	case 0x46:
-		c.VendorString = "Fujitsu Ltd"
-		c.VendorID = Fujitsu
-	case 0x49:
-		c.VendorString = "Infineon Technologies AG"
-		c.VendorID = Infineon
-	case 0x4D:
-		c.VendorString = "Motorola or Freescale Semiconductor Inc"
-		c.VendorID = Motorola
-	case 0x4E:
-		c.VendorString = "NVIDIA Corporation"
-		c.VendorID = NVIDIA
-	case 0x50:
-		c.VendorString = "Applied Micro Circuits Corporation"
-		c.VendorID = AMCC
-	case 0x51:
-		c.VendorString = "Qualcomm Inc"
-		c.VendorID = Qualcomm
-	case 0x56:
-		c.VendorString = "Marvell International Ltd"
-		c.VendorID = Marvell
-	case 0x69:
-		c.VendorString = "Intel Corporation"
-		c.VendorID = Intel
-	}
-
-	// Lower 4 bits: Architecture
-	// Architecture	Meaning
-	// 0b0001		Armv4.
-	// 0b0010		Armv4T.
-	// 0b0011		Armv5 (obsolete).
-	// 0b0100		Armv5T.
-	// 0b0101		Armv5TE.
-	// 0b0110		Armv5TEJ.
-	// 0b0111		Armv6.
-	// 0b1111		Architectural features are individually identified in the ID_* registers, see 'ID registers'.
-	// Upper 4 bits: Variant
-	// An IMPLEMENTATION DEFINED variant number.
-	// Typically, this field is used to distinguish between different product variants, or major revisions of a product.
-	c.Family = int(midr>>16) & 0xff
-
-	// PartNum, bits [15:4]
-	// An IMPLEMENTATION DEFINED primary part number for the device.
-	// On processors implemented by Arm, if the top four bits of the primary
-	// part number are 0x0 or 0x7, the variant and architecture are encoded differently.
-	// Revision, bits [3:0]
-	// An IMPLEMENTATION DEFINED revision number for the device.
-	c.Model = int(midr) & 0xffff
-
-	procFeatures := getProcFeatures()
-
-	// ID_AA64PFR0_EL1 - Processor Feature Register 0
-	// x--------------------------------------------------x
-	// | Name                         |  bits   | visible |
-	// |--------------------------------------------------|
-	// | DIT                          | [51-48] |    y    |
-	// |--------------------------------------------------|
-	// | SVE                          | [35-32] |    y    |
-	// |--------------------------------------------------|
-	// | GIC                          | [27-24] |    n    |
-	// |--------------------------------------------------|
-	// | AdvSIMD                      | [23-20] |    y    |
-	// |--------------------------------------------------|
-	// | FP                           | [19-16] |    y    |
-	// |--------------------------------------------------|
-	// | EL3                          | [15-12] |    n    |
-	// |--------------------------------------------------|
-	// | EL2                          | [11-8]  |    n    |
-	// |--------------------------------------------------|
-	// | EL1                          | [7-4]   |    n    |
-	// |--------------------------------------------------|
-	// | EL0                          | [3-0]   |    n    |
-	// x--------------------------------------------------x
-
-	var f flagSet
-	// if procFeatures&(0xf<<48) != 0 {
-	// 	fmt.Println("DIT")
-	// }
-	f.setIf(procFeatures&(0xf<<32) != 0, SVE)
-	// AdvSIMD: 0b1111 means "not implemented"; any other value means present.
-	if procFeatures&(0xf<<20) != 15<<20 {
-		f.set(ASIMD)
-		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
-		// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
-		f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
-	}
-	// FP uses the same encoding as AdvSIMD: 0b0000 is "implemented" and 0b1111
-	// is "not implemented", so compare against 0b1111 rather than against zero.
-	f.setIf(procFeatures&(0xf<<16) != 0xf<<16, FP)
-
-	instAttrReg0, instAttrReg1 := getInstAttributes()
-
-	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-	//
-	// ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
-	// x--------------------------------------------------x
-	// | Name                         |  bits   | visible |
-	// |--------------------------------------------------|
-	// | TS                           | [55-52] |    y    |
-	// |--------------------------------------------------|
-	// | FHM                          | [51-48] |    y    |
-	// |--------------------------------------------------|
-	// | DP                           | [47-44] |    y    |
-	// |--------------------------------------------------|
-	// | SM4                          | [43-40] |    y    |
-	// |--------------------------------------------------|
-	// | SM3                          | [39-36] |    y    |
-	// |--------------------------------------------------|
-	// | SHA3                         | [35-32] |    y    |
-	// |--------------------------------------------------|
-	// | RDM                          | [31-28] |    y    |
-	// |--------------------------------------------------|
-	// | ATOMICS                      | [23-20] |    y    |
-	// |--------------------------------------------------|
-	// | CRC32                        | [19-16] |    y    |
-	// |--------------------------------------------------|
-	// | SHA2                         | [15-12] |    y    |
-	// |--------------------------------------------------|
-	// | SHA1                         | [11-8]  |    y    |
-	// |--------------------------------------------------|
-	// | AES                          | [7-4]   |    y    |
-	// x--------------------------------------------------x
-
-	// if instAttrReg0&(0xf<<52) != 0 {
-	// 	fmt.Println("TS")
-	// }
-	// if instAttrReg0&(0xf<<48) != 0 {
-	// 	fmt.Println("FHM")
-	// }
-	f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
-	f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
-	f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
-	f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
-	f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
-	f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
-	f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
-	f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
-	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-	// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
-	f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
-	f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
-	f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
-	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-	// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
-	f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
-
-	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
-	//
-	// ID_AA64ISAR1_EL1 - Instruction set attribute register 1
-	// x--------------------------------------------------x
-	// | Name                         |  bits   | visible |
-	// |--------------------------------------------------|
-	// | GPI                          | [31-28] |    y    |
-	// |--------------------------------------------------|
-	// | GPA                          | [27-24] |    y    |
-	// |--------------------------------------------------|
-	// | LRCPC                        | [23-20] |    y    |
-	// |--------------------------------------------------|
-	// | FCMA                         | [19-16] |    y    |
-	// |--------------------------------------------------|
-	// | JSCVT                        | [15-12] |    y    |
-	// |--------------------------------------------------|
-	// | API                          | [11-8]  |    y    |
-	// |--------------------------------------------------|
-	// | APA                          | [7-4]   |    y    |
-	// |--------------------------------------------------|
-	// | DPB                          | [3-0]   |    y    |
-	// x--------------------------------------------------x
-
-	// if instAttrReg1&(0xf<<28) != 0 {
-	// 	fmt.Println("GPI")
-	// }
-	// GPA lives in bits [27:24]; a non-zero field means the feature is present.
-	f.setIf(instAttrReg1&(0xf<<24) != 0, GPA)
-	f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
-	f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
-	f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
-	// if instAttrReg1&(0xf<<8) != 0 {
-	// 	fmt.Println("API")
-	// }
-	// if instAttrReg1&(0xf<<4) != 0 {
-	// 	fmt.Println("APA")
-	// }
-	f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
-
-	// Merge the decoded flags into whatever detectOS already recorded.
-	c.featureSet.or(f)
-}
--- a/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_ref.go
@ -1,15 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
-// +build !amd64,!386,!arm64 gccgo noasm appengine
-
-package cpuid
-
-// initCPU wires cpuid, cpuidex, xgetbv and rdtscpAsm to placeholder
-// functions whose results are always zero.
-func initCPU() {
-	cpuid = func(uint32) (uint32, uint32, uint32, uint32) { return 0, 0, 0, 0 }
-	cpuidex = func(uint32, uint32) (uint32, uint32, uint32, uint32) { return 0, 0, 0, 0 }
-	xgetbv = func(uint32) (uint32, uint32) { return 0, 0 }
-	rdtscpAsm = func() (uint32, uint32, uint32, uint32) { return 0, 0, 0, 0 }
-}
-
-func addInfo(info *CPUInfo, safe bool) {} // no-op in this fallback build: info is left unchanged and safe is ignored
--- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
@ -1,38 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
-// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
-
-package cpuid
-
-func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-func asmXgetbv(index uint32) (eax, edx uint32)
-func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-func asmDarwinHasAVX512() bool
-
-func initCPU() {
-	cpuid = asmCpuid
-	cpuidex = asmCpuidex
-	xgetbv = asmXgetbv
-	rdtscpAsm = asmRdtscpAsm
-	darwinHasAVX512 = asmDarwinHasAVX512
-}
-
-func addInfo(c *CPUInfo, safe bool) {
-	c.maxFunc = maxFunctionID()
-	c.maxExFunc = maxExtendedFunction()
-	c.BrandName = brandName()
-	c.CacheLine = cacheLine()
-	c.Family, c.Model, c.Stepping = familyModel()
-	c.featureSet = support()
-	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
-	c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
-	c.ThreadsPerCore = threadsPerCore()
-	c.LogicalCores = logicalCores()
-	c.PhysicalCores = physicalCores()
-	c.VendorID, c.VendorString = vendorID()
-	c.AVX10Level = c.supportAVX10()
-	c.cacheSize()
-	c.frequencies()
-}
--- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@ -1,284 +0,0 @@
-// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
-
-package cpuid
-
-import "strconv"
-
-func _() {
-	// An "invalid array index" compiler error signifies that the constant values have changed.
-	// Re-run the stringer command to generate them again.
-	var x [1]struct{}
-	_ = x[ADX-1]
-	_ = x[AESNI-2]
-	_ = x[AMD3DNOW-3]
-	_ = x[AMD3DNOWEXT-4]
-	_ = x[AMXBF16-5]
-	_ = x[AMXFP16-6]
-	_ = x[AMXINT8-7]
-	_ = x[AMXTILE-8]
-	_ = x[APX_F-9]
-	_ = x[AVX-10]
-	_ = x[AVX10-11]
-	_ = x[AVX10_128-12]
-	_ = x[AVX10_256-13]
-	_ = x[AVX10_512-14]
-	_ = x[AVX2-15]
-	_ = x[AVX512BF16-16]
-	_ = x[AVX512BITALG-17]
-	_ = x[AVX512BW-18]
-	_ = x[AVX512CD-19]
-	_ = x[AVX512DQ-20]
-	_ = x[AVX512ER-21]
-	_ = x[AVX512F-22]
-	_ = x[AVX512FP16-23]
-	_ = x[AVX512IFMA-24]
-	_ = x[AVX512PF-25]
-	_ = x[AVX512VBMI-26]
-	_ = x[AVX512VBMI2-27]
-	_ = x[AVX512VL-28]
-	_ = x[AVX512VNNI-29]
-	_ = x[AVX512VP2INTERSECT-30]
-	_ = x[AVX512VPOPCNTDQ-31]
-	_ = x[AVXIFMA-32]
-	_ = x[AVXNECONVERT-33]
-	_ = x[AVXSLOW-34]
-	_ = x[AVXVNNI-35]
-	_ = x[AVXVNNIINT8-36]
-	_ = x[BHI_CTRL-37]
-	_ = x[BMI1-38]
-	_ = x[BMI2-39]
-	_ = x[CETIBT-40]
-	_ = x[CETSS-41]
-	_ = x[CLDEMOTE-42]
-	_ = x[CLMUL-43]
-	_ = x[CLZERO-44]
-	_ = x[CMOV-45]
-	_ = x[CMPCCXADD-46]
-	_ = x[CMPSB_SCADBS_SHORT-47]
-	_ = x[CMPXCHG8-48]
-	_ = x[CPBOOST-49]
-	_ = x[CPPC-50]
-	_ = x[CX16-51]
-	_ = x[EFER_LMSLE_UNS-52]
-	_ = x[ENQCMD-53]
-	_ = x[ERMS-54]
-	_ = x[F16C-55]
-	_ = x[FLUSH_L1D-56]
-	_ = x[FMA3-57]
-	_ = x[FMA4-58]
-	_ = x[FP128-59]
-	_ = x[FP256-60]
-	_ = x[FSRM-61]
-	_ = x[FXSR-62]
-	_ = x[FXSROPT-63]
-	_ = x[GFNI-64]
-	_ = x[HLE-65]
-	_ = x[HRESET-66]
-	_ = x[HTT-67]
-	_ = x[HWA-68]
-	_ = x[HYBRID_CPU-69]
-	_ = x[HYPERVISOR-70]
-	_ = x[IA32_ARCH_CAP-71]
-	_ = x[IA32_CORE_CAP-72]
-	_ = x[IBPB-73]
-	_ = x[IBPB_BRTYPE-74]
-	_ = x[IBRS-75]
-	_ = x[IBRS_PREFERRED-76]
-	_ = x[IBRS_PROVIDES_SMP-77]
-	_ = x[IBS-78]
-	_ = x[IBSBRNTRGT-79]
-	_ = x[IBSFETCHSAM-80]
-	_ = x[IBSFFV-81]
-	_ = x[IBSOPCNT-82]
-	_ = x[IBSOPCNTEXT-83]
-	_ = x[IBSOPSAM-84]
-	_ = x[IBSRDWROPCNT-85]
-	_ = x[IBSRIPINVALIDCHK-86]
-	_ = x[IBS_FETCH_CTLX-87]
-	_ = x[IBS_OPDATA4-88]
-	_ = x[IBS_OPFUSE-89]
-	_ = x[IBS_PREVENTHOST-90]
-	_ = x[IBS_ZEN4-91]
-	_ = x[IDPRED_CTRL-92]
-	_ = x[INT_WBINVD-93]
-	_ = x[INVLPGB-94]
-	_ = x[KEYLOCKER-95]
-	_ = x[KEYLOCKERW-96]
-	_ = x[LAHF-97]
-	_ = x[LAM-98]
-	_ = x[LBRVIRT-99]
-	_ = x[LZCNT-100]
-	_ = x[MCAOVERFLOW-101]
-	_ = x[MCDT_NO-102]
-	_ = x[MCOMMIT-103]
-	_ = x[MD_CLEAR-104]
-	_ = x[MMX-105]
-	_ = x[MMXEXT-106]
-	_ = x[MOVBE-107]
-	_ = x[MOVDIR64B-108]
-	_ = x[MOVDIRI-109]
-	_ = x[MOVSB_ZL-110]
-	_ = x[MOVU-111]
-	_ = x[MPX-112]
-	_ = x[MSRIRC-113]
-	_ = x[MSRLIST-114]
-	_ = x[MSR_PAGEFLUSH-115]
-	_ = x[NRIPS-116]
-	_ = x[NX-117]
-	_ = x[OSXSAVE-118]
-	_ = x[PCONFIG-119]
-	_ = x[POPCNT-120]
-	_ = x[PPIN-121]
-	_ = x[PREFETCHI-122]
-	_ = x[PSFD-123]
-	_ = x[RDPRU-124]
-	_ = x[RDRAND-125]
-	_ = x[RDSEED-126]
-	_ = x[RDTSCP-127]
-	_ = x[RRSBA_CTRL-128]
-	_ = x[RTM-129]
-	_ = x[RTM_ALWAYS_ABORT-130]
-	_ = x[SBPB-131]
-	_ = x[SERIALIZE-132]
-	_ = x[SEV-133]
-	_ = x[SEV_64BIT-134]
-	_ = x[SEV_ALTERNATIVE-135]
-	_ = x[SEV_DEBUGSWAP-136]
-	_ = x[SEV_ES-137]
-	_ = x[SEV_RESTRICTED-138]
-	_ = x[SEV_SNP-139]
-	_ = x[SGX-140]
-	_ = x[SGXLC-141]
-	_ = x[SHA-142]
-	_ = x[SME-143]
-	_ = x[SME_COHERENT-144]
-	_ = x[SPEC_CTRL_SSBD-145]
-	_ = x[SRBDS_CTRL-146]
-	_ = x[SRSO_MSR_FIX-147]
-	_ = x[SRSO_NO-148]
-	_ = x[SRSO_USER_KERNEL_NO-149]
-	_ = x[SSE-150]
-	_ = x[SSE2-151]
-	_ = x[SSE3-152]
-	_ = x[SSE4-153]
-	_ = x[SSE42-154]
-	_ = x[SSE4A-155]
-	_ = x[SSSE3-156]
-	_ = x[STIBP-157]
-	_ = x[STIBP_ALWAYSON-158]
-	_ = x[STOSB_SHORT-159]
-	_ = x[SUCCOR-160]
-	_ = x[SVM-161]
-	_ = x[SVMDA-162]
-	_ = x[SVMFBASID-163]
-	_ = x[SVML-164]
-	_ = x[SVMNP-165]
-	_ = x[SVMPF-166]
-	_ = x[SVMPFT-167]
-	_ = x[SYSCALL-168]
-	_ = x[SYSEE-169]
-	_ = x[TBM-170]
-	_ = x[TDX_GUEST-171]
-	_ = x[TLB_FLUSH_NESTED-172]
-	_ = x[TME-173]
-	_ = x[TOPEXT-174]
-	_ = x[TSCRATEMSR-175]
-	_ = x[TSXLDTRK-176]
-	_ = x[VAES-177]
-	_ = x[VMCBCLEAN-178]
-	_ = x[VMPL-179]
-	_ = x[VMSA_REGPROT-180]
-	_ = x[VMX-181]
-	_ = x[VPCLMULQDQ-182]
-	_ = x[VTE-183]
-	_ = x[WAITPKG-184]
-	_ = x[WBNOINVD-185]
-	_ = x[WRMSRNS-186]
-	_ = x[X87-187]
-	_ = x[XGETBV1-188]
-	_ = x[XOP-189]
-	_ = x[XSAVE-190]
-	_ = x[XSAVEC-191]
-	_ = x[XSAVEOPT-192]
-	_ = x[XSAVES-193]
-	_ = x[AESARM-194]
-	_ = x[ARMCPUID-195]
-	_ = x[ASIMD-196]
-	_ = x[ASIMDDP-197]
-	_ = x[ASIMDHP-198]
-	_ = x[ASIMDRDM-199]
-	_ = x[ATOMICS-200]
-	_ = x[CRC32-201]
-	_ = x[DCPOP-202]
-	_ = x[EVTSTRM-203]
-	_ = x[FCMA-204]
-	_ = x[FP-205]
-	_ = x[FPHP-206]
-	_ = x[GPA-207]
-	_ = x[JSCVT-208]
-	_ = x[LRCPC-209]
-	_ = x[PMULL-210]
-	_ = x[SHA1-211]
-	_ = x[SHA2-212]
-	_ = x[SHA3-213]
-	_ = x[SHA512-214]
-	_ = x[SM3-215]
-	_ = x[SM4-216]
-	_ = x[SVE-217]
-	_ = x[lastID-218]
-	_ = x[firstID-0]
-}
-
-const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
-
-var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}
-
-func (i FeatureID) String() string {
-	if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
-		return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
-	}
-	return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
-}
-func _() {
-	// An "invalid array index" compiler error signifies that the constant values have changed.
-	// Re-run the stringer command to generate them again.
-	var x [1]struct{}
-	_ = x[VendorUnknown-0]
-	_ = x[Intel-1]
-	_ = x[AMD-2]
-	_ = x[VIA-3]
-	_ = x[Transmeta-4]
-	_ = x[NSC-5]
-	_ = x[KVM-6]
-	_ = x[MSVM-7]
-	_ = x[VMware-8]
-	_ = x[XenHVM-9]
-	_ = x[Bhyve-10]
-	_ = x[Hygon-11]
-	_ = x[SiS-12]
-	_ = x[RDC-13]
-	_ = x[Ampere-14]
-	_ = x[ARM-15]
-	_ = x[Broadcom-16]
-	_ = x[Cavium-17]
-	_ = x[DEC-18]
-	_ = x[Fujitsu-19]
-	_ = x[Infineon-20]
-	_ = x[Motorola-21]
-	_ = x[NVIDIA-22]
-	_ = x[AMCC-23]
-	_ = x[Qualcomm-24]
-	_ = x[Marvell-25]
-	_ = x[lastVendor-26]
-}
-
-const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
-
-var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
-
-func (i Vendor) String() string {
-	if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
-		return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
-	}
-	return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
-}
--- a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
@ -1,121 +0,0 @@
-// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
-
-package cpuid
-
-import (
-	"runtime"
-	"strings"
-
-	"golang.org/x/sys/unix"
-)
-
-func detectOS(c *CPUInfo) bool {
-	if runtime.GOOS != "ios" {
-		tryToFillCPUInfoFomSysctl(c)
-	}
-	// There are no hw.optional sysctl values for the below features on Mac OS 11.0
-	// to detect their supported state dynamically. Assume the CPU features that
-	// Apple Silicon M1 supports to be available as a minimal set of features
-	// to all Go programs running on darwin/arm64.
-	// TODO: Add more if we know them.
-	c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
-
-	return true
-}
-
-func sysctlGetBool(name string) bool {
-	value, err := unix.SysctlUint32(name)
-	if err != nil {
-		return false
-	}
-	return value != 0
-}
-
-func sysctlGetString(name string) string {
-	value, err := unix.Sysctl(name)
-	if err != nil {
-		return ""
-	}
-	return value
-}
-
-func sysctlGetInt(unknown int, names ...string) int {
-	for _, name := range names {
-		value, err := unix.SysctlUint32(name)
-		if err != nil {
-			continue
-		}
-		if value != 0 {
-			return int(value)
-		}
-	}
-	return unknown
-}
-
-func sysctlGetInt64(unknown int, names ...string) int {
-	for _, name := range names {
-		value64, err := unix.SysctlUint64(name)
-		if err != nil {
-			continue
-		}
-		if int(value64) != unknown {
-			return int(value64)
-		}
-	}
-	return unknown
-}
-
-func setFeature(c *CPUInfo, name string, feature FeatureID) {
-	c.featureSet.setIf(sysctlGetBool(name), feature)
-}
-func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
-	c.BrandName = sysctlGetString("machdep.cpu.brand_string")
-
-	if len(c.BrandName) != 0 {
-		c.VendorString = strings.Fields(c.BrandName)[0]
-	}
-
-	c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
-	c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
-		sysctlGetInt(1, "hw.physicalcpu")
-	c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
-	c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
-	c.Model = sysctlGetInt(0, "machdep.cpu.model")
-	c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
-	c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
-	c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
-	c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
-	c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
-
-	// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
-	setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
-	setFeature(c, "hw.optional.AdvSIMD", ASIMD)
-	setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
-	setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
-	setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
-	setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
-	// setFeature(c, "", EVTSTRM)
-	setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
-	setFeature(c, "hw.optional.arm.FEAT_FP", FP)
-	setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
-	setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
-	setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
-	setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
-	setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
-	setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
-	setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
-	setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
-	setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
-	// setFeature(c, "", SM3)
-	// setFeature(c, "", SM4)
-	setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
-
-	// from empirical observation
-	setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
-	setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
-	setFeature(c, "hw.optional.floatingpoint", FP)
-	setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
-	setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
-	setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
-	setFeature(c, "hw.optional.armv8_crc32", CRC32)
-}
--- a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
@ -1,130 +0,0 @@
-// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
-
-// Copyright 2018 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file located
-// here https://github.com/golang/sys/blob/master/LICENSE
-
-package cpuid
-
-import (
-	"encoding/binary"
-	"io/ioutil"
-	"runtime"
-)
-
-// HWCAP bits.
-const (
-	hwcap_FP       = 1 << 0
-	hwcap_ASIMD    = 1 << 1
-	hwcap_EVTSTRM  = 1 << 2
-	hwcap_AES      = 1 << 3
-	hwcap_PMULL    = 1 << 4
-	hwcap_SHA1     = 1 << 5
-	hwcap_SHA2     = 1 << 6
-	hwcap_CRC32    = 1 << 7
-	hwcap_ATOMICS  = 1 << 8
-	hwcap_FPHP     = 1 << 9
-	hwcap_ASIMDHP  = 1 << 10
-	hwcap_CPUID    = 1 << 11
-	hwcap_ASIMDRDM = 1 << 12
-	hwcap_JSCVT    = 1 << 13
-	hwcap_FCMA     = 1 << 14
-	hwcap_LRCPC    = 1 << 15
-	hwcap_DCPOP    = 1 << 16
-	hwcap_SHA3     = 1 << 17
-	hwcap_SM3      = 1 << 18
-	hwcap_SM4      = 1 << 19
-	hwcap_ASIMDDP  = 1 << 20
-	hwcap_SHA512   = 1 << 21
-	hwcap_SVE      = 1 << 22
-	hwcap_ASIMDFHM = 1 << 23
-)
-
-func detectOS(c *CPUInfo) bool {
-	// For now assuming no hyperthreading is reasonable.
-	c.LogicalCores = runtime.NumCPU()
-	c.PhysicalCores = c.LogicalCores
-	c.ThreadsPerCore = 1
-	if hwcap == 0 {
-		// We did not get values from the runtime.
-		// Try reading /proc/self/auxv
-
-		// From https://github.com/golang/sys
-		const (
-			_AT_HWCAP  = 16
-			_AT_HWCAP2 = 26
-
-			uintSize = int(32 << (^uint(0) >> 63))
-		)
-
-		buf, err := ioutil.ReadFile("/proc/self/auxv")
-		if err != nil {
-			// e.g. on android /proc/self/auxv is not accessible, so silently
-			// ignore the error and leave Initialized = false. On some
-			// architectures (e.g. arm64) doinit() implements a fallback
-			// readout and will set Initialized = true again.
-			return false
-		}
-		bo := binary.LittleEndian
-		for len(buf) >= 2*(uintSize/8) {
-			var tag, val uint
-			switch uintSize {
-			case 32:
-				tag = uint(bo.Uint32(buf[0:]))
-				val = uint(bo.Uint32(buf[4:]))
-				buf = buf[8:]
-			case 64:
-				tag = uint(bo.Uint64(buf[0:]))
-				val = uint(bo.Uint64(buf[8:]))
-				buf = buf[16:]
-			}
-			switch tag {
-			case _AT_HWCAP:
-				hwcap = val
-			case _AT_HWCAP2:
-				// Not used
-			}
-		}
-		if hwcap == 0 {
-			return false
-		}
-	}
-
-	// HWCap was populated by the runtime from the auxiliary vector.
-	// Use HWCap information since reading aarch64 system registers
-	// is not supported in user space on older linux kernels.
-	c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM)
-	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD)
-	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP)
-	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP)
-	c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM)
-	c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID)
-	c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32)
-	c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP)
-	c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM)
-	c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA)
-	c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP)
-	c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP)
-	c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT)
-	c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC)
-	c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL)
-	c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1)
-	c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2)
-	c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3)
-	c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512)
-	c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3)
-	c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4)
-	c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE)
-
-	// The Samsung S9+ kernel reports support for atomics, but not all cores
-	// actually support them, resulting in SIGILL. See issue #28431.
-	// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
-	c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS)
-
-	return true
-}
-
-func isSet(hwc uint, value uint) bool {
-	return hwc&value != 0
-}
--- a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
@ -1,16 +0,0 @@
-// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build arm64 && !linux && !darwin
-// +build arm64,!linux,!darwin
-
-package cpuid
-
-import "runtime"
-
-func detectOS(c *CPUInfo) bool {
-	c.PhysicalCores = runtime.NumCPU()
-	// For now assuming 1 thread per core...
-	c.ThreadsPerCore = 1
-	c.LogicalCores = c.PhysicalCores
-	return false
-}
--- a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
@ -1,8 +0,0 @@
-// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build nounsafe
-// +build nounsafe
-
-package cpuid
-
-var hwcap uint
--- a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
@ -1,11 +0,0 @@
-// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
-
-//go:build !nounsafe
-// +build !nounsafe
-
-package cpuid
-
-import _ "unsafe" // needed for go:linkname
-
-//go:linkname hwcap internal/cpu.HWCap
-var hwcap uint
--- a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
+++ b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
@ -1,15 +0,0 @@
-#!/bin/sh
-
-set -e
-
-go tool dist list | while IFS=/ read os arch; do
-    echo "Checking $os/$arch..."
-    echo " normal"
-    GOARCH=$arch GOOS=$os go build -o /dev/null .
-    echo " noasm"
-    GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null .
-    echo " appengine"
-    GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null .
-    echo " noasm,appengine"
-    GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null .
-done