mirror of
https://github.com/cwinfo/matterbridge.git
synced 2025-07-13 07:56:28 +00:00
Update mattermost library (#2152)
* Update mattermost library * Fix linting
This commit is contained in:
4
vendor/github.com/klauspost/compress/.goreleaser.yml
generated
vendored
4
vendor/github.com/klauspost/compress/.goreleaser.yml
generated
vendored
@ -3,7 +3,6 @@
|
||||
before:
|
||||
hooks:
|
||||
- ./gen.sh
|
||||
- go install mvdan.cc/garble@v0.10.1
|
||||
|
||||
builds:
|
||||
-
|
||||
@ -32,7 +31,6 @@ builds:
|
||||
- mips64le
|
||||
goarm:
|
||||
- 7
|
||||
gobinary: garble
|
||||
-
|
||||
id: "s2d"
|
||||
binary: s2d
|
||||
@ -59,7 +57,6 @@ builds:
|
||||
- mips64le
|
||||
goarm:
|
||||
- 7
|
||||
gobinary: garble
|
||||
-
|
||||
id: "s2sx"
|
||||
binary: s2sx
|
||||
@ -87,7 +84,6 @@ builds:
|
||||
- mips64le
|
||||
goarm:
|
||||
- 7
|
||||
gobinary: garble
|
||||
|
||||
archives:
|
||||
-
|
||||
|
43
vendor/github.com/klauspost/compress/README.md
generated
vendored
43
vendor/github.com/klauspost/compress/README.md
generated
vendored
@ -16,6 +16,46 @@ This package provides various compression algorithms.
|
||||
|
||||
# changelog
|
||||
|
||||
* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6)
|
||||
* zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923
|
||||
* s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925
|
||||
|
||||
* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5)
|
||||
* flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912
|
||||
* zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908
|
||||
* zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913
|
||||
* zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910
|
||||
* s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917
|
||||
https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918
|
||||
|
||||
* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4)
|
||||
* huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887
|
||||
* huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886
|
||||
* gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892
|
||||
* gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890
|
||||
* gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891
|
||||
|
||||
* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3)
|
||||
* fse: Fix max header size https://github.com/klauspost/compress/pull/881
|
||||
* zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877
|
||||
* gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883
|
||||
|
||||
* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
|
||||
* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
|
||||
|
||||
* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
|
||||
* s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871
|
||||
* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
|
||||
* s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867
|
||||
|
||||
* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
|
||||
* Add experimental dictionary builder https://github.com/klauspost/compress/pull/853
|
||||
* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
|
||||
* flate: Add limited window compression https://github.com/klauspost/compress/pull/843
|
||||
* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
|
||||
* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
|
||||
* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
|
||||
|
||||
* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
|
||||
* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
|
||||
* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
|
||||
@ -538,7 +578,7 @@ For direct deflate use, NewStatelessWriter and StatelessDeflate are available. S
|
||||
|
||||
A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:
|
||||
|
||||
```
|
||||
```go
|
||||
// replace 'ioutil.Discard' with your output.
|
||||
gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
|
||||
if err != nil {
|
||||
@ -646,6 +686,7 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
|
||||
* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
|
||||
* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
|
||||
* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
|
||||
* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.
|
||||
|
||||
# license
|
||||
|
||||
|
2
vendor/github.com/klauspost/compress/fse/compress.go
generated
vendored
2
vendor/github.com/klauspost/compress/fse/compress.go
generated
vendored
@ -212,7 +212,7 @@ func (s *Scratch) writeCount() error {
|
||||
previous0 bool
|
||||
charnum uint16
|
||||
|
||||
maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
|
||||
maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3
|
||||
|
||||
// Write Table Size
|
||||
bitStream = uint32(tableLog - minTablelog)
|
||||
|
44
vendor/github.com/klauspost/compress/huff0/bytereader.go
generated
vendored
44
vendor/github.com/klauspost/compress/huff0/bytereader.go
generated
vendored
@ -1,44 +0,0 @@
|
||||
// Copyright 2018 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
|
||||
|
||||
package huff0
|
||||
|
||||
// byteReader provides a byte reader that reads
|
||||
// little endian values from a byte stream.
|
||||
// The input stream is manually advanced.
|
||||
// The reader performs no bounds checks.
|
||||
type byteReader struct {
|
||||
b []byte
|
||||
off int
|
||||
}
|
||||
|
||||
// init will initialize the reader and set the input.
|
||||
func (b *byteReader) init(in []byte) {
|
||||
b.b = in
|
||||
b.off = 0
|
||||
}
|
||||
|
||||
// Int32 returns a little endian int32 starting at current offset.
|
||||
func (b byteReader) Int32() int32 {
|
||||
v3 := int32(b.b[b.off+3])
|
||||
v2 := int32(b.b[b.off+2])
|
||||
v1 := int32(b.b[b.off+1])
|
||||
v0 := int32(b.b[b.off])
|
||||
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
|
||||
}
|
||||
|
||||
// Uint32 returns a little endian uint32 starting at current offset.
|
||||
func (b byteReader) Uint32() uint32 {
|
||||
v3 := uint32(b.b[b.off+3])
|
||||
v2 := uint32(b.b[b.off+2])
|
||||
v1 := uint32(b.b[b.off+1])
|
||||
v0 := uint32(b.b[b.off])
|
||||
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
|
||||
}
|
||||
|
||||
// remain will return the number of bytes remaining.
|
||||
func (b byteReader) remain() int {
|
||||
return len(b.b) - b.off
|
||||
}
|
5
vendor/github.com/klauspost/compress/huff0/compress.go
generated
vendored
5
vendor/github.com/klauspost/compress/huff0/compress.go
generated
vendored
@ -350,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
|
||||
// Does not update s.clearCount.
|
||||
func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
|
||||
reuse = true
|
||||
_ = s.count // Assert that s != nil to speed up the following loop.
|
||||
for _, v := range in {
|
||||
s.count[v]++
|
||||
}
|
||||
@ -415,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool {
|
||||
|
||||
// minTableLog provides the minimum logSize to safely represent a distribution.
|
||||
func (s *Scratch) minTableLog() uint8 {
|
||||
minBitsSrc := highBit32(uint32(s.br.remain())) + 1
|
||||
minBitsSrc := highBit32(uint32(s.srcLen)) + 1
|
||||
minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
|
||||
if minBitsSrc < minBitsSymbols {
|
||||
return uint8(minBitsSrc)
|
||||
@ -427,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 {
|
||||
func (s *Scratch) optimalTableLog() {
|
||||
tableLog := s.TableLog
|
||||
minBits := s.minTableLog()
|
||||
maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1
|
||||
maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1
|
||||
if maxBitsSrc < tableLog {
|
||||
// Accuracy can be reduced
|
||||
tableLog = maxBitsSrc
|
||||
|
4
vendor/github.com/klauspost/compress/huff0/huff0.go
generated
vendored
4
vendor/github.com/klauspost/compress/huff0/huff0.go
generated
vendored
@ -88,7 +88,7 @@ type Scratch struct {
|
||||
// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
|
||||
MaxDecodedSize int
|
||||
|
||||
br byteReader
|
||||
srcLen int
|
||||
|
||||
// MaxSymbolValue will override the maximum symbol value of the next block.
|
||||
MaxSymbolValue uint8
|
||||
@ -170,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) {
|
||||
if s.fse == nil {
|
||||
s.fse = &fse.Scratch{}
|
||||
}
|
||||
s.br.init(in)
|
||||
s.srcLen = len(in)
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
15
vendor/github.com/klauspost/compress/s2/.gitignore
generated
vendored
15
vendor/github.com/klauspost/compress/s2/.gitignore
generated
vendored
@ -1,15 +0,0 @@
|
||||
testdata/bench
|
||||
|
||||
# These explicitly listed benchmark data files are for an obsolete version of
|
||||
# snappy_test.go.
|
||||
testdata/alice29.txt
|
||||
testdata/asyoulik.txt
|
||||
testdata/fireworks.jpeg
|
||||
testdata/geo.protodata
|
||||
testdata/html
|
||||
testdata/html_x_4
|
||||
testdata/kppkn.gtb
|
||||
testdata/lcet10.txt
|
||||
testdata/paper-100k.pdf
|
||||
testdata/plrabn12.txt
|
||||
testdata/urls.10K
|
28
vendor/github.com/klauspost/compress/s2/LICENSE
generated
vendored
28
vendor/github.com/klauspost/compress/s2/LICENSE
generated
vendored
@ -1,28 +0,0 @@
|
||||
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
|
||||
Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
* Neither the name of Google Inc. nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
1120
vendor/github.com/klauspost/compress/s2/README.md
generated
vendored
1120
vendor/github.com/klauspost/compress/s2/README.md
generated
vendored
File diff suppressed because it is too large
Load Diff
437
vendor/github.com/klauspost/compress/s2/decode.go
generated
vendored
437
vendor/github.com/klauspost/compress/s2/decode.go
generated
vendored
@ -1,437 +0,0 @@
|
||||
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
var (
|
||||
// ErrCorrupt reports that the input is invalid.
|
||||
ErrCorrupt = errors.New("s2: corrupt input")
|
||||
// ErrCRC reports that the input failed CRC validation (streams only)
|
||||
ErrCRC = errors.New("s2: corrupt input, crc mismatch")
|
||||
// ErrTooLarge reports that the uncompressed length is too large.
|
||||
ErrTooLarge = errors.New("s2: decoded block is too large")
|
||||
// ErrUnsupported reports that the input isn't supported.
|
||||
ErrUnsupported = errors.New("s2: unsupported input")
|
||||
)
|
||||
|
||||
// DecodedLen returns the length of the decoded block.
|
||||
func DecodedLen(src []byte) (int, error) {
|
||||
v, _, err := decodedLen(src)
|
||||
return v, err
|
||||
}
|
||||
|
||||
// decodedLen returns the length of the decoded block and the number of bytes
|
||||
// that the length header occupied.
|
||||
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
|
||||
v, n := binary.Uvarint(src)
|
||||
if n <= 0 || v > 0xffffffff {
|
||||
return 0, 0, ErrCorrupt
|
||||
}
|
||||
|
||||
const wordSize = 32 << (^uint(0) >> 32 & 1)
|
||||
if wordSize == 32 && v > 0x7fffffff {
|
||||
return 0, 0, ErrTooLarge
|
||||
}
|
||||
return int(v), n, nil
|
||||
}
|
||||
|
||||
const (
|
||||
decodeErrCodeCorrupt = 1
|
||||
)
|
||||
|
||||
// Decode returns the decoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire decoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
func Decode(dst, src []byte) ([]byte, error) {
|
||||
dLen, s, err := decodedLen(src)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if dLen <= cap(dst) {
|
||||
dst = dst[:dLen]
|
||||
} else {
|
||||
dst = make([]byte, dLen)
|
||||
}
|
||||
if s2Decode(dst, src[s:]) != 0 {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
// s2DecodeDict writes the decoding of src to dst. It assumes that the varint-encoded
|
||||
// length of the decompressed bytes has already been read, and that len(dst)
|
||||
// equals that length.
|
||||
//
|
||||
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
|
||||
func s2DecodeDict(dst, src []byte, dict *Dict) int {
|
||||
if dict == nil {
|
||||
return s2Decode(dst, src)
|
||||
}
|
||||
const debug = false
|
||||
const debugErrs = debug
|
||||
|
||||
if debug {
|
||||
fmt.Println("Starting decode, dst len:", len(dst))
|
||||
}
|
||||
var d, s, length int
|
||||
offset := len(dict.dict) - dict.repeat
|
||||
|
||||
// As long as we can read at least 5 bytes...
|
||||
for s < len(src)-5 {
|
||||
// Removing bounds checks is SLOWER, when if doing
|
||||
// in := src[s:s+5]
|
||||
// Checked on Go 1.18
|
||||
switch src[s] & 0x03 {
|
||||
case tagLiteral:
|
||||
x := uint32(src[s] >> 2)
|
||||
switch {
|
||||
case x < 60:
|
||||
s++
|
||||
case x == 60:
|
||||
s += 2
|
||||
x = uint32(src[s-1])
|
||||
case x == 61:
|
||||
in := src[s : s+3]
|
||||
x = uint32(in[1]) | uint32(in[2])<<8
|
||||
s += 3
|
||||
case x == 62:
|
||||
in := src[s : s+4]
|
||||
// Load as 32 bit and shift down.
|
||||
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
|
||||
x >>= 8
|
||||
s += 4
|
||||
case x == 63:
|
||||
in := src[s : s+5]
|
||||
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
|
||||
s += 5
|
||||
}
|
||||
length = int(x) + 1
|
||||
if debug {
|
||||
fmt.Println("literals, length:", length, "d-after:", d+length)
|
||||
}
|
||||
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
|
||||
if debugErrs {
|
||||
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
|
||||
copy(dst[d:], src[s:s+length])
|
||||
d += length
|
||||
s += length
|
||||
continue
|
||||
|
||||
case tagCopy1:
|
||||
s += 2
|
||||
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
length = int(src[s-2]) >> 2 & 0x7
|
||||
if toffset == 0 {
|
||||
if debug {
|
||||
fmt.Print("(repeat) ")
|
||||
}
|
||||
// keep last offset
|
||||
switch length {
|
||||
case 5:
|
||||
length = int(src[s]) + 4
|
||||
s += 1
|
||||
case 6:
|
||||
in := src[s : s+2]
|
||||
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
|
||||
s += 2
|
||||
case 7:
|
||||
in := src[s : s+3]
|
||||
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
|
||||
s += 3
|
||||
default: // 0-> 4
|
||||
}
|
||||
} else {
|
||||
offset = toffset
|
||||
}
|
||||
length += 4
|
||||
case tagCopy2:
|
||||
in := src[s : s+3]
|
||||
offset = int(uint32(in[1]) | uint32(in[2])<<8)
|
||||
length = 1 + int(in[0])>>2
|
||||
s += 3
|
||||
|
||||
case tagCopy4:
|
||||
in := src[s : s+5]
|
||||
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
|
||||
length = 1 + int(in[0])>>2
|
||||
s += 5
|
||||
}
|
||||
|
||||
if offset <= 0 || length > len(dst)-d {
|
||||
if debugErrs {
|
||||
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
|
||||
// copy from dict
|
||||
if d < offset {
|
||||
if d > MaxDictSrcOffset {
|
||||
if debugErrs {
|
||||
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
startOff := len(dict.dict) - offset + d
|
||||
if startOff < 0 || startOff+length > len(dict.dict) {
|
||||
if debugErrs {
|
||||
fmt.Printf("offset (%d) + length (%d) bigger than dict (%d)\n", offset, length, len(dict.dict))
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
if debug {
|
||||
fmt.Println("dict copy, length:", length, "offset:", offset, "d-after:", d+length, "dict start offset:", startOff)
|
||||
}
|
||||
copy(dst[d:d+length], dict.dict[startOff:])
|
||||
d += length
|
||||
continue
|
||||
}
|
||||
|
||||
if debug {
|
||||
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
|
||||
}
|
||||
|
||||
// Copy from an earlier sub-slice of dst to a later sub-slice.
|
||||
// If no overlap, use the built-in copy:
|
||||
if offset > length {
|
||||
copy(dst[d:d+length], dst[d-offset:])
|
||||
d += length
|
||||
continue
|
||||
}
|
||||
|
||||
// Unlike the built-in copy function, this byte-by-byte copy always runs
|
||||
// forwards, even if the slices overlap. Conceptually, this is:
|
||||
//
|
||||
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
|
||||
//
|
||||
// We align the slices into a and b and show the compiler they are the same size.
|
||||
// This allows the loop to run without bounds checks.
|
||||
a := dst[d : d+length]
|
||||
b := dst[d-offset:]
|
||||
b = b[:len(a)]
|
||||
for i := range a {
|
||||
a[i] = b[i]
|
||||
}
|
||||
d += length
|
||||
}
|
||||
|
||||
// Remaining with extra checks...
|
||||
for s < len(src) {
|
||||
switch src[s] & 0x03 {
|
||||
case tagLiteral:
|
||||
x := uint32(src[s] >> 2)
|
||||
switch {
|
||||
case x < 60:
|
||||
s++
|
||||
case x == 60:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-1])
|
||||
case x == 61:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||
case x == 62:
|
||||
s += 4
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||
case x == 63:
|
||||
s += 5
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||
}
|
||||
length = int(x) + 1
|
||||
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
|
||||
if debugErrs {
|
||||
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
if debug {
|
||||
fmt.Println("literals, length:", length, "d-after:", d+length)
|
||||
}
|
||||
|
||||
copy(dst[d:], src[s:s+length])
|
||||
d += length
|
||||
s += length
|
||||
continue
|
||||
|
||||
case tagCopy1:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(src[s-2]) >> 2 & 0x7
|
||||
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
if toffset == 0 {
|
||||
if debug {
|
||||
fmt.Print("(repeat) ")
|
||||
}
|
||||
// keep last offset
|
||||
switch length {
|
||||
case 5:
|
||||
s += 1
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(uint32(src[s-1])) + 4
|
||||
case 6:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
|
||||
case 7:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
|
||||
default: // 0-> 4
|
||||
}
|
||||
} else {
|
||||
offset = toffset
|
||||
}
|
||||
length += 4
|
||||
case tagCopy2:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = 1 + int(src[s-3])>>2
|
||||
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||
|
||||
case tagCopy4:
|
||||
s += 5
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
if debugErrs {
|
||||
fmt.Println("src went oob")
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = 1 + int(src[s-5])>>2
|
||||
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||
}
|
||||
|
||||
if offset <= 0 || length > len(dst)-d {
|
||||
if debugErrs {
|
||||
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
|
||||
// copy from dict
|
||||
if d < offset {
|
||||
if d > MaxDictSrcOffset {
|
||||
if debugErrs {
|
||||
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
rOff := len(dict.dict) - (offset - d)
|
||||
if debug {
|
||||
fmt.Println("starting dict entry from dict offset", len(dict.dict)-rOff)
|
||||
}
|
||||
if rOff+length > len(dict.dict) {
|
||||
if debugErrs {
|
||||
fmt.Println("err: END offset", rOff+length, "bigger than dict", len(dict.dict), "dict offset:", rOff, "length:", length)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
if rOff < 0 {
|
||||
if debugErrs {
|
||||
fmt.Println("err: START offset", rOff, "less than 0", len(dict.dict), "dict offset:", rOff, "length:", length)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
copy(dst[d:d+length], dict.dict[rOff:])
|
||||
d += length
|
||||
continue
|
||||
}
|
||||
|
||||
if debug {
|
||||
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
|
||||
}
|
||||
|
||||
// Copy from an earlier sub-slice of dst to a later sub-slice.
|
||||
// If no overlap, use the built-in copy:
|
||||
if offset > length {
|
||||
copy(dst[d:d+length], dst[d-offset:])
|
||||
d += length
|
||||
continue
|
||||
}
|
||||
|
||||
// Unlike the built-in copy function, this byte-by-byte copy always runs
|
||||
// forwards, even if the slices overlap. Conceptually, this is:
|
||||
//
|
||||
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
|
||||
//
|
||||
// We align the slices into a and b and show the compiler they are the same size.
|
||||
// This allows the loop to run without bounds checks.
|
||||
a := dst[d : d+length]
|
||||
b := dst[d-offset:]
|
||||
b = b[:len(a)]
|
||||
for i := range a {
|
||||
a[i] = b[i]
|
||||
}
|
||||
d += length
|
||||
}
|
||||
|
||||
if d != len(dst) {
|
||||
if debugErrs {
|
||||
fmt.Println("wanted length", len(dst), "got", d)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
return 0
|
||||
}
|
568
vendor/github.com/klauspost/compress/s2/decode_amd64.s
generated
vendored
568
vendor/github.com/klauspost/compress/s2/decode_amd64.s
generated
vendored
@ -1,568 +0,0 @@
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define R_TMP0 AX
|
||||
#define R_TMP1 BX
|
||||
#define R_LEN CX
|
||||
#define R_OFF DX
|
||||
#define R_SRC SI
|
||||
#define R_DST DI
|
||||
#define R_DBASE R8
|
||||
#define R_DLEN R9
|
||||
#define R_DEND R10
|
||||
#define R_SBASE R11
|
||||
#define R_SLEN R12
|
||||
#define R_SEND R13
|
||||
#define R_TMP2 R14
|
||||
#define R_TMP3 R15
|
||||
|
||||
// The asm code generally follows the pure Go code in decode_other.go, except
|
||||
// where marked with a "!!!".
|
||||
|
||||
// func decode(dst, src []byte) int
|
||||
//
|
||||
// All local variables fit into registers. The non-zero stack size is only to
|
||||
// spill registers and push args when issuing a CALL. The register allocation:
|
||||
// - R_TMP0 scratch
|
||||
// - R_TMP1 scratch
|
||||
// - R_LEN length or x (shared)
|
||||
// - R_OFF offset
|
||||
// - R_SRC &src[s]
|
||||
// - R_DST &dst[d]
|
||||
// + R_DBASE dst_base
|
||||
// + R_DLEN dst_len
|
||||
// + R_DEND dst_base + dst_len
|
||||
// + R_SBASE src_base
|
||||
// + R_SLEN src_len
|
||||
// + R_SEND src_base + src_len
|
||||
// - R_TMP2 used by doCopy
|
||||
// - R_TMP3 used by doCopy
|
||||
//
|
||||
// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
|
||||
// function, and after a CALL returns, and are not otherwise modified.
|
||||
//
|
||||
// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
|
||||
// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
|
||||
TEXT ·s2Decode(SB), NOSPLIT, $48-56
|
||||
// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
|
||||
MOVQ dst_base+0(FP), R_DBASE
|
||||
MOVQ dst_len+8(FP), R_DLEN
|
||||
MOVQ R_DBASE, R_DST
|
||||
MOVQ R_DBASE, R_DEND
|
||||
ADDQ R_DLEN, R_DEND
|
||||
MOVQ src_base+24(FP), R_SBASE
|
||||
MOVQ src_len+32(FP), R_SLEN
|
||||
MOVQ R_SBASE, R_SRC
|
||||
MOVQ R_SBASE, R_SEND
|
||||
ADDQ R_SLEN, R_SEND
|
||||
XORQ R_OFF, R_OFF
|
||||
|
||||
loop:
|
||||
// for s < len(src)
|
||||
CMPQ R_SRC, R_SEND
|
||||
JEQ end
|
||||
|
||||
// R_LEN = uint32(src[s])
|
||||
//
|
||||
// switch src[s] & 0x03
|
||||
MOVBLZX (R_SRC), R_LEN
|
||||
MOVL R_LEN, R_TMP1
|
||||
ANDL $3, R_TMP1
|
||||
CMPL R_TMP1, $1
|
||||
JAE tagCopy
|
||||
|
||||
// ----------------------------------------
|
||||
// The code below handles literal tags.
|
||||
|
||||
// case tagLiteral:
|
||||
// x := uint32(src[s] >> 2)
|
||||
// switch
|
||||
SHRL $2, R_LEN
|
||||
CMPL R_LEN, $60
|
||||
JAE tagLit60Plus
|
||||
|
||||
// case x < 60:
|
||||
// s++
|
||||
INCQ R_SRC
|
||||
|
||||
doLit:
|
||||
// This is the end of the inner "switch", when we have a literal tag.
|
||||
//
|
||||
// We assume that R_LEN == x and x fits in a uint32, where x is the variable
|
||||
// used in the pure Go decode_other.go code.
|
||||
|
||||
// length = int(x) + 1
|
||||
//
|
||||
// Unlike the pure Go code, we don't need to check if length <= 0 because
|
||||
// R_LEN can hold 64 bits, so the increment cannot overflow.
|
||||
INCQ R_LEN
|
||||
|
||||
// Prepare to check if copying length bytes will run past the end of dst or
|
||||
// src.
|
||||
//
|
||||
// R_TMP0 = len(dst) - d
|
||||
// R_TMP1 = len(src) - s
|
||||
MOVQ R_DEND, R_TMP0
|
||||
SUBQ R_DST, R_TMP0
|
||||
MOVQ R_SEND, R_TMP1
|
||||
SUBQ R_SRC, R_TMP1
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) copies.
|
||||
//
|
||||
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
|
||||
// goto callMemmove // Fall back on calling runtime·memmove.
|
||||
// }
|
||||
//
|
||||
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
|
||||
// against 21 instead of 16, because it cannot assume that all of its input
|
||||
// is contiguous in memory and so it needs to leave enough source bytes to
|
||||
// read the next tag without refilling buffers, but Go's Decode assumes
|
||||
// contiguousness (the src argument is a []byte).
|
||||
CMPQ R_LEN, $16
|
||||
JGT callMemmove
|
||||
CMPQ R_TMP0, $16
|
||||
JLT callMemmove
|
||||
CMPQ R_TMP1, $16
|
||||
JLT callMemmove
|
||||
|
||||
// !!! Implement the copy from src to dst as a 16-byte load and store.
|
||||
// (Decode's documentation says that dst and src must not overlap.)
|
||||
//
|
||||
// This always copies 16 bytes, instead of only length bytes, but that's
|
||||
// OK. If the input is a valid Snappy encoding then subsequent iterations
|
||||
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
|
||||
// non-nil error), so the overrun will be ignored.
|
||||
//
|
||||
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
|
||||
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||
// effective on architectures that are fussier about alignment.
|
||||
MOVOU 0(R_SRC), X0
|
||||
MOVOU X0, 0(R_DST)
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADDQ R_LEN, R_DST
|
||||
ADDQ R_LEN, R_SRC
|
||||
JMP loop
|
||||
|
||||
callMemmove:
|
||||
// if length > len(dst)-d || length > len(src)-s { etc }
|
||||
CMPQ R_LEN, R_TMP0
|
||||
JGT errCorrupt
|
||||
CMPQ R_LEN, R_TMP1
|
||||
JGT errCorrupt
|
||||
|
||||
// copy(dst[d:], src[s:s+length])
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
|
||||
// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
|
||||
// three registers to the stack, to save local variables across the CALL.
|
||||
MOVQ R_DST, 0(SP)
|
||||
MOVQ R_SRC, 8(SP)
|
||||
MOVQ R_LEN, 16(SP)
|
||||
MOVQ R_DST, 24(SP)
|
||||
MOVQ R_SRC, 32(SP)
|
||||
MOVQ R_LEN, 40(SP)
|
||||
MOVQ R_OFF, 48(SP)
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// Restore local variables: unspill registers from the stack and
|
||||
// re-calculate R_DBASE-R_SEND.
|
||||
MOVQ 24(SP), R_DST
|
||||
MOVQ 32(SP), R_SRC
|
||||
MOVQ 40(SP), R_LEN
|
||||
MOVQ 48(SP), R_OFF
|
||||
MOVQ dst_base+0(FP), R_DBASE
|
||||
MOVQ dst_len+8(FP), R_DLEN
|
||||
MOVQ R_DBASE, R_DEND
|
||||
ADDQ R_DLEN, R_DEND
|
||||
MOVQ src_base+24(FP), R_SBASE
|
||||
MOVQ src_len+32(FP), R_SLEN
|
||||
MOVQ R_SBASE, R_SEND
|
||||
ADDQ R_SLEN, R_SEND
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADDQ R_LEN, R_DST
|
||||
ADDQ R_LEN, R_SRC
|
||||
JMP loop
|
||||
|
||||
tagLit60Plus:
|
||||
// !!! This fragment does the
|
||||
//
|
||||
// s += x - 58; if uint(s) > uint(len(src)) { etc }
|
||||
//
|
||||
// checks. In the asm version, we code it once instead of once per switch case.
|
||||
ADDQ R_LEN, R_SRC
|
||||
SUBQ $58, R_SRC
|
||||
CMPQ R_SRC, R_SEND
|
||||
JA errCorrupt
|
||||
|
||||
// case x == 60:
|
||||
CMPL R_LEN, $61
|
||||
JEQ tagLit61
|
||||
JA tagLit62Plus
|
||||
|
||||
// x = uint32(src[s-1])
|
||||
MOVBLZX -1(R_SRC), R_LEN
|
||||
JMP doLit
|
||||
|
||||
tagLit61:
|
||||
// case x == 61:
|
||||
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||
MOVWLZX -2(R_SRC), R_LEN
|
||||
JMP doLit
|
||||
|
||||
tagLit62Plus:
|
||||
CMPL R_LEN, $62
|
||||
JA tagLit63
|
||||
|
||||
// case x == 62:
|
||||
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||
// We read one byte, safe to read one back, since we are just reading tag.
|
||||
// x = binary.LittleEndian.Uint32(src[s-1:]) >> 8
|
||||
MOVL -4(R_SRC), R_LEN
|
||||
SHRL $8, R_LEN
|
||||
JMP doLit
|
||||
|
||||
tagLit63:
|
||||
// case x == 63:
|
||||
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||
MOVL -4(R_SRC), R_LEN
|
||||
JMP doLit
|
||||
|
||||
// The code above handles literal tags.
|
||||
// ----------------------------------------
|
||||
// The code below handles copy tags.
|
||||
|
||||
tagCopy4:
|
||||
// case tagCopy4:
|
||||
// s += 5
|
||||
ADDQ $5, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
CMPQ R_SRC, R_SEND
|
||||
JA errCorrupt
|
||||
|
||||
// length = 1 + int(src[s-5])>>2
|
||||
SHRQ $2, R_LEN
|
||||
INCQ R_LEN
|
||||
|
||||
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||
MOVLQZX -4(R_SRC), R_OFF
|
||||
JMP doCopy
|
||||
|
||||
tagCopy2:
|
||||
// case tagCopy2:
|
||||
// s += 3
|
||||
ADDQ $3, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
CMPQ R_SRC, R_SEND
|
||||
JA errCorrupt
|
||||
|
||||
// length = 1 + int(src[s-3])>>2
|
||||
SHRQ $2, R_LEN
|
||||
INCQ R_LEN
|
||||
|
||||
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||
MOVWQZX -2(R_SRC), R_OFF
|
||||
JMP doCopy
|
||||
|
||||
tagCopy:
|
||||
// We have a copy tag. We assume that:
|
||||
// - R_TMP1 == src[s] & 0x03
|
||||
// - R_LEN == src[s]
|
||||
CMPQ R_TMP1, $2
|
||||
JEQ tagCopy2
|
||||
JA tagCopy4
|
||||
|
||||
// case tagCopy1:
|
||||
// s += 2
|
||||
ADDQ $2, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
CMPQ R_SRC, R_SEND
|
||||
JA errCorrupt
|
||||
|
||||
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
// length = 4 + int(src[s-2])>>2&0x7
|
||||
MOVBQZX -1(R_SRC), R_TMP1
|
||||
MOVQ R_LEN, R_TMP0
|
||||
SHRQ $2, R_LEN
|
||||
ANDQ $0xe0, R_TMP0
|
||||
ANDQ $7, R_LEN
|
||||
SHLQ $3, R_TMP0
|
||||
ADDQ $4, R_LEN
|
||||
ORQ R_TMP1, R_TMP0
|
||||
|
||||
// check if repeat code, ZF set by ORQ.
|
||||
JZ repeatCode
|
||||
|
||||
// This is a regular copy, transfer our temporary value to R_OFF (length)
|
||||
MOVQ R_TMP0, R_OFF
|
||||
JMP doCopy
|
||||
|
||||
// This is a repeat code.
|
||||
repeatCode:
|
||||
// If length < 9, reuse last offset, with the length already calculated.
|
||||
CMPQ R_LEN, $9
|
||||
JL doCopyRepeat
|
||||
|
||||
// Read additional bytes for length.
|
||||
JE repeatLen1
|
||||
|
||||
// Rare, so the extra branch shouldn't hurt too much.
|
||||
CMPQ R_LEN, $10
|
||||
JE repeatLen2
|
||||
JMP repeatLen3
|
||||
|
||||
// Read repeat lengths.
|
||||
repeatLen1:
|
||||
// s ++
|
||||
ADDQ $1, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
CMPQ R_SRC, R_SEND
|
||||
JA errCorrupt
|
||||
|
||||
// length = src[s-1] + 8
|
||||
MOVBQZX -1(R_SRC), R_LEN
|
||||
ADDL $8, R_LEN
|
||||
JMP doCopyRepeat
|
||||
|
||||
repeatLen2:
|
||||
// s +=2
|
||||
ADDQ $2, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
CMPQ R_SRC, R_SEND
|
||||
JA errCorrupt
|
||||
|
||||
// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + (1 << 8)
|
||||
MOVWQZX -2(R_SRC), R_LEN
|
||||
ADDL $260, R_LEN
|
||||
JMP doCopyRepeat
|
||||
|
||||
repeatLen3:
|
||||
// s +=3
|
||||
ADDQ $3, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
CMPQ R_SRC, R_SEND
|
||||
JA errCorrupt
|
||||
|
||||
// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + (1 << 16)
|
||||
// Read one byte further back (just part of the tag, shifted out)
|
||||
MOVL -4(R_SRC), R_LEN
|
||||
SHRL $8, R_LEN
|
||||
ADDL $65540, R_LEN
|
||||
JMP doCopyRepeat
|
||||
|
||||
doCopy:
|
||||
// This is the end of the outer "switch", when we have a copy tag.
|
||||
//
|
||||
// We assume that:
|
||||
// - R_LEN == length && R_LEN > 0
|
||||
// - R_OFF == offset
|
||||
|
||||
// if d < offset { etc }
|
||||
MOVQ R_DST, R_TMP1
|
||||
SUBQ R_DBASE, R_TMP1
|
||||
CMPQ R_TMP1, R_OFF
|
||||
JLT errCorrupt
|
||||
|
||||
// Repeat values can skip the test above, since any offset > 0 will be in dst.
|
||||
doCopyRepeat:
|
||||
// if offset <= 0 { etc }
|
||||
CMPQ R_OFF, $0
|
||||
JLE errCorrupt
|
||||
|
||||
// if length > len(dst)-d { etc }
|
||||
MOVQ R_DEND, R_TMP1
|
||||
SUBQ R_DST, R_TMP1
|
||||
CMPQ R_LEN, R_TMP1
|
||||
JGT errCorrupt
|
||||
|
||||
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
|
||||
//
|
||||
// Set:
|
||||
// - R_TMP2 = len(dst)-d
|
||||
// - R_TMP3 = &dst[d-offset]
|
||||
MOVQ R_DEND, R_TMP2
|
||||
SUBQ R_DST, R_TMP2
|
||||
MOVQ R_DST, R_TMP3
|
||||
SUBQ R_OFF, R_TMP3
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
|
||||
//
|
||||
// First, try using two 8-byte load/stores, similar to the doLit technique
|
||||
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
|
||||
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
|
||||
// and not one 16-byte load/store, and the first store has to be before the
|
||||
// second load, due to the overlap if offset is in the range [8, 16).
|
||||
//
|
||||
// if length > 16 || offset < 8 || len(dst)-d < 16 {
|
||||
// goto slowForwardCopy
|
||||
// }
|
||||
// copy 16 bytes
|
||||
// d += length
|
||||
CMPQ R_LEN, $16
|
||||
JGT slowForwardCopy
|
||||
CMPQ R_OFF, $8
|
||||
JLT slowForwardCopy
|
||||
CMPQ R_TMP2, $16
|
||||
JLT slowForwardCopy
|
||||
MOVQ 0(R_TMP3), R_TMP0
|
||||
MOVQ R_TMP0, 0(R_DST)
|
||||
MOVQ 8(R_TMP3), R_TMP1
|
||||
MOVQ R_TMP1, 8(R_DST)
|
||||
ADDQ R_LEN, R_DST
|
||||
JMP loop
|
||||
|
||||
slowForwardCopy:
|
||||
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
|
||||
// can still try 8-byte load stores, provided we can overrun up to 10 extra
|
||||
// bytes. As above, the overrun will be fixed up by subsequent iterations
|
||||
// of the outermost loop.
|
||||
//
|
||||
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
|
||||
// commentary says:
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// The main part of this loop is a simple copy of eight bytes at a time
|
||||
// until we've copied (at least) the requested amount of bytes. However,
|
||||
// if d and d-offset are less than eight bytes apart (indicating a
|
||||
// repeating pattern of length < 8), we first need to expand the pattern in
|
||||
// order to get the correct results. For instance, if the buffer looks like
|
||||
// this, with the eight-byte <d-offset> and <d> patterns marked as
|
||||
// intervals:
|
||||
//
|
||||
// abxxxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
|
||||
// once, after which we can move <d> two bytes without moving <d-offset>:
|
||||
//
|
||||
// ababxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// and repeat the exercise until the two no longer overlap.
|
||||
//
|
||||
// This allows us to do very well in the special case of one single byte
|
||||
// repeated many times, without taking a big hit for more general cases.
|
||||
//
|
||||
// The worst case of extra writing past the end of the match occurs when
|
||||
// offset == 1 and length == 1; the last copy will read from byte positions
|
||||
// [0..7] and write to [4..11], whereas it was only supposed to write to
|
||||
// position 1. Thus, ten excess bytes.
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// That "10 byte overrun" worst case is confirmed by Go's
|
||||
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
|
||||
// and finishSlowForwardCopy algorithm.
|
||||
//
|
||||
// if length > len(dst)-d-10 {
|
||||
// goto verySlowForwardCopy
|
||||
// }
|
||||
SUBQ $10, R_TMP2
|
||||
CMPQ R_LEN, R_TMP2
|
||||
JGT verySlowForwardCopy
|
||||
|
||||
// We want to keep the offset, so we use R_TMP2 from here.
|
||||
MOVQ R_OFF, R_TMP2
|
||||
|
||||
makeOffsetAtLeast8:
|
||||
// !!! As above, expand the pattern so that offset >= 8 and we can use
|
||||
// 8-byte load/stores.
|
||||
//
|
||||
// for offset < 8 {
|
||||
// copy 8 bytes from dst[d-offset:] to dst[d:]
|
||||
// length -= offset
|
||||
// d += offset
|
||||
// offset += offset
|
||||
// // The two previous lines together means that d-offset, and therefore
|
||||
// // R_TMP3, is unchanged.
|
||||
// }
|
||||
CMPQ R_TMP2, $8
|
||||
JGE fixUpSlowForwardCopy
|
||||
MOVQ (R_TMP3), R_TMP1
|
||||
MOVQ R_TMP1, (R_DST)
|
||||
SUBQ R_TMP2, R_LEN
|
||||
ADDQ R_TMP2, R_DST
|
||||
ADDQ R_TMP2, R_TMP2
|
||||
JMP makeOffsetAtLeast8
|
||||
|
||||
fixUpSlowForwardCopy:
|
||||
// !!! Add length (which might be negative now) to d (implied by R_DST being
|
||||
// &dst[d]) so that d ends up at the right place when we jump back to the
|
||||
// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
|
||||
// length is positive, copying the remaining length bytes will write to the
|
||||
// right place.
|
||||
MOVQ R_DST, R_TMP0
|
||||
ADDQ R_LEN, R_DST
|
||||
|
||||
finishSlowForwardCopy:
|
||||
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
|
||||
// length means that we overrun, but as above, that will be fixed up by
|
||||
// subsequent iterations of the outermost loop.
|
||||
CMPQ R_LEN, $0
|
||||
JLE loop
|
||||
MOVQ (R_TMP3), R_TMP1
|
||||
MOVQ R_TMP1, (R_TMP0)
|
||||
ADDQ $8, R_TMP3
|
||||
ADDQ $8, R_TMP0
|
||||
SUBQ $8, R_LEN
|
||||
JMP finishSlowForwardCopy
|
||||
|
||||
verySlowForwardCopy:
|
||||
// verySlowForwardCopy is a simple implementation of forward copy. In C
|
||||
// parlance, this is a do/while loop instead of a while loop, since we know
|
||||
// that length > 0. In Go syntax:
|
||||
//
|
||||
// for {
|
||||
// dst[d] = dst[d - offset]
|
||||
// d++
|
||||
// length--
|
||||
// if length == 0 {
|
||||
// break
|
||||
// }
|
||||
// }
|
||||
MOVB (R_TMP3), R_TMP1
|
||||
MOVB R_TMP1, (R_DST)
|
||||
INCQ R_TMP3
|
||||
INCQ R_DST
|
||||
DECQ R_LEN
|
||||
JNZ verySlowForwardCopy
|
||||
JMP loop
|
||||
|
||||
// The code above handles copy tags.
|
||||
// ----------------------------------------
|
||||
|
||||
end:
|
||||
// This is the end of the "for s < len(src)".
|
||||
//
|
||||
// if d != len(dst) { etc }
|
||||
CMPQ R_DST, R_DEND
|
||||
JNE errCorrupt
|
||||
|
||||
// return 0
|
||||
MOVQ $0, ret+48(FP)
|
||||
RET
|
||||
|
||||
errCorrupt:
|
||||
// return decodeErrCodeCorrupt
|
||||
MOVQ $1, ret+48(FP)
|
||||
RET
|
574
vendor/github.com/klauspost/compress/s2/decode_arm64.s
generated
vendored
574
vendor/github.com/klauspost/compress/s2/decode_arm64.s
generated
vendored
@ -1,574 +0,0 @@
|
||||
// Copyright 2020 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
#define R_TMP0 R2
|
||||
#define R_TMP1 R3
|
||||
#define R_LEN R4
|
||||
#define R_OFF R5
|
||||
#define R_SRC R6
|
||||
#define R_DST R7
|
||||
#define R_DBASE R8
|
||||
#define R_DLEN R9
|
||||
#define R_DEND R10
|
||||
#define R_SBASE R11
|
||||
#define R_SLEN R12
|
||||
#define R_SEND R13
|
||||
#define R_TMP2 R14
|
||||
#define R_TMP3 R15
|
||||
|
||||
// TEST_SRC will check if R_SRC is <= SRC_END
|
||||
#define TEST_SRC() \
|
||||
CMP R_SEND, R_SRC \
|
||||
BGT errCorrupt
|
||||
|
||||
// MOVD R_SRC, R_TMP1
|
||||
// SUB R_SBASE, R_TMP1, R_TMP1
|
||||
// CMP R_SLEN, R_TMP1
|
||||
// BGT errCorrupt
|
||||
|
||||
// The asm code generally follows the pure Go code in decode_other.go, except
|
||||
// where marked with a "!!!".
|
||||
|
||||
// func decode(dst, src []byte) int
|
||||
//
|
||||
// All local variables fit into registers. The non-zero stack size is only to
|
||||
// spill registers and push args when issuing a CALL. The register allocation:
|
||||
// - R_TMP0 scratch
|
||||
// - R_TMP1 scratch
|
||||
// - R_LEN length or x
|
||||
// - R_OFF offset
|
||||
// - R_SRC &src[s]
|
||||
// - R_DST &dst[d]
|
||||
// + R_DBASE dst_base
|
||||
// + R_DLEN dst_len
|
||||
// + R_DEND dst_base + dst_len
|
||||
// + R_SBASE src_base
|
||||
// + R_SLEN src_len
|
||||
// + R_SEND src_base + src_len
|
||||
// - R_TMP2 used by doCopy
|
||||
// - R_TMP3 used by doCopy
|
||||
//
|
||||
// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
|
||||
// function, and after a CALL returns, and are not otherwise modified.
|
||||
//
|
||||
// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
|
||||
// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
|
||||
TEXT ·s2Decode(SB), NOSPLIT, $56-64
|
||||
// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
|
||||
MOVD dst_base+0(FP), R_DBASE
|
||||
MOVD dst_len+8(FP), R_DLEN
|
||||
MOVD R_DBASE, R_DST
|
||||
MOVD R_DBASE, R_DEND
|
||||
ADD R_DLEN, R_DEND, R_DEND
|
||||
MOVD src_base+24(FP), R_SBASE
|
||||
MOVD src_len+32(FP), R_SLEN
|
||||
MOVD R_SBASE, R_SRC
|
||||
MOVD R_SBASE, R_SEND
|
||||
ADD R_SLEN, R_SEND, R_SEND
|
||||
MOVD $0, R_OFF
|
||||
|
||||
loop:
|
||||
// for s < len(src)
|
||||
CMP R_SEND, R_SRC
|
||||
BEQ end
|
||||
|
||||
// R_LEN = uint32(src[s])
|
||||
//
|
||||
// switch src[s] & 0x03
|
||||
MOVBU (R_SRC), R_LEN
|
||||
MOVW R_LEN, R_TMP1
|
||||
ANDW $3, R_TMP1
|
||||
MOVW $1, R1
|
||||
CMPW R1, R_TMP1
|
||||
BGE tagCopy
|
||||
|
||||
// ----------------------------------------
|
||||
// The code below handles literal tags.
|
||||
|
||||
// case tagLiteral:
|
||||
// x := uint32(src[s] >> 2)
|
||||
// switch
|
||||
MOVW $60, R1
|
||||
LSRW $2, R_LEN, R_LEN
|
||||
CMPW R_LEN, R1
|
||||
BLS tagLit60Plus
|
||||
|
||||
// case x < 60:
|
||||
// s++
|
||||
ADD $1, R_SRC, R_SRC
|
||||
|
||||
doLit:
|
||||
// This is the end of the inner "switch", when we have a literal tag.
|
||||
//
|
||||
// We assume that R_LEN == x and x fits in a uint32, where x is the variable
|
||||
// used in the pure Go decode_other.go code.
|
||||
|
||||
// length = int(x) + 1
|
||||
//
|
||||
// Unlike the pure Go code, we don't need to check if length <= 0 because
|
||||
// R_LEN can hold 64 bits, so the increment cannot overflow.
|
||||
ADD $1, R_LEN, R_LEN
|
||||
|
||||
// Prepare to check if copying length bytes will run past the end of dst or
|
||||
// src.
|
||||
//
|
||||
// R_TMP0 = len(dst) - d
|
||||
// R_TMP1 = len(src) - s
|
||||
MOVD R_DEND, R_TMP0
|
||||
SUB R_DST, R_TMP0, R_TMP0
|
||||
MOVD R_SEND, R_TMP1
|
||||
SUB R_SRC, R_TMP1, R_TMP1
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) copies.
|
||||
//
|
||||
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
|
||||
// goto callMemmove // Fall back on calling runtime·memmove.
|
||||
// }
|
||||
//
|
||||
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
|
||||
// against 21 instead of 16, because it cannot assume that all of its input
|
||||
// is contiguous in memory and so it needs to leave enough source bytes to
|
||||
// read the next tag without refilling buffers, but Go's Decode assumes
|
||||
// contiguousness (the src argument is a []byte).
|
||||
CMP $16, R_LEN
|
||||
BGT callMemmove
|
||||
CMP $16, R_TMP0
|
||||
BLT callMemmove
|
||||
CMP $16, R_TMP1
|
||||
BLT callMemmove
|
||||
|
||||
// !!! Implement the copy from src to dst as a 16-byte load and store.
|
||||
// (Decode's documentation says that dst and src must not overlap.)
|
||||
//
|
||||
// This always copies 16 bytes, instead of only length bytes, but that's
|
||||
// OK. If the input is a valid Snappy encoding then subsequent iterations
|
||||
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
|
||||
// non-nil error), so the overrun will be ignored.
|
||||
//
|
||||
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
|
||||
// 16-byte loads and stores. This technique probably wouldn't be as
|
||||
// effective on architectures that are fussier about alignment.
|
||||
LDP 0(R_SRC), (R_TMP2, R_TMP3)
|
||||
STP (R_TMP2, R_TMP3), 0(R_DST)
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADD R_LEN, R_DST, R_DST
|
||||
ADD R_LEN, R_SRC, R_SRC
|
||||
B loop
|
||||
|
||||
callMemmove:
|
||||
// if length > len(dst)-d || length > len(src)-s { etc }
|
||||
CMP R_TMP0, R_LEN
|
||||
BGT errCorrupt
|
||||
CMP R_TMP1, R_LEN
|
||||
BGT errCorrupt
|
||||
|
||||
// copy(dst[d:], src[s:s+length])
|
||||
//
|
||||
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
|
||||
// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
|
||||
// three registers to the stack, to save local variables across the CALL.
|
||||
MOVD R_DST, 8(RSP)
|
||||
MOVD R_SRC, 16(RSP)
|
||||
MOVD R_LEN, 24(RSP)
|
||||
MOVD R_DST, 32(RSP)
|
||||
MOVD R_SRC, 40(RSP)
|
||||
MOVD R_LEN, 48(RSP)
|
||||
MOVD R_OFF, 56(RSP)
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// Restore local variables: unspill registers from the stack and
|
||||
// re-calculate R_DBASE-R_SEND.
|
||||
MOVD 32(RSP), R_DST
|
||||
MOVD 40(RSP), R_SRC
|
||||
MOVD 48(RSP), R_LEN
|
||||
MOVD 56(RSP), R_OFF
|
||||
MOVD dst_base+0(FP), R_DBASE
|
||||
MOVD dst_len+8(FP), R_DLEN
|
||||
MOVD R_DBASE, R_DEND
|
||||
ADD R_DLEN, R_DEND, R_DEND
|
||||
MOVD src_base+24(FP), R_SBASE
|
||||
MOVD src_len+32(FP), R_SLEN
|
||||
MOVD R_SBASE, R_SEND
|
||||
ADD R_SLEN, R_SEND, R_SEND
|
||||
|
||||
// d += length
|
||||
// s += length
|
||||
ADD R_LEN, R_DST, R_DST
|
||||
ADD R_LEN, R_SRC, R_SRC
|
||||
B loop
|
||||
|
||||
tagLit60Plus:
|
||||
// !!! This fragment does the
|
||||
//
|
||||
// s += x - 58; if uint(s) > uint(len(src)) { etc }
|
||||
//
|
||||
// checks. In the asm version, we code it once instead of once per switch case.
|
||||
ADD R_LEN, R_SRC, R_SRC
|
||||
SUB $58, R_SRC, R_SRC
|
||||
TEST_SRC()
|
||||
|
||||
// case x == 60:
|
||||
MOVW $61, R1
|
||||
CMPW R1, R_LEN
|
||||
BEQ tagLit61
|
||||
BGT tagLit62Plus
|
||||
|
||||
// x = uint32(src[s-1])
|
||||
MOVBU -1(R_SRC), R_LEN
|
||||
B doLit
|
||||
|
||||
tagLit61:
|
||||
// case x == 61:
|
||||
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||
MOVHU -2(R_SRC), R_LEN
|
||||
B doLit
|
||||
|
||||
tagLit62Plus:
|
||||
CMPW $62, R_LEN
|
||||
BHI tagLit63
|
||||
|
||||
// case x == 62:
|
||||
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||
MOVHU -3(R_SRC), R_LEN
|
||||
MOVBU -1(R_SRC), R_TMP1
|
||||
ORR R_TMP1<<16, R_LEN
|
||||
B doLit
|
||||
|
||||
tagLit63:
|
||||
// case x == 63:
|
||||
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||
MOVWU -4(R_SRC), R_LEN
|
||||
B doLit
|
||||
|
||||
// The code above handles literal tags.
|
||||
// ----------------------------------------
|
||||
// The code below handles copy tags.
|
||||
|
||||
tagCopy4:
|
||||
// case tagCopy4:
|
||||
// s += 5
|
||||
ADD $5, R_SRC, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
MOVD R_SRC, R_TMP1
|
||||
SUB R_SBASE, R_TMP1, R_TMP1
|
||||
CMP R_SLEN, R_TMP1
|
||||
BGT errCorrupt
|
||||
|
||||
// length = 1 + int(src[s-5])>>2
|
||||
MOVD $1, R1
|
||||
ADD R_LEN>>2, R1, R_LEN
|
||||
|
||||
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||
MOVWU -4(R_SRC), R_OFF
|
||||
B doCopy
|
||||
|
||||
tagCopy2:
|
||||
// case tagCopy2:
|
||||
// s += 3
|
||||
ADD $3, R_SRC, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
TEST_SRC()
|
||||
|
||||
// length = 1 + int(src[s-3])>>2
|
||||
MOVD $1, R1
|
||||
ADD R_LEN>>2, R1, R_LEN
|
||||
|
||||
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||
MOVHU -2(R_SRC), R_OFF
|
||||
B doCopy
|
||||
|
||||
tagCopy:
|
||||
// We have a copy tag. We assume that:
|
||||
// - R_TMP1 == src[s] & 0x03
|
||||
// - R_LEN == src[s]
|
||||
CMP $2, R_TMP1
|
||||
BEQ tagCopy2
|
||||
BGT tagCopy4
|
||||
|
||||
// case tagCopy1:
|
||||
// s += 2
|
||||
ADD $2, R_SRC, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
TEST_SRC()
|
||||
|
||||
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
// Calculate offset in R_TMP0 in case it is a repeat.
|
||||
MOVD R_LEN, R_TMP0
|
||||
AND $0xe0, R_TMP0
|
||||
MOVBU -1(R_SRC), R_TMP1
|
||||
ORR R_TMP0<<3, R_TMP1, R_TMP0
|
||||
|
||||
// length = 4 + int(src[s-2])>>2&0x7
|
||||
MOVD $7, R1
|
||||
AND R_LEN>>2, R1, R_LEN
|
||||
ADD $4, R_LEN, R_LEN
|
||||
|
||||
// check if repeat code with offset 0.
|
||||
CMP $0, R_TMP0
|
||||
BEQ repeatCode
|
||||
|
||||
// This is a regular copy, transfer our temporary value to R_OFF (offset)
|
||||
MOVD R_TMP0, R_OFF
|
||||
B doCopy
|
||||
|
||||
// This is a repeat code.
|
||||
repeatCode:
|
||||
// If length < 9, reuse last offset, with the length already calculated.
|
||||
CMP $9, R_LEN
|
||||
BLT doCopyRepeat
|
||||
BEQ repeatLen1
|
||||
CMP $10, R_LEN
|
||||
BEQ repeatLen2
|
||||
|
||||
repeatLen3:
|
||||
// s +=3
|
||||
ADD $3, R_SRC, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
TEST_SRC()
|
||||
|
||||
// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + 65540
|
||||
MOVBU -1(R_SRC), R_TMP0
|
||||
MOVHU -3(R_SRC), R_LEN
|
||||
ORR R_TMP0<<16, R_LEN, R_LEN
|
||||
ADD $65540, R_LEN, R_LEN
|
||||
B doCopyRepeat
|
||||
|
||||
repeatLen2:
|
||||
// s +=2
|
||||
ADD $2, R_SRC, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
TEST_SRC()
|
||||
|
||||
// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + 260
|
||||
MOVHU -2(R_SRC), R_LEN
|
||||
ADD $260, R_LEN, R_LEN
|
||||
B doCopyRepeat
|
||||
|
||||
repeatLen1:
|
||||
// s +=1
|
||||
ADD $1, R_SRC, R_SRC
|
||||
|
||||
// if uint(s) > uint(len(src)) { etc }
|
||||
TEST_SRC()
|
||||
|
||||
// length = src[s-1] + 8
|
||||
MOVBU -1(R_SRC), R_LEN
|
||||
ADD $8, R_LEN, R_LEN
|
||||
B doCopyRepeat
|
||||
|
||||
doCopy:
|
||||
// This is the end of the outer "switch", when we have a copy tag.
|
||||
//
|
||||
// We assume that:
|
||||
// - R_LEN == length && R_LEN > 0
|
||||
// - R_OFF == offset
|
||||
|
||||
// if d < offset { etc }
|
||||
MOVD R_DST, R_TMP1
|
||||
SUB R_DBASE, R_TMP1, R_TMP1
|
||||
CMP R_OFF, R_TMP1
|
||||
BLT errCorrupt
|
||||
|
||||
// Repeat values can skip the test above, since any offset > 0 will be in dst.
|
||||
doCopyRepeat:
|
||||
|
||||
// if offset <= 0 { etc }
|
||||
CMP $0, R_OFF
|
||||
BLE errCorrupt
|
||||
|
||||
// if length > len(dst)-d { etc }
|
||||
MOVD R_DEND, R_TMP1
|
||||
SUB R_DST, R_TMP1, R_TMP1
|
||||
CMP R_TMP1, R_LEN
|
||||
BGT errCorrupt
|
||||
|
||||
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
|
||||
//
|
||||
// Set:
|
||||
// - R_TMP2 = len(dst)-d
|
||||
// - R_TMP3 = &dst[d-offset]
|
||||
MOVD R_DEND, R_TMP2
|
||||
SUB R_DST, R_TMP2, R_TMP2
|
||||
MOVD R_DST, R_TMP3
|
||||
SUB R_OFF, R_TMP3, R_TMP3
|
||||
|
||||
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
|
||||
//
|
||||
// First, try using two 8-byte load/stores, similar to the doLit technique
|
||||
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
|
||||
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
|
||||
// and not one 16-byte load/store, and the first store has to be before the
|
||||
// second load, due to the overlap if offset is in the range [8, 16).
|
||||
//
|
||||
// if length > 16 || offset < 8 || len(dst)-d < 16 {
|
||||
// goto slowForwardCopy
|
||||
// }
|
||||
// copy 16 bytes
|
||||
// d += length
|
||||
CMP $16, R_LEN
|
||||
BGT slowForwardCopy
|
||||
CMP $8, R_OFF
|
||||
BLT slowForwardCopy
|
||||
CMP $16, R_TMP2
|
||||
BLT slowForwardCopy
|
||||
MOVD 0(R_TMP3), R_TMP0
|
||||
MOVD R_TMP0, 0(R_DST)
|
||||
MOVD 8(R_TMP3), R_TMP1
|
||||
MOVD R_TMP1, 8(R_DST)
|
||||
ADD R_LEN, R_DST, R_DST
|
||||
B loop
|
||||
|
||||
slowForwardCopy:
|
||||
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
|
||||
// can still try 8-byte load stores, provided we can overrun up to 10 extra
|
||||
// bytes. As above, the overrun will be fixed up by subsequent iterations
|
||||
// of the outermost loop.
|
||||
//
|
||||
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
|
||||
// commentary says:
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// The main part of this loop is a simple copy of eight bytes at a time
|
||||
// until we've copied (at least) the requested amount of bytes. However,
|
||||
// if d and d-offset are less than eight bytes apart (indicating a
|
||||
// repeating pattern of length < 8), we first need to expand the pattern in
|
||||
// order to get the correct results. For instance, if the buffer looks like
|
||||
// this, with the eight-byte <d-offset> and <d> patterns marked as
|
||||
// intervals:
|
||||
//
|
||||
// abxxxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
|
||||
// once, after which we can move <d> two bytes without moving <d-offset>:
|
||||
//
|
||||
// ababxxxxxxxxxx
|
||||
// [------] d-offset
|
||||
// [------] d
|
||||
//
|
||||
// and repeat the exercise until the two no longer overlap.
|
||||
//
|
||||
// This allows us to do very well in the special case of one single byte
|
||||
// repeated many times, without taking a big hit for more general cases.
|
||||
//
|
||||
// The worst case of extra writing past the end of the match occurs when
|
||||
// offset == 1 and length == 1; the last copy will read from byte positions
|
||||
// [0..7] and write to [4..11], whereas it was only supposed to write to
|
||||
// position 1. Thus, ten excess bytes.
|
||||
//
|
||||
// ----
|
||||
//
|
||||
// That "10 byte overrun" worst case is confirmed by Go's
|
||||
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
|
||||
// and finishSlowForwardCopy algorithm.
|
||||
//
|
||||
// if length > len(dst)-d-10 {
|
||||
// goto verySlowForwardCopy
|
||||
// }
|
||||
SUB $10, R_TMP2, R_TMP2
|
||||
CMP R_TMP2, R_LEN
|
||||
BGT verySlowForwardCopy
|
||||
|
||||
// We want to keep the offset, so we use R_TMP2 from here.
|
||||
MOVD R_OFF, R_TMP2
|
||||
|
||||
makeOffsetAtLeast8:
|
||||
// !!! As above, expand the pattern so that offset >= 8 and we can use
|
||||
// 8-byte load/stores.
|
||||
//
|
||||
// for offset < 8 {
|
||||
// copy 8 bytes from dst[d-offset:] to dst[d:]
|
||||
// length -= offset
|
||||
// d += offset
|
||||
// offset += offset
|
||||
// // The two previous lines together means that d-offset, and therefore
|
||||
// // R_TMP3, is unchanged.
|
||||
// }
|
||||
CMP $8, R_TMP2
|
||||
BGE fixUpSlowForwardCopy
|
||||
MOVD (R_TMP3), R_TMP1
|
||||
MOVD R_TMP1, (R_DST)
|
||||
SUB R_TMP2, R_LEN, R_LEN
|
||||
ADD R_TMP2, R_DST, R_DST
|
||||
ADD R_TMP2, R_TMP2, R_TMP2
|
||||
B makeOffsetAtLeast8
|
||||
|
||||
fixUpSlowForwardCopy:
|
||||
// !!! Add length (which might be negative now) to d (implied by R_DST being
|
||||
// &dst[d]) so that d ends up at the right place when we jump back to the
|
||||
// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
|
||||
// length is positive, copying the remaining length bytes will write to the
|
||||
// right place.
|
||||
MOVD R_DST, R_TMP0
|
||||
ADD R_LEN, R_DST, R_DST
|
||||
|
||||
finishSlowForwardCopy:
|
||||
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
|
||||
// length means that we overrun, but as above, that will be fixed up by
|
||||
// subsequent iterations of the outermost loop.
|
||||
MOVD $0, R1
|
||||
CMP R1, R_LEN
|
||||
BLE loop
|
||||
MOVD (R_TMP3), R_TMP1
|
||||
MOVD R_TMP1, (R_TMP0)
|
||||
ADD $8, R_TMP3, R_TMP3
|
||||
ADD $8, R_TMP0, R_TMP0
|
||||
SUB $8, R_LEN, R_LEN
|
||||
B finishSlowForwardCopy
|
||||
|
||||
verySlowForwardCopy:
|
||||
// verySlowForwardCopy is a simple implementation of forward copy. In C
|
||||
// parlance, this is a do/while loop instead of a while loop, since we know
|
||||
// that length > 0. In Go syntax:
|
||||
//
|
||||
// for {
|
||||
// dst[d] = dst[d - offset]
|
||||
// d++
|
||||
// length--
|
||||
// if length == 0 {
|
||||
// break
|
||||
// }
|
||||
// }
|
||||
MOVB (R_TMP3), R_TMP1
|
||||
MOVB R_TMP1, (R_DST)
|
||||
ADD $1, R_TMP3, R_TMP3
|
||||
ADD $1, R_DST, R_DST
|
||||
SUB $1, R_LEN, R_LEN
|
||||
CBNZ R_LEN, verySlowForwardCopy
|
||||
B loop
|
||||
|
||||
// The code above handles copy tags.
|
||||
// ----------------------------------------
|
||||
|
||||
end:
|
||||
// This is the end of the "for s < len(src)".
|
||||
//
|
||||
// if d != len(dst) { etc }
|
||||
CMP R_DEND, R_DST
|
||||
BNE errCorrupt
|
||||
|
||||
// return 0
|
||||
MOVD $0, ret+48(FP)
|
||||
RET
|
||||
|
||||
errCorrupt:
|
||||
// return decodeErrCodeCorrupt
|
||||
MOVD $1, R_TMP0
|
||||
MOVD R_TMP0, ret+48(FP)
|
||||
RET
|
17
vendor/github.com/klauspost/compress/s2/decode_asm.go
generated
vendored
17
vendor/github.com/klauspost/compress/s2/decode_asm.go
generated
vendored
@ -1,17 +0,0 @@
|
||||
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (amd64 || arm64) && !appengine && gc && !noasm
|
||||
// +build amd64 arm64
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
package s2
|
||||
|
||||
// decode has the same semantics as in decode_other.go.
|
||||
//
|
||||
//go:noescape
|
||||
func s2Decode(dst, src []byte) int
|
292
vendor/github.com/klauspost/compress/s2/decode_other.go
generated
vendored
292
vendor/github.com/klauspost/compress/s2/decode_other.go
generated
vendored
@ -1,292 +0,0 @@
|
||||
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (!amd64 && !arm64) || appengine || !gc || noasm
|
||||
// +build !amd64,!arm64 appengine !gc noasm
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
// decode writes the decoding of src to dst. It assumes that the varint-encoded
|
||||
// length of the decompressed bytes has already been read, and that len(dst)
|
||||
// equals that length.
|
||||
//
|
||||
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
|
||||
func s2Decode(dst, src []byte) int {
|
||||
const debug = false
|
||||
if debug {
|
||||
fmt.Println("Starting decode, dst len:", len(dst))
|
||||
}
|
||||
var d, s, length int
|
||||
offset := 0
|
||||
|
||||
// As long as we can read at least 5 bytes...
|
||||
for s < len(src)-5 {
|
||||
// Removing bounds checks is SLOWER, when if doing
|
||||
// in := src[s:s+5]
|
||||
// Checked on Go 1.18
|
||||
switch src[s] & 0x03 {
|
||||
case tagLiteral:
|
||||
x := uint32(src[s] >> 2)
|
||||
switch {
|
||||
case x < 60:
|
||||
s++
|
||||
case x == 60:
|
||||
s += 2
|
||||
x = uint32(src[s-1])
|
||||
case x == 61:
|
||||
in := src[s : s+3]
|
||||
x = uint32(in[1]) | uint32(in[2])<<8
|
||||
s += 3
|
||||
case x == 62:
|
||||
in := src[s : s+4]
|
||||
// Load as 32 bit and shift down.
|
||||
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
|
||||
x >>= 8
|
||||
s += 4
|
||||
case x == 63:
|
||||
in := src[s : s+5]
|
||||
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
|
||||
s += 5
|
||||
}
|
||||
length = int(x) + 1
|
||||
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
|
||||
if debug {
|
||||
fmt.Println("corrupt: lit size", length)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
if debug {
|
||||
fmt.Println("literals, length:", length, "d-after:", d+length)
|
||||
}
|
||||
|
||||
copy(dst[d:], src[s:s+length])
|
||||
d += length
|
||||
s += length
|
||||
continue
|
||||
|
||||
case tagCopy1:
|
||||
s += 2
|
||||
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
length = int(src[s-2]) >> 2 & 0x7
|
||||
if toffset == 0 {
|
||||
if debug {
|
||||
fmt.Print("(repeat) ")
|
||||
}
|
||||
// keep last offset
|
||||
switch length {
|
||||
case 5:
|
||||
length = int(src[s]) + 4
|
||||
s += 1
|
||||
case 6:
|
||||
in := src[s : s+2]
|
||||
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
|
||||
s += 2
|
||||
case 7:
|
||||
in := src[s : s+3]
|
||||
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
|
||||
s += 3
|
||||
default: // 0-> 4
|
||||
}
|
||||
} else {
|
||||
offset = toffset
|
||||
}
|
||||
length += 4
|
||||
case tagCopy2:
|
||||
in := src[s : s+3]
|
||||
offset = int(uint32(in[1]) | uint32(in[2])<<8)
|
||||
length = 1 + int(in[0])>>2
|
||||
s += 3
|
||||
|
||||
case tagCopy4:
|
||||
in := src[s : s+5]
|
||||
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
|
||||
length = 1 + int(in[0])>>2
|
||||
s += 5
|
||||
}
|
||||
|
||||
if offset <= 0 || d < offset || length > len(dst)-d {
|
||||
if debug {
|
||||
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
|
||||
}
|
||||
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
|
||||
if debug {
|
||||
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
|
||||
}
|
||||
|
||||
// Copy from an earlier sub-slice of dst to a later sub-slice.
|
||||
// If no overlap, use the built-in copy:
|
||||
if offset > length {
|
||||
copy(dst[d:d+length], dst[d-offset:])
|
||||
d += length
|
||||
continue
|
||||
}
|
||||
|
||||
// Unlike the built-in copy function, this byte-by-byte copy always runs
|
||||
// forwards, even if the slices overlap. Conceptually, this is:
|
||||
//
|
||||
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
|
||||
//
|
||||
// We align the slices into a and b and show the compiler they are the same size.
|
||||
// This allows the loop to run without bounds checks.
|
||||
a := dst[d : d+length]
|
||||
b := dst[d-offset:]
|
||||
b = b[:len(a)]
|
||||
for i := range a {
|
||||
a[i] = b[i]
|
||||
}
|
||||
d += length
|
||||
}
|
||||
|
||||
// Remaining with extra checks...
|
||||
for s < len(src) {
|
||||
switch src[s] & 0x03 {
|
||||
case tagLiteral:
|
||||
x := uint32(src[s] >> 2)
|
||||
switch {
|
||||
case x < 60:
|
||||
s++
|
||||
case x == 60:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-1])
|
||||
case x == 61:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-2]) | uint32(src[s-1])<<8
|
||||
case x == 62:
|
||||
s += 4
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
|
||||
case x == 63:
|
||||
s += 5
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
|
||||
}
|
||||
length = int(x) + 1
|
||||
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
|
||||
if debug {
|
||||
fmt.Println("corrupt: lit size", length)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
if debug {
|
||||
fmt.Println("literals, length:", length, "d-after:", d+length)
|
||||
}
|
||||
|
||||
copy(dst[d:], src[s:s+length])
|
||||
d += length
|
||||
s += length
|
||||
continue
|
||||
|
||||
case tagCopy1:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(src[s-2]) >> 2 & 0x7
|
||||
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
|
||||
if toffset == 0 {
|
||||
if debug {
|
||||
fmt.Print("(repeat) ")
|
||||
}
|
||||
// keep last offset
|
||||
switch length {
|
||||
case 5:
|
||||
s += 1
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(uint32(src[s-1])) + 4
|
||||
case 6:
|
||||
s += 2
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
|
||||
case 7:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
|
||||
default: // 0-> 4
|
||||
}
|
||||
} else {
|
||||
offset = toffset
|
||||
}
|
||||
length += 4
|
||||
case tagCopy2:
|
||||
s += 3
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = 1 + int(src[s-3])>>2
|
||||
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
|
||||
|
||||
case tagCopy4:
|
||||
s += 5
|
||||
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
length = 1 + int(src[s-5])>>2
|
||||
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
|
||||
}
|
||||
|
||||
if offset <= 0 || d < offset || length > len(dst)-d {
|
||||
if debug {
|
||||
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
|
||||
}
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
|
||||
if debug {
|
||||
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
|
||||
}
|
||||
|
||||
// Copy from an earlier sub-slice of dst to a later sub-slice.
|
||||
// If no overlap, use the built-in copy:
|
||||
if offset > length {
|
||||
copy(dst[d:d+length], dst[d-offset:])
|
||||
d += length
|
||||
continue
|
||||
}
|
||||
|
||||
// Unlike the built-in copy function, this byte-by-byte copy always runs
|
||||
// forwards, even if the slices overlap. Conceptually, this is:
|
||||
//
|
||||
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
|
||||
//
|
||||
// We align the slices into a and b and show the compiler they are the same size.
|
||||
// This allows the loop to run without bounds checks.
|
||||
a := dst[d : d+length]
|
||||
b := dst[d-offset:]
|
||||
b = b[:len(a)]
|
||||
for i := range a {
|
||||
a[i] = b[i]
|
||||
}
|
||||
d += length
|
||||
}
|
||||
|
||||
if d != len(dst) {
|
||||
return decodeErrCodeCorrupt
|
||||
}
|
||||
return 0
|
||||
}
|
350
vendor/github.com/klauspost/compress/s2/dict.go
generated
vendored
350
vendor/github.com/klauspost/compress/s2/dict.go
generated
vendored
@ -1,350 +0,0 @@
|
||||
// Copyright (c) 2022+ Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"sync"
|
||||
)
|
||||
|
||||
const (
|
||||
// MinDictSize is the minimum dictionary size when repeat has been read.
|
||||
MinDictSize = 16
|
||||
|
||||
// MaxDictSize is the maximum dictionary size when repeat has been read.
|
||||
MaxDictSize = 65536
|
||||
|
||||
// MaxDictSrcOffset is the maximum offset where a dictionary entry can start.
|
||||
MaxDictSrcOffset = 65535
|
||||
)
|
||||
|
||||
// Dict contains a dictionary that can be used for encoding and decoding s2
|
||||
type Dict struct {
|
||||
dict []byte
|
||||
repeat int // Repeat as index of dict
|
||||
|
||||
fast, better, best sync.Once
|
||||
fastTable *[1 << 14]uint16
|
||||
|
||||
betterTableShort *[1 << 14]uint16
|
||||
betterTableLong *[1 << 17]uint16
|
||||
|
||||
bestTableShort *[1 << 16]uint32
|
||||
bestTableLong *[1 << 19]uint32
|
||||
}
|
||||
|
||||
// NewDict will read a dictionary.
|
||||
// It will return nil if the dictionary is invalid.
|
||||
func NewDict(dict []byte) *Dict {
|
||||
if len(dict) == 0 {
|
||||
return nil
|
||||
}
|
||||
var d Dict
|
||||
// Repeat is the first value of the dict
|
||||
r, n := binary.Uvarint(dict)
|
||||
if n <= 0 {
|
||||
return nil
|
||||
}
|
||||
dict = dict[n:]
|
||||
d.dict = dict
|
||||
if cap(d.dict) < len(d.dict)+16 {
|
||||
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
|
||||
}
|
||||
if len(dict) < MinDictSize || len(dict) > MaxDictSize {
|
||||
return nil
|
||||
}
|
||||
d.repeat = int(r)
|
||||
if d.repeat > len(dict) {
|
||||
return nil
|
||||
}
|
||||
return &d
|
||||
}
|
||||
|
||||
// Bytes will return a serialized version of the dictionary.
|
||||
// The output can be sent to NewDict.
|
||||
func (d *Dict) Bytes() []byte {
|
||||
dst := make([]byte, binary.MaxVarintLen16+len(d.dict))
|
||||
return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...)
|
||||
}
|
||||
|
||||
// MakeDict will create a dictionary.
|
||||
// 'data' must be at least MinDictSize.
|
||||
// If data is longer than MaxDictSize only the last MaxDictSize bytes will be used.
|
||||
// If searchStart is set the start repeat value will be set to the last
|
||||
// match of this content.
|
||||
// If no matches are found, it will attempt to find shorter matches.
|
||||
// This content should match the typical start of a block.
|
||||
// If at least 4 bytes cannot be matched, repeat is set to start of block.
|
||||
func MakeDict(data []byte, searchStart []byte) *Dict {
|
||||
if len(data) == 0 {
|
||||
return nil
|
||||
}
|
||||
if len(data) > MaxDictSize {
|
||||
data = data[len(data)-MaxDictSize:]
|
||||
}
|
||||
var d Dict
|
||||
dict := data
|
||||
d.dict = dict
|
||||
if cap(d.dict) < len(d.dict)+16 {
|
||||
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
|
||||
}
|
||||
if len(dict) < MinDictSize {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Find the longest match possible, last entry if multiple.
|
||||
for s := len(searchStart); s > 4; s-- {
|
||||
if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 {
|
||||
d.repeat = idx
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return &d
|
||||
}
|
||||
|
||||
// MakeDictManual will create a dictionary.
|
||||
// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.
|
||||
// A manual first repeat index into data must be provided.
|
||||
// It must be less than len(data)-8.
|
||||
func MakeDictManual(data []byte, firstIdx uint16) *Dict {
|
||||
if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
|
||||
return nil
|
||||
}
|
||||
var d Dict
|
||||
dict := data
|
||||
d.dict = dict
|
||||
if cap(d.dict) < len(d.dict)+16 {
|
||||
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
|
||||
}
|
||||
|
||||
d.repeat = int(firstIdx)
|
||||
return &d
|
||||
}
|
||||
|
||||
// Encode returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func (d *Dict) Encode(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if cap(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
} else {
|
||||
dst = dst[:n]
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
dstP := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:dstP]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
dstP += emitLiteral(dst[dstP:], src)
|
||||
return dst[:dstP]
|
||||
}
|
||||
n := encodeBlockDictGo(dst[dstP:], src, d)
|
||||
if n > 0 {
|
||||
dstP += n
|
||||
return dst[:dstP]
|
||||
}
|
||||
// Not compressible
|
||||
dstP += emitLiteral(dst[dstP:], src)
|
||||
return dst[:dstP]
|
||||
}
|
||||
|
||||
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// EncodeBetter compresses better than Encode but typically with a
|
||||
// 10-40% speed decrease on both compression and decompression.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func (d *Dict) EncodeBetter(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if len(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
dstP := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:dstP]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
dstP += emitLiteral(dst[dstP:], src)
|
||||
return dst[:dstP]
|
||||
}
|
||||
n := encodeBlockBetterDict(dst[dstP:], src, d)
|
||||
if n > 0 {
|
||||
dstP += n
|
||||
return dst[:dstP]
|
||||
}
|
||||
// Not compressible
|
||||
dstP += emitLiteral(dst[dstP:], src)
|
||||
return dst[:dstP]
|
||||
}
|
||||
|
||||
// EncodeBest returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// EncodeBest compresses as good as reasonably possible but with a
|
||||
// big speed decrease.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func (d *Dict) EncodeBest(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if len(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
dstP := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:dstP]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
dstP += emitLiteral(dst[dstP:], src)
|
||||
return dst[:dstP]
|
||||
}
|
||||
n := encodeBlockBest(dst[dstP:], src, d)
|
||||
if n > 0 {
|
||||
dstP += n
|
||||
return dst[:dstP]
|
||||
}
|
||||
// Not compressible
|
||||
dstP += emitLiteral(dst[dstP:], src)
|
||||
return dst[:dstP]
|
||||
}
|
||||
|
||||
// Decode returns the decoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire decoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
func (d *Dict) Decode(dst, src []byte) ([]byte, error) {
|
||||
dLen, s, err := decodedLen(src)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if dLen <= cap(dst) {
|
||||
dst = dst[:dLen]
|
||||
} else {
|
||||
dst = make([]byte, dLen)
|
||||
}
|
||||
if s2DecodeDict(dst, src[s:], d) != 0 {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
func (d *Dict) initFast() {
|
||||
d.fast.Do(func() {
|
||||
const (
|
||||
tableBits = 14
|
||||
maxTableSize = 1 << tableBits
|
||||
)
|
||||
|
||||
var table [maxTableSize]uint16
|
||||
// We stop so any entry of length 8 can always be read.
|
||||
for i := 0; i < len(d.dict)-8-2; i += 3 {
|
||||
x0 := load64(d.dict, i)
|
||||
h0 := hash6(x0, tableBits)
|
||||
h1 := hash6(x0>>8, tableBits)
|
||||
h2 := hash6(x0>>16, tableBits)
|
||||
table[h0] = uint16(i)
|
||||
table[h1] = uint16(i + 1)
|
||||
table[h2] = uint16(i + 2)
|
||||
}
|
||||
d.fastTable = &table
|
||||
})
|
||||
}
|
||||
|
||||
func (d *Dict) initBetter() {
|
||||
d.better.Do(func() {
|
||||
const (
|
||||
// Long hash matches.
|
||||
lTableBits = 17
|
||||
maxLTableSize = 1 << lTableBits
|
||||
|
||||
// Short hash matches.
|
||||
sTableBits = 14
|
||||
maxSTableSize = 1 << sTableBits
|
||||
)
|
||||
|
||||
var lTable [maxLTableSize]uint16
|
||||
var sTable [maxSTableSize]uint16
|
||||
|
||||
// We stop so any entry of length 8 can always be read.
|
||||
for i := 0; i < len(d.dict)-8; i++ {
|
||||
cv := load64(d.dict, i)
|
||||
lTable[hash7(cv, lTableBits)] = uint16(i)
|
||||
sTable[hash4(cv, sTableBits)] = uint16(i)
|
||||
}
|
||||
d.betterTableShort = &sTable
|
||||
d.betterTableLong = &lTable
|
||||
})
|
||||
}
|
||||
|
||||
func (d *Dict) initBest() {
|
||||
d.best.Do(func() {
|
||||
const (
|
||||
// Long hash matches.
|
||||
lTableBits = 19
|
||||
maxLTableSize = 1 << lTableBits
|
||||
|
||||
// Short hash matches.
|
||||
sTableBits = 16
|
||||
maxSTableSize = 1 << sTableBits
|
||||
)
|
||||
|
||||
var lTable [maxLTableSize]uint32
|
||||
var sTable [maxSTableSize]uint32
|
||||
|
||||
// We stop so any entry of length 8 can always be read.
|
||||
for i := 0; i < len(d.dict)-8; i++ {
|
||||
cv := load64(d.dict, i)
|
||||
hashL := hash8(cv, lTableBits)
|
||||
hashS := hash4(cv, sTableBits)
|
||||
candidateL := lTable[hashL]
|
||||
candidateS := sTable[hashS]
|
||||
lTable[hashL] = uint32(i) | candidateL<<16
|
||||
sTable[hashS] = uint32(i) | candidateS<<16
|
||||
}
|
||||
d.bestTableShort = &sTable
|
||||
d.bestTableLong = &lTable
|
||||
})
|
||||
}
|
393
vendor/github.com/klauspost/compress/s2/encode.go
generated
vendored
393
vendor/github.com/klauspost/compress/s2/encode.go
generated
vendored
@ -1,393 +0,0 @@
|
||||
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// Encode returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func Encode(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if cap(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
} else {
|
||||
dst = dst[:n]
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:d]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
n := encodeBlock(dst[d:], src)
|
||||
if n > 0 {
|
||||
d += n
|
||||
return dst[:d]
|
||||
}
|
||||
// Not compressible
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
// EstimateBlockSize will perform a very fast compression
|
||||
// without outputting the result and return the compressed output size.
|
||||
// The function returns -1 if no improvement could be achieved.
|
||||
// Using actual compression will most often produce better compression than the estimate.
|
||||
func EstimateBlockSize(src []byte) (d int) {
|
||||
if len(src) < 6 || int64(len(src)) > 0xffffffff {
|
||||
return -1
|
||||
}
|
||||
if len(src) <= 1024 {
|
||||
d = calcBlockSizeSmall(src)
|
||||
} else {
|
||||
d = calcBlockSize(src)
|
||||
}
|
||||
|
||||
if d == 0 {
|
||||
return -1
|
||||
}
|
||||
// Size of the varint encoded block size.
|
||||
d += (bits.Len64(uint64(len(src))) + 7) / 7
|
||||
|
||||
if d >= len(src) {
|
||||
return -1
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// EncodeBetter compresses better than Encode but typically with a
|
||||
// 10-40% speed decrease on both compression and decompression.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func EncodeBetter(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if len(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:d]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
n := encodeBlockBetter(dst[d:], src)
|
||||
if n > 0 {
|
||||
d += n
|
||||
return dst[:d]
|
||||
}
|
||||
// Not compressible
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
// EncodeBest returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// EncodeBest compresses as good as reasonably possible but with a
|
||||
// big speed decrease.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func EncodeBest(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if len(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:d]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
n := encodeBlockBest(dst[d:], src, nil)
|
||||
if n > 0 {
|
||||
d += n
|
||||
return dst[:d]
|
||||
}
|
||||
// Not compressible
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The output is Snappy compatible and will likely decompress faster.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func EncodeSnappy(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if cap(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
} else {
|
||||
dst = dst[:n]
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:d]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
n := encodeBlockSnappy(dst[d:], src)
|
||||
if n > 0 {
|
||||
d += n
|
||||
return dst[:d]
|
||||
}
|
||||
// Not compressible
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The output is Snappy compatible and will likely decompress faster.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func EncodeSnappyBetter(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if cap(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
} else {
|
||||
dst = dst[:n]
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:d]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
n := encodeBlockBetterSnappy(dst[d:], src)
|
||||
if n > 0 {
|
||||
d += n
|
||||
return dst[:d]
|
||||
}
|
||||
// Not compressible
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
|
||||
// slice of dst if dst was large enough to hold the entire encoded block.
|
||||
// Otherwise, a newly allocated slice will be returned.
|
||||
//
|
||||
// The output is Snappy compatible and will likely decompress faster.
|
||||
//
|
||||
// The dst and src must not overlap. It is valid to pass a nil dst.
|
||||
//
|
||||
// The blocks will require the same amount of memory to decode as encoding,
|
||||
// and does not make for concurrent decoding.
|
||||
// Also note that blocks do not contain CRC information, so corruption may be undetected.
|
||||
//
|
||||
// If you need to encode larger amounts of data, consider using
|
||||
// the streaming interface which gives all of these features.
|
||||
func EncodeSnappyBest(dst, src []byte) []byte {
|
||||
if n := MaxEncodedLen(len(src)); n < 0 {
|
||||
panic(ErrTooLarge)
|
||||
} else if cap(dst) < n {
|
||||
dst = make([]byte, n)
|
||||
} else {
|
||||
dst = dst[:n]
|
||||
}
|
||||
|
||||
// The block starts with the varint-encoded length of the decompressed bytes.
|
||||
d := binary.PutUvarint(dst, uint64(len(src)))
|
||||
|
||||
if len(src) == 0 {
|
||||
return dst[:d]
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
n := encodeBlockBestSnappy(dst[d:], src)
|
||||
if n > 0 {
|
||||
d += n
|
||||
return dst[:d]
|
||||
}
|
||||
// Not compressible
|
||||
d += emitLiteral(dst[d:], src)
|
||||
return dst[:d]
|
||||
}
|
||||
|
||||
// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
|
||||
// If the destination is nil or too small, a new will be allocated.
|
||||
// The blocks are not validated, so garbage in = garbage out.
|
||||
// dst may not overlap block data.
|
||||
// Any data in dst is preserved as is, so it will not be considered a block.
|
||||
func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
|
||||
totalSize := uint64(0)
|
||||
compSize := 0
|
||||
for _, b := range blocks {
|
||||
l, hdr, err := decodedLen(b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
totalSize += uint64(l)
|
||||
compSize += len(b) - hdr
|
||||
}
|
||||
if totalSize == 0 {
|
||||
dst = append(dst, 0)
|
||||
return dst, nil
|
||||
}
|
||||
if totalSize > math.MaxUint32 {
|
||||
return nil, ErrTooLarge
|
||||
}
|
||||
var tmp [binary.MaxVarintLen32]byte
|
||||
hdrSize := binary.PutUvarint(tmp[:], totalSize)
|
||||
wantSize := hdrSize + compSize
|
||||
|
||||
if cap(dst)-len(dst) < wantSize {
|
||||
dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
|
||||
}
|
||||
dst = append(dst, tmp[:hdrSize]...)
|
||||
for _, b := range blocks {
|
||||
_, hdr, err := decodedLen(b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
dst = append(dst, b[hdr:]...)
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
// inputMargin is the minimum number of extra input bytes to keep, inside
|
||||
// encodeBlock's inner loop. On some architectures, this margin lets us
|
||||
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
|
||||
// literals can be implemented as a single load to and store from a 16-byte
|
||||
// register. That literal's actual length can be as short as 1 byte, so this
|
||||
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
|
||||
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
|
||||
// that we don't overrun the dst and src buffers.
|
||||
const inputMargin = 8
|
||||
|
||||
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
|
||||
// will be accepted by the encoder.
|
||||
const minNonLiteralBlockSize = 32
|
||||
|
||||
const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)
|
||||
|
||||
// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
|
||||
// Blocks this big are highly discouraged, though.
|
||||
// Half the size on 32 bit systems.
|
||||
const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5
|
||||
|
||||
// MaxEncodedLen returns the maximum length of a snappy block, given its
|
||||
// uncompressed length.
|
||||
//
|
||||
// It will return a negative value if srcLen is too large to encode.
|
||||
// 32 bit platforms will have lower thresholds for rejecting big content.
|
||||
func MaxEncodedLen(srcLen int) int {
|
||||
n := uint64(srcLen)
|
||||
if intReduction == 1 {
|
||||
// 32 bits
|
||||
if n > math.MaxInt32 {
|
||||
// Also includes negative.
|
||||
return -1
|
||||
}
|
||||
} else if n > 0xffffffff {
|
||||
// 64 bits
|
||||
// Also includes negative.
|
||||
return -1
|
||||
}
|
||||
// Size of the varint encoded block size.
|
||||
n = n + uint64((bits.Len64(n)+7)/7)
|
||||
|
||||
// Add maximum size of encoding block as literals.
|
||||
n += uint64(literalExtraSize(int64(srcLen)))
|
||||
if intReduction == 1 {
|
||||
// 32 bits
|
||||
if n > math.MaxInt32 {
|
||||
return -1
|
||||
}
|
||||
} else if n > 0xffffffff {
|
||||
// 64 bits
|
||||
// Also includes negative.
|
||||
return -1
|
||||
}
|
||||
return int(n)
|
||||
}
|
1048
vendor/github.com/klauspost/compress/s2/encode_all.go
generated
vendored
1048
vendor/github.com/klauspost/compress/s2/encode_all.go
generated
vendored
File diff suppressed because it is too large
Load Diff
148
vendor/github.com/klauspost/compress/s2/encode_amd64.go
generated
vendored
148
vendor/github.com/klauspost/compress/s2/encode_amd64.go
generated
vendored
@ -1,148 +0,0 @@
|
||||
//go:build !appengine && !noasm && gc
|
||||
// +build !appengine,!noasm,gc
|
||||
|
||||
package s2
|
||||
|
||||
const hasAmd64Asm = true
|
||||
|
||||
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlock(dst, src []byte) (d int) {
|
||||
const (
|
||||
// Use 12 bit table when less than...
|
||||
limit12B = 16 << 10
|
||||
// Use 10 bit table when less than...
|
||||
limit10B = 4 << 10
|
||||
// Use 8 bit table when less than...
|
||||
limit8B = 512
|
||||
)
|
||||
|
||||
if len(src) >= 4<<20 {
|
||||
return encodeBlockAsm(dst, src)
|
||||
}
|
||||
if len(src) >= limit12B {
|
||||
return encodeBlockAsm4MB(dst, src)
|
||||
}
|
||||
if len(src) >= limit10B {
|
||||
return encodeBlockAsm12B(dst, src)
|
||||
}
|
||||
if len(src) >= limit8B {
|
||||
return encodeBlockAsm10B(dst, src)
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
return encodeBlockAsm8B(dst, src)
|
||||
}
|
||||
|
||||
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlockBetter(dst, src []byte) (d int) {
|
||||
const (
|
||||
// Use 12 bit table when less than...
|
||||
limit12B = 16 << 10
|
||||
// Use 10 bit table when less than...
|
||||
limit10B = 4 << 10
|
||||
// Use 8 bit table when less than...
|
||||
limit8B = 512
|
||||
)
|
||||
|
||||
if len(src) > 4<<20 {
|
||||
return encodeBetterBlockAsm(dst, src)
|
||||
}
|
||||
if len(src) >= limit12B {
|
||||
return encodeBetterBlockAsm4MB(dst, src)
|
||||
}
|
||||
if len(src) >= limit10B {
|
||||
return encodeBetterBlockAsm12B(dst, src)
|
||||
}
|
||||
if len(src) >= limit8B {
|
||||
return encodeBetterBlockAsm10B(dst, src)
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
return encodeBetterBlockAsm8B(dst, src)
|
||||
}
|
||||
|
||||
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlockSnappy(dst, src []byte) (d int) {
|
||||
const (
|
||||
// Use 12 bit table when less than...
|
||||
limit12B = 16 << 10
|
||||
// Use 10 bit table when less than...
|
||||
limit10B = 4 << 10
|
||||
// Use 8 bit table when less than...
|
||||
limit8B = 512
|
||||
)
|
||||
if len(src) >= 64<<10 {
|
||||
return encodeSnappyBlockAsm(dst, src)
|
||||
}
|
||||
if len(src) >= limit12B {
|
||||
return encodeSnappyBlockAsm64K(dst, src)
|
||||
}
|
||||
if len(src) >= limit10B {
|
||||
return encodeSnappyBlockAsm12B(dst, src)
|
||||
}
|
||||
if len(src) >= limit8B {
|
||||
return encodeSnappyBlockAsm10B(dst, src)
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
return encodeSnappyBlockAsm8B(dst, src)
|
||||
}
|
||||
|
||||
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
|
||||
const (
|
||||
// Use 12 bit table when less than...
|
||||
limit12B = 16 << 10
|
||||
// Use 10 bit table when less than...
|
||||
limit10B = 4 << 10
|
||||
// Use 8 bit table when less than...
|
||||
limit8B = 512
|
||||
)
|
||||
if len(src) >= 64<<10 {
|
||||
return encodeSnappyBetterBlockAsm(dst, src)
|
||||
}
|
||||
if len(src) >= limit12B {
|
||||
return encodeSnappyBetterBlockAsm64K(dst, src)
|
||||
}
|
||||
if len(src) >= limit10B {
|
||||
return encodeSnappyBetterBlockAsm12B(dst, src)
|
||||
}
|
||||
if len(src) >= limit8B {
|
||||
return encodeSnappyBetterBlockAsm10B(dst, src)
|
||||
}
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
return encodeSnappyBetterBlockAsm8B(dst, src)
|
||||
}
|
793
vendor/github.com/klauspost/compress/s2/encode_best.go
generated
vendored
793
vendor/github.com/klauspost/compress/s2/encode_best.go
generated
vendored
@ -1,793 +0,0 @@
|
||||
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
|
||||
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
|
||||
// Initialize the hash tables.
|
||||
const (
|
||||
// Long hash matches.
|
||||
lTableBits = 19
|
||||
maxLTableSize = 1 << lTableBits
|
||||
|
||||
// Short hash matches.
|
||||
sTableBits = 16
|
||||
maxSTableSize = 1 << sTableBits
|
||||
|
||||
inputMargin = 8 + 2
|
||||
|
||||
debug = false
|
||||
)
|
||||
|
||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
sLimit := len(src) - inputMargin
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
sLimitDict := len(src) - inputMargin
|
||||
if sLimitDict > MaxDictSrcOffset-inputMargin {
|
||||
sLimitDict = MaxDictSrcOffset - inputMargin
|
||||
}
|
||||
|
||||
var lTable [maxLTableSize]uint64
|
||||
var sTable [maxSTableSize]uint64
|
||||
|
||||
// Bail if we can't compress to at least this.
|
||||
dstLimit := len(src) - 5
|
||||
|
||||
// nextEmit is where in src the next emitLiteral should start from.
|
||||
nextEmit := 0
|
||||
|
||||
// The encoded form must start with a literal, as there are no previous
|
||||
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||
s := 1
|
||||
repeat := 1
|
||||
if dict != nil {
|
||||
dict.initBest()
|
||||
s = 0
|
||||
repeat = len(dict.dict) - dict.repeat
|
||||
}
|
||||
cv := load64(src, s)
|
||||
|
||||
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
|
||||
const lowbitMask = 0xffffffff
|
||||
getCur := func(x uint64) int {
|
||||
return int(x & lowbitMask)
|
||||
}
|
||||
getPrev := func(x uint64) int {
|
||||
return int(x >> 32)
|
||||
}
|
||||
const maxSkip = 64
|
||||
|
||||
for {
|
||||
type match struct {
|
||||
offset int
|
||||
s int
|
||||
length int
|
||||
score int
|
||||
rep, dict bool
|
||||
}
|
||||
var best match
|
||||
for {
|
||||
// Next src position to check
|
||||
nextS := (s-nextEmit)>>8 + 1
|
||||
if nextS > maxSkip {
|
||||
nextS = s + maxSkip
|
||||
} else {
|
||||
nextS += s
|
||||
}
|
||||
if nextS > sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
if dict != nil && s >= MaxDictSrcOffset {
|
||||
dict = nil
|
||||
if repeat > s {
|
||||
repeat = math.MinInt32
|
||||
}
|
||||
}
|
||||
hashL := hash8(cv, lTableBits)
|
||||
hashS := hash4(cv, sTableBits)
|
||||
candidateL := lTable[hashL]
|
||||
candidateS := sTable[hashS]
|
||||
|
||||
score := func(m match) int {
|
||||
// Matches that are longer forward are penalized since we must emit it as a literal.
|
||||
score := m.length - m.s
|
||||
if nextEmit == m.s {
|
||||
// If we do not have to emit literals, we save 1 byte
|
||||
score++
|
||||
}
|
||||
offset := m.s - m.offset
|
||||
if m.rep {
|
||||
return score - emitRepeatSize(offset, m.length)
|
||||
}
|
||||
return score - emitCopySize(offset, m.length)
|
||||
}
|
||||
|
||||
matchAt := func(offset, s int, first uint32, rep bool) match {
|
||||
if best.length != 0 && best.s-best.offset == s-offset {
|
||||
// Don't retest if we have the same offset.
|
||||
return match{offset: offset, s: s}
|
||||
}
|
||||
if load32(src, offset) != first {
|
||||
return match{offset: offset, s: s}
|
||||
}
|
||||
m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
|
||||
s += 4
|
||||
for s < len(src) {
|
||||
if len(src)-s < 8 {
|
||||
if src[s] == src[m.length] {
|
||||
m.length++
|
||||
s++
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
|
||||
m.length += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
m.length += 8
|
||||
}
|
||||
m.length -= offset
|
||||
m.score = score(m)
|
||||
if m.score <= -m.s {
|
||||
// Eliminate if no savings, we might find a better one.
|
||||
m.length = 0
|
||||
}
|
||||
return m
|
||||
}
|
||||
matchDict := func(candidate, s int, first uint32, rep bool) match {
|
||||
// Calculate offset as if in continuous array with s
|
||||
offset := -len(dict.dict) + candidate
|
||||
if best.length != 0 && best.s-best.offset == s-offset && !rep {
|
||||
// Don't retest if we have the same offset.
|
||||
return match{offset: offset, s: s}
|
||||
}
|
||||
|
||||
if load32(dict.dict, candidate) != first {
|
||||
return match{offset: offset, s: s}
|
||||
}
|
||||
m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
|
||||
s += 4
|
||||
if !rep {
|
||||
for s < sLimitDict && m.length < len(dict.dict) {
|
||||
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
|
||||
if src[s] == dict.dict[m.length] {
|
||||
m.length++
|
||||
s++
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
|
||||
m.length += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
m.length += 8
|
||||
}
|
||||
} else {
|
||||
for s < len(src) && m.length < len(dict.dict) {
|
||||
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
|
||||
if src[s] == dict.dict[m.length] {
|
||||
m.length++
|
||||
s++
|
||||
continue
|
||||
}
|
||||
break
|
||||
}
|
||||
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
|
||||
m.length += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
m.length += 8
|
||||
}
|
||||
}
|
||||
m.length -= candidate
|
||||
m.score = score(m)
|
||||
if m.score <= -m.s {
|
||||
// Eliminate if no savings, we might find a better one.
|
||||
m.length = 0
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
bestOf := func(a, b match) match {
|
||||
if b.length == 0 {
|
||||
return a
|
||||
}
|
||||
if a.length == 0 {
|
||||
return b
|
||||
}
|
||||
as := a.score + b.s
|
||||
bs := b.score + a.s
|
||||
if as >= bs {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
if s > 0 {
|
||||
best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
|
||||
}
|
||||
if dict != nil {
|
||||
candidateL := dict.bestTableLong[hashL]
|
||||
candidateS := dict.bestTableShort[hashS]
|
||||
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
|
||||
best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
|
||||
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
|
||||
best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
|
||||
}
|
||||
{
|
||||
if (dict == nil || repeat <= s) && repeat > 0 {
|
||||
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
|
||||
} else if s-repeat < -4 && dict != nil {
|
||||
candidate := len(dict.dict) - (repeat - s)
|
||||
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
|
||||
candidate++
|
||||
best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
|
||||
}
|
||||
|
||||
if best.length > 0 {
|
||||
hashS := hash4(cv>>8, sTableBits)
|
||||
// s+1
|
||||
nextShort := sTable[hashS]
|
||||
s := s + 1
|
||||
cv := load64(src, s)
|
||||
hashL := hash8(cv, lTableBits)
|
||||
nextLong := lTable[hashL]
|
||||
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
|
||||
|
||||
// Dict at + 1
|
||||
if dict != nil {
|
||||
candidateL := dict.bestTableLong[hashL]
|
||||
candidateS := dict.bestTableShort[hashS]
|
||||
|
||||
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
|
||||
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
|
||||
}
|
||||
|
||||
// s+2
|
||||
if true {
|
||||
hashS := hash4(cv>>8, sTableBits)
|
||||
|
||||
nextShort = sTable[hashS]
|
||||
s++
|
||||
cv = load64(src, s)
|
||||
hashL := hash8(cv, lTableBits)
|
||||
nextLong = lTable[hashL]
|
||||
|
||||
if (dict == nil || repeat <= s) && repeat > 0 {
|
||||
// Repeat at + 2
|
||||
best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
|
||||
} else if repeat-s > 4 && dict != nil {
|
||||
candidate := len(dict.dict) - (repeat - s)
|
||||
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
|
||||
}
|
||||
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
|
||||
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
|
||||
|
||||
// Dict at +2
|
||||
// Very small gain
|
||||
if dict != nil {
|
||||
candidateL := dict.bestTableLong[hashL]
|
||||
candidateS := dict.bestTableShort[hashS]
|
||||
|
||||
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
|
||||
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
|
||||
}
|
||||
}
|
||||
// Search for a match at best match end, see if that is better.
|
||||
// Allow some bytes at the beginning to mismatch.
|
||||
// Sweet spot is around 1-2 bytes, but depends on input.
|
||||
// The skipped bytes are tested in Extend backwards,
|
||||
// and still picked up as part of the match if they do.
|
||||
const skipBeginning = 2
|
||||
const skipEnd = 1
|
||||
if sAt := best.s + best.length - skipEnd; sAt < sLimit {
|
||||
|
||||
sBack := best.s + skipBeginning - skipEnd
|
||||
backL := best.length - skipBeginning
|
||||
// Load initial values
|
||||
cv = load64(src, sBack)
|
||||
|
||||
// Grab candidates...
|
||||
next := lTable[hash8(load64(src, sAt), lTableBits)]
|
||||
|
||||
if checkAt := getCur(next) - backL; checkAt > 0 {
|
||||
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
|
||||
}
|
||||
if checkAt := getPrev(next) - backL; checkAt > 0 {
|
||||
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
|
||||
}
|
||||
// Disabled: Extremely small gain
|
||||
if false {
|
||||
next = sTable[hash4(load64(src, sAt), sTableBits)]
|
||||
if checkAt := getCur(next) - backL; checkAt > 0 {
|
||||
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
|
||||
}
|
||||
if checkAt := getPrev(next) - backL; checkAt > 0 {
|
||||
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update table
|
||||
lTable[hashL] = uint64(s) | candidateL<<32
|
||||
sTable[hashS] = uint64(s) | candidateS<<32
|
||||
|
||||
if best.length > 0 {
|
||||
break
|
||||
}
|
||||
|
||||
cv = load64(src, nextS)
|
||||
s = nextS
|
||||
}
|
||||
|
||||
// Extend backwards, not needed for repeats...
|
||||
s = best.s
|
||||
if !best.rep && !best.dict {
|
||||
for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
|
||||
best.offset--
|
||||
best.length++
|
||||
s--
|
||||
}
|
||||
}
|
||||
if false && best.offset >= s {
|
||||
panic(fmt.Errorf("t %d >= s %d", best.offset, s))
|
||||
}
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+(s-nextEmit) > dstLimit {
|
||||
return 0
|
||||
}
|
||||
|
||||
base := s
|
||||
offset := s - best.offset
|
||||
s += best.length
|
||||
|
||||
if offset > 65535 && s-base <= 5 && !best.rep {
|
||||
// Bail if the match is equal or worse to the encoding.
|
||||
s = best.s + 1
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
cv = load64(src, s)
|
||||
continue
|
||||
}
|
||||
if debug && nextEmit != base {
|
||||
fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
|
||||
}
|
||||
d += emitLiteral(dst[d:], src[nextEmit:base])
|
||||
if best.rep {
|
||||
if nextEmit > 0 || best.dict {
|
||||
if debug {
|
||||
fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
|
||||
}
|
||||
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
|
||||
d += emitRepeat(dst[d:], offset, best.length)
|
||||
} else {
|
||||
// First match without dict cannot be a repeat.
|
||||
if debug {
|
||||
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
|
||||
}
|
||||
d += emitCopy(dst[d:], offset, best.length)
|
||||
}
|
||||
} else {
|
||||
if debug {
|
||||
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
|
||||
}
|
||||
d += emitCopy(dst[d:], offset, best.length)
|
||||
}
|
||||
repeat = offset
|
||||
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
if d > dstLimit {
|
||||
// Do we have space for more, if not bail.
|
||||
return 0
|
||||
}
|
||||
// Fill tables...
|
||||
for i := best.s + 1; i < s; i++ {
|
||||
cv0 := load64(src, i)
|
||||
long0 := hash8(cv0, lTableBits)
|
||||
short0 := hash4(cv0, sTableBits)
|
||||
lTable[long0] = uint64(i) | lTable[long0]<<32
|
||||
sTable[short0] = uint64(i) | sTable[short0]<<32
|
||||
}
|
||||
cv = load64(src, s)
|
||||
}
|
||||
|
||||
emitRemainder:
|
||||
if nextEmit < len(src) {
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+len(src)-nextEmit > dstLimit {
|
||||
return 0
|
||||
}
|
||||
if debug && nextEmit != s {
|
||||
fmt.Println("emitted ", len(src)-nextEmit, "literals")
|
||||
}
|
||||
d += emitLiteral(dst[d:], src[nextEmit:])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlockBestSnappy(dst, src []byte) (d int) {
|
||||
// Initialize the hash tables.
|
||||
const (
|
||||
// Long hash matches.
|
||||
lTableBits = 19
|
||||
maxLTableSize = 1 << lTableBits
|
||||
|
||||
// Short hash matches.
|
||||
sTableBits = 16
|
||||
maxSTableSize = 1 << sTableBits
|
||||
|
||||
inputMargin = 8 + 2
|
||||
)
|
||||
|
||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
sLimit := len(src) - inputMargin
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
|
||||
var lTable [maxLTableSize]uint64
|
||||
var sTable [maxSTableSize]uint64
|
||||
|
||||
// Bail if we can't compress to at least this.
|
||||
dstLimit := len(src) - 5
|
||||
|
||||
// nextEmit is where in src the next emitLiteral should start from.
|
||||
nextEmit := 0
|
||||
|
||||
// The encoded form must start with a literal, as there are no previous
|
||||
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||
s := 1
|
||||
cv := load64(src, s)
|
||||
|
||||
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
|
||||
repeat := 1
|
||||
const lowbitMask = 0xffffffff
|
||||
getCur := func(x uint64) int {
|
||||
return int(x & lowbitMask)
|
||||
}
|
||||
getPrev := func(x uint64) int {
|
||||
return int(x >> 32)
|
||||
}
|
||||
const maxSkip = 64
|
||||
|
||||
for {
|
||||
type match struct {
|
||||
offset int
|
||||
s int
|
||||
length int
|
||||
score int
|
||||
}
|
||||
var best match
|
||||
for {
|
||||
// Next src position to check
|
||||
nextS := (s-nextEmit)>>8 + 1
|
||||
if nextS > maxSkip {
|
||||
nextS = s + maxSkip
|
||||
} else {
|
||||
nextS += s
|
||||
}
|
||||
if nextS > sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
hashL := hash8(cv, lTableBits)
|
||||
hashS := hash4(cv, sTableBits)
|
||||
candidateL := lTable[hashL]
|
||||
candidateS := sTable[hashS]
|
||||
|
||||
score := func(m match) int {
|
||||
// Matches that are longer forward are penalized since we must emit it as a literal.
|
||||
score := m.length - m.s
|
||||
if nextEmit == m.s {
|
||||
// If we do not have to emit literals, we save 1 byte
|
||||
score++
|
||||
}
|
||||
offset := m.s - m.offset
|
||||
|
||||
return score - emitCopyNoRepeatSize(offset, m.length)
|
||||
}
|
||||
|
||||
matchAt := func(offset, s int, first uint32) match {
|
||||
if best.length != 0 && best.s-best.offset == s-offset {
|
||||
// Don't retest if we have the same offset.
|
||||
return match{offset: offset, s: s}
|
||||
}
|
||||
if load32(src, offset) != first {
|
||||
return match{offset: offset, s: s}
|
||||
}
|
||||
m := match{offset: offset, s: s, length: 4 + offset}
|
||||
s += 4
|
||||
for s <= sLimit {
|
||||
if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
|
||||
m.length += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
m.length += 8
|
||||
}
|
||||
m.length -= offset
|
||||
m.score = score(m)
|
||||
if m.score <= -m.s {
|
||||
// Eliminate if no savings, we might find a better one.
|
||||
m.length = 0
|
||||
}
|
||||
return m
|
||||
}
|
||||
|
||||
bestOf := func(a, b match) match {
|
||||
if b.length == 0 {
|
||||
return a
|
||||
}
|
||||
if a.length == 0 {
|
||||
return b
|
||||
}
|
||||
as := a.score + b.s
|
||||
bs := b.score + a.s
|
||||
if as >= bs {
|
||||
return a
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))
|
||||
|
||||
{
|
||||
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
|
||||
if best.length > 0 {
|
||||
// s+1
|
||||
nextShort := sTable[hash4(cv>>8, sTableBits)]
|
||||
s := s + 1
|
||||
cv := load64(src, s)
|
||||
nextLong := lTable[hash8(cv, lTableBits)]
|
||||
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
|
||||
// Repeat at + 2
|
||||
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
|
||||
|
||||
// s+2
|
||||
if true {
|
||||
nextShort = sTable[hash4(cv>>8, sTableBits)]
|
||||
s++
|
||||
cv = load64(src, s)
|
||||
nextLong = lTable[hash8(cv, lTableBits)]
|
||||
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
|
||||
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
|
||||
}
|
||||
// Search for a match at best match end, see if that is better.
|
||||
if sAt := best.s + best.length; sAt < sLimit {
|
||||
sBack := best.s
|
||||
backL := best.length
|
||||
// Load initial values
|
||||
cv = load64(src, sBack)
|
||||
// Search for mismatch
|
||||
next := lTable[hash8(load64(src, sAt), lTableBits)]
|
||||
//next := sTable[hash4(load64(src, sAt), sTableBits)]
|
||||
|
||||
if checkAt := getCur(next) - backL; checkAt > 0 {
|
||||
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
|
||||
}
|
||||
if checkAt := getPrev(next) - backL; checkAt > 0 {
|
||||
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Update table
|
||||
lTable[hashL] = uint64(s) | candidateL<<32
|
||||
sTable[hashS] = uint64(s) | candidateS<<32
|
||||
|
||||
if best.length > 0 {
|
||||
break
|
||||
}
|
||||
|
||||
cv = load64(src, nextS)
|
||||
s = nextS
|
||||
}
|
||||
|
||||
// Extend backwards, not needed for repeats...
|
||||
s = best.s
|
||||
if true {
|
||||
for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
|
||||
best.offset--
|
||||
best.length++
|
||||
s--
|
||||
}
|
||||
}
|
||||
if false && best.offset >= s {
|
||||
panic(fmt.Errorf("t %d >= s %d", best.offset, s))
|
||||
}
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+(s-nextEmit) > dstLimit {
|
||||
return 0
|
||||
}
|
||||
|
||||
base := s
|
||||
offset := s - best.offset
|
||||
|
||||
s += best.length
|
||||
|
||||
if offset > 65535 && s-base <= 5 {
|
||||
// Bail if the match is equal or worse to the encoding.
|
||||
s = best.s + 1
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
cv = load64(src, s)
|
||||
continue
|
||||
}
|
||||
d += emitLiteral(dst[d:], src[nextEmit:base])
|
||||
d += emitCopyNoRepeat(dst[d:], offset, best.length)
|
||||
repeat = offset
|
||||
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
if d > dstLimit {
|
||||
// Do we have space for more, if not bail.
|
||||
return 0
|
||||
}
|
||||
// Fill tables...
|
||||
for i := best.s + 1; i < s; i++ {
|
||||
cv0 := load64(src, i)
|
||||
long0 := hash8(cv0, lTableBits)
|
||||
short0 := hash4(cv0, sTableBits)
|
||||
lTable[long0] = uint64(i) | lTable[long0]<<32
|
||||
sTable[short0] = uint64(i) | sTable[short0]<<32
|
||||
}
|
||||
cv = load64(src, s)
|
||||
}
|
||||
|
||||
emitRemainder:
|
||||
if nextEmit < len(src) {
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+len(src)-nextEmit > dstLimit {
|
||||
return 0
|
||||
}
|
||||
d += emitLiteral(dst[d:], src[nextEmit:])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// emitCopySize returns the size to encode the offset+length
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// 1 <= offset && offset <= math.MaxUint32
|
||||
// 4 <= length && length <= 1 << 24
|
||||
func emitCopySize(offset, length int) int {
|
||||
if offset >= 65536 {
|
||||
i := 0
|
||||
if length > 64 {
|
||||
length -= 64
|
||||
if length >= 4 {
|
||||
// Emit remaining as repeats
|
||||
return 5 + emitRepeatSize(offset, length)
|
||||
}
|
||||
i = 5
|
||||
}
|
||||
if length == 0 {
|
||||
return i
|
||||
}
|
||||
return i + 5
|
||||
}
|
||||
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
if offset < 2048 {
|
||||
// Emit 8 bytes, then rest as repeats...
|
||||
return 2 + emitRepeatSize(offset, length-8)
|
||||
}
|
||||
// Emit remaining as repeats, at least 4 bytes remain.
|
||||
return 3 + emitRepeatSize(offset, length-60)
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
return 3
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
return 2
|
||||
}
|
||||
|
||||
// emitCopyNoRepeatSize returns the size to encode the offset+length
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// 1 <= offset && offset <= math.MaxUint32
|
||||
// 4 <= length && length <= 1 << 24
|
||||
func emitCopyNoRepeatSize(offset, length int) int {
|
||||
if offset >= 65536 {
|
||||
return 5 + 5*(length/64)
|
||||
}
|
||||
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
// Emit remaining as repeats, at least 4 bytes remain.
|
||||
return 3 + 3*(length/60)
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
return 3
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
return 2
|
||||
}
|
||||
|
||||
// emitRepeatSize returns the number of bytes required to encode a repeat.
|
||||
// Length must be at least 4 and < 1<<24
|
||||
func emitRepeatSize(offset, length int) int {
|
||||
// Repeat offset, make length cheaper
|
||||
if length <= 4+4 || (length < 8+4 && offset < 2048) {
|
||||
return 2
|
||||
}
|
||||
if length < (1<<8)+4+4 {
|
||||
return 3
|
||||
}
|
||||
if length < (1<<16)+(1<<8)+4 {
|
||||
return 4
|
||||
}
|
||||
const maxRepeat = (1 << 24) - 1
|
||||
length -= (1 << 16) - 4
|
||||
left := 0
|
||||
if length > maxRepeat {
|
||||
left = length - maxRepeat + 4
|
||||
}
|
||||
if left > 0 {
|
||||
return 5 + emitRepeatSize(offset, left)
|
||||
}
|
||||
return 5
|
||||
}
|
1106
vendor/github.com/klauspost/compress/s2/encode_better.go
generated
vendored
1106
vendor/github.com/klauspost/compress/s2/encode_better.go
generated
vendored
File diff suppressed because it is too large
Load Diff
727
vendor/github.com/klauspost/compress/s2/encode_go.go
generated
vendored
727
vendor/github.com/klauspost/compress/s2/encode_go.go
generated
vendored
@ -1,727 +0,0 @@
|
||||
//go:build !amd64 || appengine || !gc || noasm
|
||||
// +build !amd64 appengine !gc noasm
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
const hasAmd64Asm = false
|
||||
|
||||
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src))
|
||||
func encodeBlock(dst, src []byte) (d int) {
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
return encodeBlockGo(dst, src)
|
||||
}
|
||||
|
||||
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src))
|
||||
func encodeBlockBetter(dst, src []byte) (d int) {
|
||||
return encodeBlockBetterGo(dst, src)
|
||||
}
|
||||
|
||||
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src))
|
||||
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
|
||||
return encodeBlockBetterSnappyGo(dst, src)
|
||||
}
|
||||
|
||||
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
|
||||
// assumes that the varint-encoded length of the decompressed bytes has already
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src))
|
||||
func encodeBlockSnappy(dst, src []byte) (d int) {
|
||||
if len(src) < minNonLiteralBlockSize {
|
||||
return 0
|
||||
}
|
||||
return encodeBlockSnappyGo(dst, src)
|
||||
}
|
||||
|
||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 0 <= len(lit) && len(lit) <= math.MaxUint32
|
||||
func emitLiteral(dst, lit []byte) int {
|
||||
if len(lit) == 0 {
|
||||
return 0
|
||||
}
|
||||
const num = 63<<2 | tagLiteral
|
||||
i, n := 0, uint(len(lit)-1)
|
||||
switch {
|
||||
case n < 60:
|
||||
dst[0] = uint8(n)<<2 | tagLiteral
|
||||
i = 1
|
||||
case n < 1<<8:
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 60<<2 | tagLiteral
|
||||
i = 2
|
||||
case n < 1<<16:
|
||||
dst[2] = uint8(n >> 8)
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 61<<2 | tagLiteral
|
||||
i = 3
|
||||
case n < 1<<24:
|
||||
dst[3] = uint8(n >> 16)
|
||||
dst[2] = uint8(n >> 8)
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 62<<2 | tagLiteral
|
||||
i = 4
|
||||
default:
|
||||
dst[4] = uint8(n >> 24)
|
||||
dst[3] = uint8(n >> 16)
|
||||
dst[2] = uint8(n >> 8)
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 63<<2 | tagLiteral
|
||||
i = 5
|
||||
}
|
||||
return i + copy(dst[i:], lit)
|
||||
}
|
||||
|
||||
// emitRepeat writes a repeat chunk and returns the number of bytes written.
|
||||
// Length must be at least 4 and < 1<<24
|
||||
func emitRepeat(dst []byte, offset, length int) int {
|
||||
// Repeat offset, make length cheaper
|
||||
length -= 4
|
||||
if length <= 4 {
|
||||
dst[0] = uint8(length)<<2 | tagCopy1
|
||||
dst[1] = 0
|
||||
return 2
|
||||
}
|
||||
if length < 8 && offset < 2048 {
|
||||
// Encode WITH offset
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
|
||||
return 2
|
||||
}
|
||||
if length < (1<<8)+4 {
|
||||
length -= 4
|
||||
dst[2] = uint8(length)
|
||||
dst[1] = 0
|
||||
dst[0] = 5<<2 | tagCopy1
|
||||
return 3
|
||||
}
|
||||
if length < (1<<16)+(1<<8) {
|
||||
length -= 1 << 8
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 6<<2 | tagCopy1
|
||||
return 4
|
||||
}
|
||||
const maxRepeat = (1 << 24) - 1
|
||||
length -= 1 << 16
|
||||
left := 0
|
||||
if length > maxRepeat {
|
||||
left = length - maxRepeat + 4
|
||||
length = maxRepeat - 4
|
||||
}
|
||||
dst[4] = uint8(length >> 16)
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 7<<2 | tagCopy1
|
||||
if left > 0 {
|
||||
return 5 + emitRepeat(dst[5:], offset, left)
|
||||
}
|
||||
return 5
|
||||
}
|
||||
|
||||
// emitCopy writes a copy chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= offset && offset <= math.MaxUint32
|
||||
// 4 <= length && length <= 1 << 24
|
||||
func emitCopy(dst []byte, offset, length int) int {
|
||||
if offset >= 65536 {
|
||||
i := 0
|
||||
if length > 64 {
|
||||
// Emit a length 64 copy, encoded as 5 bytes.
|
||||
dst[4] = uint8(offset >> 24)
|
||||
dst[3] = uint8(offset >> 16)
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = 63<<2 | tagCopy4
|
||||
length -= 64
|
||||
if length >= 4 {
|
||||
// Emit remaining as repeats
|
||||
return 5 + emitRepeat(dst[5:], offset, length)
|
||||
}
|
||||
i = 5
|
||||
}
|
||||
if length == 0 {
|
||||
return i
|
||||
}
|
||||
// Emit a copy, offset encoded as 4 bytes.
|
||||
dst[i+0] = uint8(length-1)<<2 | tagCopy4
|
||||
dst[i+1] = uint8(offset)
|
||||
dst[i+2] = uint8(offset >> 8)
|
||||
dst[i+3] = uint8(offset >> 16)
|
||||
dst[i+4] = uint8(offset >> 24)
|
||||
return i + 5
|
||||
}
|
||||
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
off := 3
|
||||
if offset < 2048 {
|
||||
// emit 8 bytes as tagCopy1, rest as repeats.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
|
||||
length -= 8
|
||||
off = 2
|
||||
} else {
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
// Emit remaining as repeat value (minimum 4 bytes).
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = 59<<2 | tagCopy2
|
||||
length -= 60
|
||||
}
|
||||
// Emit remaining as repeats, at least 4 bytes remain.
|
||||
return off + emitRepeat(dst[off:], offset, length)
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(length-1)<<2 | tagCopy2
|
||||
return 3
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
return 2
|
||||
}
|
||||
|
||||
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= offset && offset <= math.MaxUint32
|
||||
// 4 <= length && length <= 1 << 24
|
||||
func emitCopyNoRepeat(dst []byte, offset, length int) int {
|
||||
if offset >= 65536 {
|
||||
i := 0
|
||||
if length > 64 {
|
||||
// Emit a length 64 copy, encoded as 5 bytes.
|
||||
dst[4] = uint8(offset >> 24)
|
||||
dst[3] = uint8(offset >> 16)
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = 63<<2 | tagCopy4
|
||||
length -= 64
|
||||
if length >= 4 {
|
||||
// Emit remaining as repeats
|
||||
return 5 + emitCopyNoRepeat(dst[5:], offset, length)
|
||||
}
|
||||
i = 5
|
||||
}
|
||||
if length == 0 {
|
||||
return i
|
||||
}
|
||||
// Emit a copy, offset encoded as 4 bytes.
|
||||
dst[i+0] = uint8(length-1)<<2 | tagCopy4
|
||||
dst[i+1] = uint8(offset)
|
||||
dst[i+2] = uint8(offset >> 8)
|
||||
dst[i+3] = uint8(offset >> 16)
|
||||
dst[i+4] = uint8(offset >> 24)
|
||||
return i + 5
|
||||
}
|
||||
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
// Emit remaining as repeat value (minimum 4 bytes).
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = 59<<2 | tagCopy2
|
||||
length -= 60
|
||||
// Emit remaining as repeats, at least 4 bytes remain.
|
||||
return 3 + emitCopyNoRepeat(dst[3:], offset, length)
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(length-1)<<2 | tagCopy2
|
||||
return 3
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
return 2
|
||||
}
|
||||
|
||||
// matchLen returns how many bytes match in a and b
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// len(a) <= len(b)
|
||||
func matchLen(a []byte, b []byte) int {
|
||||
b = b[:len(a)]
|
||||
var checked int
|
||||
if len(a) > 4 {
|
||||
// Try 4 bytes first
|
||||
if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
|
||||
return bits.TrailingZeros32(diff) >> 3
|
||||
}
|
||||
// Switch to 8 byte matching.
|
||||
checked = 4
|
||||
a = a[4:]
|
||||
b = b[4:]
|
||||
for len(a) >= 8 {
|
||||
b = b[:len(a)]
|
||||
if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
|
||||
return checked + (bits.TrailingZeros64(diff) >> 3)
|
||||
}
|
||||
checked += 8
|
||||
a = a[8:]
|
||||
b = b[8:]
|
||||
}
|
||||
}
|
||||
b = b[:len(a)]
|
||||
for i := range a {
|
||||
if a[i] != b[i] {
|
||||
return int(i) + checked
|
||||
}
|
||||
}
|
||||
return len(a) + checked
|
||||
}
|
||||
|
||||
func calcBlockSize(src []byte) (d int) {
|
||||
// Initialize the hash table.
|
||||
const (
|
||||
tableBits = 13
|
||||
maxTableSize = 1 << tableBits
|
||||
)
|
||||
|
||||
var table [maxTableSize]uint32
|
||||
|
||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
sLimit := len(src) - inputMargin
|
||||
|
||||
// Bail if we can't compress to at least this.
|
||||
dstLimit := len(src) - len(src)>>5 - 5
|
||||
|
||||
// nextEmit is where in src the next emitLiteral should start from.
|
||||
nextEmit := 0
|
||||
|
||||
// The encoded form must start with a literal, as there are no previous
|
||||
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||
s := 1
|
||||
cv := load64(src, s)
|
||||
|
||||
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
|
||||
repeat := 1
|
||||
|
||||
for {
|
||||
candidate := 0
|
||||
for {
|
||||
// Next src position to check
|
||||
nextS := s + (s-nextEmit)>>6 + 4
|
||||
if nextS > sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
hash0 := hash6(cv, tableBits)
|
||||
hash1 := hash6(cv>>8, tableBits)
|
||||
candidate = int(table[hash0])
|
||||
candidate2 := int(table[hash1])
|
||||
table[hash0] = uint32(s)
|
||||
table[hash1] = uint32(s + 1)
|
||||
hash2 := hash6(cv>>16, tableBits)
|
||||
|
||||
// Check repeat at offset checkRep.
|
||||
const checkRep = 1
|
||||
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
|
||||
base := s + checkRep
|
||||
// Extend back
|
||||
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
|
||||
i--
|
||||
base--
|
||||
}
|
||||
d += emitLiteralSize(src[nextEmit:base])
|
||||
|
||||
// Extend forward
|
||||
candidate := s - repeat + 4 + checkRep
|
||||
s += 4 + checkRep
|
||||
for s <= sLimit {
|
||||
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
|
||||
s += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
candidate += 8
|
||||
}
|
||||
|
||||
d += emitCopyNoRepeatSize(repeat, s-base)
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
cv = load64(src, s)
|
||||
continue
|
||||
}
|
||||
|
||||
if uint32(cv) == load32(src, candidate) {
|
||||
break
|
||||
}
|
||||
candidate = int(table[hash2])
|
||||
if uint32(cv>>8) == load32(src, candidate2) {
|
||||
table[hash2] = uint32(s + 2)
|
||||
candidate = candidate2
|
||||
s++
|
||||
break
|
||||
}
|
||||
table[hash2] = uint32(s + 2)
|
||||
if uint32(cv>>16) == load32(src, candidate) {
|
||||
s += 2
|
||||
break
|
||||
}
|
||||
|
||||
cv = load64(src, nextS)
|
||||
s = nextS
|
||||
}
|
||||
|
||||
// Extend backwards
|
||||
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
|
||||
candidate--
|
||||
s--
|
||||
}
|
||||
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+(s-nextEmit) > dstLimit {
|
||||
return 0
|
||||
}
|
||||
|
||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||
// them as literal bytes.
|
||||
|
||||
d += emitLiteralSize(src[nextEmit:s])
|
||||
|
||||
// Call emitCopy, and then see if another emitCopy could be our next
|
||||
// move. Repeat until we find no match for the input immediately after
|
||||
// what was consumed by the last emitCopy call.
|
||||
//
|
||||
// If we exit this loop normally then we need to call emitLiteral next,
|
||||
// though we don't yet know how big the literal will be. We handle that
|
||||
// by proceeding to the next iteration of the main loop. We also can
|
||||
// exit this loop via goto if we get close to exhausting the input.
|
||||
for {
|
||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||
// literal bytes prior to s.
|
||||
base := s
|
||||
repeat = base - candidate
|
||||
|
||||
// Extend the 4-byte match as long as possible.
|
||||
s += 4
|
||||
candidate += 4
|
||||
for s <= len(src)-8 {
|
||||
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
|
||||
s += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
candidate += 8
|
||||
}
|
||||
|
||||
d += emitCopyNoRepeatSize(repeat, s-base)
|
||||
if false {
|
||||
// Validate match.
|
||||
a := src[base:s]
|
||||
b := src[base-repeat : base-repeat+(s-base)]
|
||||
if !bytes.Equal(a, b) {
|
||||
panic("mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
if d > dstLimit {
|
||||
// Do we have space for more, if not bail.
|
||||
return 0
|
||||
}
|
||||
// Check for an immediate match, otherwise start search at s+1
|
||||
x := load64(src, s-2)
|
||||
m2Hash := hash6(x, tableBits)
|
||||
currHash := hash6(x>>16, tableBits)
|
||||
candidate = int(table[currHash])
|
||||
table[m2Hash] = uint32(s - 2)
|
||||
table[currHash] = uint32(s)
|
||||
if uint32(x>>16) != load32(src, candidate) {
|
||||
cv = load64(src, s+1)
|
||||
s++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
emitRemainder:
|
||||
if nextEmit < len(src) {
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+len(src)-nextEmit > dstLimit {
|
||||
return 0
|
||||
}
|
||||
d += emitLiteralSize(src[nextEmit:])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
func calcBlockSizeSmall(src []byte) (d int) {
|
||||
// Initialize the hash table.
|
||||
const (
|
||||
tableBits = 9
|
||||
maxTableSize = 1 << tableBits
|
||||
)
|
||||
|
||||
var table [maxTableSize]uint32
|
||||
|
||||
// sLimit is when to stop looking for offset/length copies. The inputMargin
|
||||
// lets us use a fast path for emitLiteral in the main loop, while we are
|
||||
// looking for copies.
|
||||
sLimit := len(src) - inputMargin
|
||||
|
||||
// Bail if we can't compress to at least this.
|
||||
dstLimit := len(src) - len(src)>>5 - 5
|
||||
|
||||
// nextEmit is where in src the next emitLiteral should start from.
|
||||
nextEmit := 0
|
||||
|
||||
// The encoded form must start with a literal, as there are no previous
|
||||
// bytes to copy, so we start looking for hash matches at s == 1.
|
||||
s := 1
|
||||
cv := load64(src, s)
|
||||
|
||||
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
|
||||
repeat := 1
|
||||
|
||||
for {
|
||||
candidate := 0
|
||||
for {
|
||||
// Next src position to check
|
||||
nextS := s + (s-nextEmit)>>6 + 4
|
||||
if nextS > sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
hash0 := hash6(cv, tableBits)
|
||||
hash1 := hash6(cv>>8, tableBits)
|
||||
candidate = int(table[hash0])
|
||||
candidate2 := int(table[hash1])
|
||||
table[hash0] = uint32(s)
|
||||
table[hash1] = uint32(s + 1)
|
||||
hash2 := hash6(cv>>16, tableBits)
|
||||
|
||||
// Check repeat at offset checkRep.
|
||||
const checkRep = 1
|
||||
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
|
||||
base := s + checkRep
|
||||
// Extend back
|
||||
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
|
||||
i--
|
||||
base--
|
||||
}
|
||||
d += emitLiteralSize(src[nextEmit:base])
|
||||
|
||||
// Extend forward
|
||||
candidate := s - repeat + 4 + checkRep
|
||||
s += 4 + checkRep
|
||||
for s <= sLimit {
|
||||
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
|
||||
s += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
candidate += 8
|
||||
}
|
||||
|
||||
d += emitCopyNoRepeatSize(repeat, s-base)
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
cv = load64(src, s)
|
||||
continue
|
||||
}
|
||||
|
||||
if uint32(cv) == load32(src, candidate) {
|
||||
break
|
||||
}
|
||||
candidate = int(table[hash2])
|
||||
if uint32(cv>>8) == load32(src, candidate2) {
|
||||
table[hash2] = uint32(s + 2)
|
||||
candidate = candidate2
|
||||
s++
|
||||
break
|
||||
}
|
||||
table[hash2] = uint32(s + 2)
|
||||
if uint32(cv>>16) == load32(src, candidate) {
|
||||
s += 2
|
||||
break
|
||||
}
|
||||
|
||||
cv = load64(src, nextS)
|
||||
s = nextS
|
||||
}
|
||||
|
||||
// Extend backwards
|
||||
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
|
||||
candidate--
|
||||
s--
|
||||
}
|
||||
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+(s-nextEmit) > dstLimit {
|
||||
return 0
|
||||
}
|
||||
|
||||
// A 4-byte match has been found. We'll later see if more than 4 bytes
|
||||
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
|
||||
// them as literal bytes.
|
||||
|
||||
d += emitLiteralSize(src[nextEmit:s])
|
||||
|
||||
// Call emitCopy, and then see if another emitCopy could be our next
|
||||
// move. Repeat until we find no match for the input immediately after
|
||||
// what was consumed by the last emitCopy call.
|
||||
//
|
||||
// If we exit this loop normally then we need to call emitLiteral next,
|
||||
// though we don't yet know how big the literal will be. We handle that
|
||||
// by proceeding to the next iteration of the main loop. We also can
|
||||
// exit this loop via goto if we get close to exhausting the input.
|
||||
for {
|
||||
// Invariant: we have a 4-byte match at s, and no need to emit any
|
||||
// literal bytes prior to s.
|
||||
base := s
|
||||
repeat = base - candidate
|
||||
|
||||
// Extend the 4-byte match as long as possible.
|
||||
s += 4
|
||||
candidate += 4
|
||||
for s <= len(src)-8 {
|
||||
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
|
||||
s += bits.TrailingZeros64(diff) >> 3
|
||||
break
|
||||
}
|
||||
s += 8
|
||||
candidate += 8
|
||||
}
|
||||
|
||||
d += emitCopyNoRepeatSize(repeat, s-base)
|
||||
if false {
|
||||
// Validate match.
|
||||
a := src[base:s]
|
||||
b := src[base-repeat : base-repeat+(s-base)]
|
||||
if !bytes.Equal(a, b) {
|
||||
panic("mismatch")
|
||||
}
|
||||
}
|
||||
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
goto emitRemainder
|
||||
}
|
||||
|
||||
if d > dstLimit {
|
||||
// Do we have space for more, if not bail.
|
||||
return 0
|
||||
}
|
||||
// Check for an immediate match, otherwise start search at s+1
|
||||
x := load64(src, s-2)
|
||||
m2Hash := hash6(x, tableBits)
|
||||
currHash := hash6(x>>16, tableBits)
|
||||
candidate = int(table[currHash])
|
||||
table[m2Hash] = uint32(s - 2)
|
||||
table[currHash] = uint32(s)
|
||||
if uint32(x>>16) != load32(src, candidate) {
|
||||
cv = load64(src, s+1)
|
||||
s++
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
emitRemainder:
|
||||
if nextEmit < len(src) {
|
||||
// Bail if we exceed the maximum size.
|
||||
if d+len(src)-nextEmit > dstLimit {
|
||||
return 0
|
||||
}
|
||||
d += emitLiteralSize(src[nextEmit:])
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 0 <= len(lit) && len(lit) <= math.MaxUint32
|
||||
func emitLiteralSize(lit []byte) int {
|
||||
if len(lit) == 0 {
|
||||
return 0
|
||||
}
|
||||
switch {
|
||||
case len(lit) <= 60:
|
||||
return len(lit) + 1
|
||||
case len(lit) <= 1<<8:
|
||||
return len(lit) + 2
|
||||
case len(lit) <= 1<<16:
|
||||
return len(lit) + 3
|
||||
case len(lit) <= 1<<24:
|
||||
return len(lit) + 4
|
||||
default:
|
||||
return len(lit) + 5
|
||||
}
|
||||
}
|
||||
|
||||
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
|
||||
panic("cvtLZ4BlockAsm should be unreachable")
|
||||
}
|
||||
|
||||
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
|
||||
panic("cvtLZ4BlockSnappyAsm should be unreachable")
|
||||
}
|
||||
|
||||
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
|
||||
panic("cvtLZ4sBlockAsm should be unreachable")
|
||||
}
|
||||
|
||||
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
|
||||
panic("cvtLZ4sBlockSnappyAsm should be unreachable")
|
||||
}
|
228
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
generated
vendored
228
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
generated
vendored
@ -1,228 +0,0 @@
|
||||
// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
|
||||
|
||||
//go:build !appengine && !noasm && gc && !noasm
|
||||
|
||||
package s2
|
||||
|
||||
func _dummy_()
|
||||
|
||||
// encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4294967295 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBlockAsm(dst []byte, src []byte) int
|
||||
|
||||
// encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4194304 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBlockAsm4MB(dst []byte, src []byte) int
|
||||
|
||||
// encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 16383 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBlockAsm12B(dst []byte, src []byte) int
|
||||
|
||||
// encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4095 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBlockAsm10B(dst []byte, src []byte) int
|
||||
|
||||
// encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 511 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBlockAsm8B(dst []byte, src []byte) int
|
||||
|
||||
// encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4294967295 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBetterBlockAsm(dst []byte, src []byte) int
|
||||
|
||||
// encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4194304 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
|
||||
|
||||
// encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 16383 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBetterBlockAsm12B(dst []byte, src []byte) int
|
||||
|
||||
// encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4095 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBetterBlockAsm10B(dst []byte, src []byte) int
|
||||
|
||||
// encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 511 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeBetterBlockAsm8B(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4294967295 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBlockAsm(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 65535 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 16383 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4095 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 511 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4294967295 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 65535 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 16383 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4095 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
|
||||
|
||||
// encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 511 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
|
||||
|
||||
// calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 4294967295 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func calcBlockSize(src []byte) int
|
||||
|
||||
// calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst.
|
||||
// Maximum input 1024 bytes.
|
||||
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
|
||||
//
|
||||
//go:noescape
|
||||
func calcBlockSizeSmall(src []byte) int
|
||||
|
||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes with margin of 0 bytes
|
||||
// 0 <= len(lit) && len(lit) <= math.MaxUint32
|
||||
//
|
||||
//go:noescape
|
||||
func emitLiteral(dst []byte, lit []byte) int
|
||||
|
||||
// emitRepeat writes a repeat chunk and returns the number of bytes written.
|
||||
// Length must be at least 4 and < 1<<32
|
||||
//
|
||||
//go:noescape
|
||||
func emitRepeat(dst []byte, offset int, length int) int
|
||||
|
||||
// emitCopy writes a copy chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= offset && offset <= math.MaxUint32
|
||||
// 4 <= length && length <= 1 << 24
|
||||
//
|
||||
//go:noescape
|
||||
func emitCopy(dst []byte, offset int, length int) int
|
||||
|
||||
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= offset && offset <= math.MaxUint32
|
||||
// 4 <= length && length <= 1 << 24
|
||||
//
|
||||
//go:noescape
|
||||
func emitCopyNoRepeat(dst []byte, offset int, length int) int
|
||||
|
||||
// matchLen returns how many bytes match in a and b
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// len(a) <= len(b)
|
||||
//
|
||||
//go:noescape
|
||||
func matchLen(a []byte, b []byte) int
|
||||
|
||||
// cvtLZ4Block converts an LZ4 block to S2
|
||||
//
|
||||
//go:noescape
|
||||
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
|
||||
|
||||
// cvtLZ4sBlock converts an LZ4s block to S2
|
||||
//
|
||||
//go:noescape
|
||||
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
|
||||
|
||||
// cvtLZ4Block converts an LZ4 block to Snappy
|
||||
//
|
||||
//go:noescape
|
||||
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
|
||||
|
||||
// cvtLZ4sBlock converts an LZ4s block to Snappy
|
||||
//
|
||||
//go:noescape
|
||||
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
|
21169
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
generated
vendored
21169
vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
generated
vendored
File diff suppressed because it is too large
Load Diff
596
vendor/github.com/klauspost/compress/s2/index.go
generated
vendored
596
vendor/github.com/klauspost/compress/s2/index.go
generated
vendored
@ -1,596 +0,0 @@
|
||||
// Copyright (c) 2022+ Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
)
|
||||
|
||||
const (
|
||||
S2IndexHeader = "s2idx\x00"
|
||||
S2IndexTrailer = "\x00xdi2s"
|
||||
maxIndexEntries = 1 << 16
|
||||
)
|
||||
|
||||
// Index represents an S2/Snappy index.
|
||||
type Index struct {
|
||||
TotalUncompressed int64 // Total Uncompressed size if known. Will be -1 if unknown.
|
||||
TotalCompressed int64 // Total Compressed size if known. Will be -1 if unknown.
|
||||
info []struct {
|
||||
compressedOffset int64
|
||||
uncompressedOffset int64
|
||||
}
|
||||
estBlockUncomp int64
|
||||
}
|
||||
|
||||
func (i *Index) reset(maxBlock int) {
|
||||
i.estBlockUncomp = int64(maxBlock)
|
||||
i.TotalCompressed = -1
|
||||
i.TotalUncompressed = -1
|
||||
if len(i.info) > 0 {
|
||||
i.info = i.info[:0]
|
||||
}
|
||||
}
|
||||
|
||||
// allocInfos will allocate an empty slice of infos.
|
||||
func (i *Index) allocInfos(n int) {
|
||||
if n > maxIndexEntries {
|
||||
panic("n > maxIndexEntries")
|
||||
}
|
||||
i.info = make([]struct {
|
||||
compressedOffset int64
|
||||
uncompressedOffset int64
|
||||
}, 0, n)
|
||||
}
|
||||
|
||||
// add an uncompressed and compressed pair.
|
||||
// Entries must be sent in order.
|
||||
func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
|
||||
if i == nil {
|
||||
return nil
|
||||
}
|
||||
lastIdx := len(i.info) - 1
|
||||
if lastIdx >= 0 {
|
||||
latest := i.info[lastIdx]
|
||||
if latest.uncompressedOffset == uncompressedOffset {
|
||||
// Uncompressed didn't change, don't add entry,
|
||||
// but update start index.
|
||||
latest.compressedOffset = compressedOffset
|
||||
i.info[lastIdx] = latest
|
||||
return nil
|
||||
}
|
||||
if latest.uncompressedOffset > uncompressedOffset {
|
||||
return fmt.Errorf("internal error: Earlier uncompressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
|
||||
}
|
||||
if latest.compressedOffset > compressedOffset {
|
||||
return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
|
||||
}
|
||||
}
|
||||
i.info = append(i.info, struct {
|
||||
compressedOffset int64
|
||||
uncompressedOffset int64
|
||||
}{compressedOffset: compressedOffset, uncompressedOffset: uncompressedOffset})
|
||||
return nil
|
||||
}
|
||||
|
||||
// Find the offset at or before the wanted (uncompressed) offset.
|
||||
// If offset is 0 or positive it is the offset from the beginning of the file.
|
||||
// If the uncompressed size is known, the offset must be within the file.
|
||||
// If an offset outside the file is requested io.ErrUnexpectedEOF is returned.
|
||||
// If the offset is negative, it is interpreted as the distance from the end of the file,
|
||||
// where -1 represents the last byte.
|
||||
// If offset from the end of the file is requested, but size is unknown,
|
||||
// ErrUnsupported will be returned.
|
||||
func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err error) {
|
||||
if i.TotalUncompressed < 0 {
|
||||
return 0, 0, ErrCorrupt
|
||||
}
|
||||
if offset < 0 {
|
||||
offset = i.TotalUncompressed + offset
|
||||
if offset < 0 {
|
||||
return 0, 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
}
|
||||
if offset > i.TotalUncompressed {
|
||||
return 0, 0, io.ErrUnexpectedEOF
|
||||
}
|
||||
if len(i.info) > 200 {
|
||||
n := sort.Search(len(i.info), func(n int) bool {
|
||||
return i.info[n].uncompressedOffset > offset
|
||||
})
|
||||
if n == 0 {
|
||||
n = 1
|
||||
}
|
||||
return i.info[n-1].compressedOffset, i.info[n-1].uncompressedOffset, nil
|
||||
}
|
||||
for _, info := range i.info {
|
||||
if info.uncompressedOffset > offset {
|
||||
break
|
||||
}
|
||||
compressedOff = info.compressedOffset
|
||||
uncompressedOff = info.uncompressedOffset
|
||||
}
|
||||
return compressedOff, uncompressedOff, nil
|
||||
}
|
||||
|
||||
// reduce to stay below maxIndexEntries
|
||||
func (i *Index) reduce() {
|
||||
if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 {
|
||||
return
|
||||
}
|
||||
|
||||
// Algorithm, keep 1, remove removeN entries...
|
||||
removeN := (len(i.info) + 1) / maxIndexEntries
|
||||
src := i.info
|
||||
j := 0
|
||||
|
||||
// Each block should be at least 1MB, but don't reduce below 1000 entries.
|
||||
for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 {
|
||||
removeN++
|
||||
}
|
||||
for idx := 0; idx < len(src); idx++ {
|
||||
i.info[j] = src[idx]
|
||||
j++
|
||||
idx += removeN
|
||||
}
|
||||
i.info = i.info[:j]
|
||||
// Update maxblock estimate.
|
||||
i.estBlockUncomp += i.estBlockUncomp * int64(removeN)
|
||||
}
|
||||
|
||||
func (i *Index) appendTo(b []byte, uncompTotal, compTotal int64) []byte {
|
||||
i.reduce()
|
||||
var tmp [binary.MaxVarintLen64]byte
|
||||
|
||||
initSize := len(b)
|
||||
// We make the start a skippable header+size.
|
||||
b = append(b, ChunkTypeIndex, 0, 0, 0)
|
||||
b = append(b, []byte(S2IndexHeader)...)
|
||||
// Total Uncompressed size
|
||||
n := binary.PutVarint(tmp[:], uncompTotal)
|
||||
b = append(b, tmp[:n]...)
|
||||
// Total Compressed size
|
||||
n = binary.PutVarint(tmp[:], compTotal)
|
||||
b = append(b, tmp[:n]...)
|
||||
// Put EstBlockUncomp size
|
||||
n = binary.PutVarint(tmp[:], i.estBlockUncomp)
|
||||
b = append(b, tmp[:n]...)
|
||||
// Put length
|
||||
n = binary.PutVarint(tmp[:], int64(len(i.info)))
|
||||
b = append(b, tmp[:n]...)
|
||||
|
||||
// Check if we should add uncompressed offsets
|
||||
var hasUncompressed byte
|
||||
for idx, info := range i.info {
|
||||
if idx == 0 {
|
||||
if info.uncompressedOffset != 0 {
|
||||
hasUncompressed = 1
|
||||
break
|
||||
}
|
||||
continue
|
||||
}
|
||||
if info.uncompressedOffset != i.info[idx-1].uncompressedOffset+i.estBlockUncomp {
|
||||
hasUncompressed = 1
|
||||
break
|
||||
}
|
||||
}
|
||||
b = append(b, hasUncompressed)
|
||||
|
||||
// Add each entry
|
||||
if hasUncompressed == 1 {
|
||||
for idx, info := range i.info {
|
||||
uOff := info.uncompressedOffset
|
||||
if idx > 0 {
|
||||
prev := i.info[idx-1]
|
||||
uOff -= prev.uncompressedOffset + (i.estBlockUncomp)
|
||||
}
|
||||
n = binary.PutVarint(tmp[:], uOff)
|
||||
b = append(b, tmp[:n]...)
|
||||
}
|
||||
}
|
||||
|
||||
// Initial compressed size estimate.
|
||||
cPredict := i.estBlockUncomp / 2
|
||||
|
||||
for idx, info := range i.info {
|
||||
cOff := info.compressedOffset
|
||||
if idx > 0 {
|
||||
prev := i.info[idx-1]
|
||||
cOff -= prev.compressedOffset + cPredict
|
||||
// Update compressed size prediction, with half the error.
|
||||
cPredict += cOff / 2
|
||||
}
|
||||
n = binary.PutVarint(tmp[:], cOff)
|
||||
b = append(b, tmp[:n]...)
|
||||
}
|
||||
|
||||
// Add Total Size.
|
||||
// Stored as fixed size for easier reading.
|
||||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)-initSize+4+len(S2IndexTrailer)))
|
||||
b = append(b, tmp[:4]...)
|
||||
// Trailer
|
||||
b = append(b, []byte(S2IndexTrailer)...)
|
||||
|
||||
// Update size
|
||||
chunkLen := len(b) - initSize - skippableFrameHeader
|
||||
b[initSize+1] = uint8(chunkLen >> 0)
|
||||
b[initSize+2] = uint8(chunkLen >> 8)
|
||||
b[initSize+3] = uint8(chunkLen >> 16)
|
||||
//fmt.Printf("chunklen: 0x%x Uncomp:%d, Comp:%d\n", chunkLen, uncompTotal, compTotal)
|
||||
return b
|
||||
}
|
||||
|
||||
// Load a binary index.
|
||||
// A zero value Index can be used or a previous one can be reused.
|
||||
func (i *Index) Load(b []byte) ([]byte, error) {
|
||||
if len(b) <= 4+len(S2IndexHeader)+len(S2IndexTrailer) {
|
||||
return b, io.ErrUnexpectedEOF
|
||||
}
|
||||
if b[0] != ChunkTypeIndex {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
|
||||
b = b[4:]
|
||||
|
||||
// Validate we have enough...
|
||||
if len(b) < chunkLen {
|
||||
return b, io.ErrUnexpectedEOF
|
||||
}
|
||||
if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
|
||||
return b, ErrUnsupported
|
||||
}
|
||||
b = b[len(S2IndexHeader):]
|
||||
|
||||
// Total Uncompressed
|
||||
if v, n := binary.Varint(b); n <= 0 || v < 0 {
|
||||
return b, ErrCorrupt
|
||||
} else {
|
||||
i.TotalUncompressed = v
|
||||
b = b[n:]
|
||||
}
|
||||
|
||||
// Total Compressed
|
||||
if v, n := binary.Varint(b); n <= 0 {
|
||||
return b, ErrCorrupt
|
||||
} else {
|
||||
i.TotalCompressed = v
|
||||
b = b[n:]
|
||||
}
|
||||
|
||||
// Read EstBlockUncomp
|
||||
if v, n := binary.Varint(b); n <= 0 {
|
||||
return b, ErrCorrupt
|
||||
} else {
|
||||
if v < 0 {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
i.estBlockUncomp = v
|
||||
b = b[n:]
|
||||
}
|
||||
|
||||
var entries int
|
||||
if v, n := binary.Varint(b); n <= 0 {
|
||||
return b, ErrCorrupt
|
||||
} else {
|
||||
if v < 0 || v > maxIndexEntries {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
entries = int(v)
|
||||
b = b[n:]
|
||||
}
|
||||
if cap(i.info) < entries {
|
||||
i.allocInfos(entries)
|
||||
}
|
||||
i.info = i.info[:entries]
|
||||
|
||||
if len(b) < 1 {
|
||||
return b, io.ErrUnexpectedEOF
|
||||
}
|
||||
hasUncompressed := b[0]
|
||||
b = b[1:]
|
||||
if hasUncompressed&1 != hasUncompressed {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
|
||||
// Add each uncompressed entry
|
||||
for idx := range i.info {
|
||||
var uOff int64
|
||||
if hasUncompressed != 0 {
|
||||
// Load delta
|
||||
if v, n := binary.Varint(b); n <= 0 {
|
||||
return b, ErrCorrupt
|
||||
} else {
|
||||
uOff = v
|
||||
b = b[n:]
|
||||
}
|
||||
}
|
||||
|
||||
if idx > 0 {
|
||||
prev := i.info[idx-1].uncompressedOffset
|
||||
uOff += prev + (i.estBlockUncomp)
|
||||
if uOff <= prev {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
}
|
||||
if uOff < 0 {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
i.info[idx].uncompressedOffset = uOff
|
||||
}
|
||||
|
||||
// Initial compressed size estimate.
|
||||
cPredict := i.estBlockUncomp / 2
|
||||
|
||||
// Add each compressed entry
|
||||
for idx := range i.info {
|
||||
var cOff int64
|
||||
if v, n := binary.Varint(b); n <= 0 {
|
||||
return b, ErrCorrupt
|
||||
} else {
|
||||
cOff = v
|
||||
b = b[n:]
|
||||
}
|
||||
|
||||
if idx > 0 {
|
||||
// Update compressed size prediction, with half the error.
|
||||
cPredictNew := cPredict + cOff/2
|
||||
|
||||
prev := i.info[idx-1].compressedOffset
|
||||
cOff += prev + cPredict
|
||||
if cOff <= prev {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
cPredict = cPredictNew
|
||||
}
|
||||
if cOff < 0 {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
i.info[idx].compressedOffset = cOff
|
||||
}
|
||||
if len(b) < 4+len(S2IndexTrailer) {
|
||||
return b, io.ErrUnexpectedEOF
|
||||
}
|
||||
// Skip size...
|
||||
b = b[4:]
|
||||
|
||||
// Check trailer...
|
||||
if !bytes.Equal(b[:len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
|
||||
return b, ErrCorrupt
|
||||
}
|
||||
return b[len(S2IndexTrailer):], nil
|
||||
}
|
||||
|
||||
// LoadStream will load an index from the end of the supplied stream.
|
||||
// ErrUnsupported will be returned if the signature cannot be found.
|
||||
// ErrCorrupt will be returned if unexpected values are found.
|
||||
// io.ErrUnexpectedEOF is returned if there are too few bytes.
|
||||
// IO errors are returned as-is.
|
||||
func (i *Index) LoadStream(rs io.ReadSeeker) error {
|
||||
// Go to end.
|
||||
_, err := rs.Seek(-10, io.SeekEnd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var tmp [10]byte
|
||||
_, err = io.ReadFull(rs, tmp[:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Check trailer...
|
||||
if !bytes.Equal(tmp[4:4+len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
|
||||
return ErrUnsupported
|
||||
}
|
||||
sz := binary.LittleEndian.Uint32(tmp[:4])
|
||||
if sz > maxChunkSize+skippableFrameHeader {
|
||||
return ErrCorrupt
|
||||
}
|
||||
_, err = rs.Seek(-int64(sz), io.SeekEnd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Read index.
|
||||
buf := make([]byte, sz)
|
||||
_, err = io.ReadFull(rs, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = i.Load(buf)
|
||||
return err
|
||||
}
|
||||
|
||||
// IndexStream will return an index for a stream.
|
||||
// The stream structure will be checked, but
|
||||
// data within blocks is not verified.
|
||||
// The returned index can either be appended to the end of the stream
|
||||
// or stored separately.
|
||||
func IndexStream(r io.Reader) ([]byte, error) {
|
||||
var i Index
|
||||
var buf [maxChunkSize]byte
|
||||
var readHeader bool
|
||||
for {
|
||||
_, err := io.ReadFull(r, buf[:4])
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return i.appendTo(nil, i.TotalUncompressed, i.TotalCompressed), nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
// Start of this chunk.
|
||||
startChunk := i.TotalCompressed
|
||||
i.TotalCompressed += 4
|
||||
|
||||
chunkType := buf[0]
|
||||
if !readHeader {
|
||||
if chunkType != chunkTypeStreamIdentifier {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
readHeader = true
|
||||
}
|
||||
chunkLen := int(buf[1]) | int(buf[2])<<8 | int(buf[3])<<16
|
||||
if chunkLen < checksumSize {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
|
||||
i.TotalCompressed += int64(chunkLen)
|
||||
_, err = io.ReadFull(r, buf[:chunkLen])
|
||||
if err != nil {
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
// The chunk types are specified at
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
switch chunkType {
|
||||
case chunkTypeCompressedData:
|
||||
// Section 4.2. Compressed data (chunk type 0x00).
|
||||
// Skip checksum.
|
||||
dLen, err := DecodedLen(buf[checksumSize:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if dLen > maxBlockSize {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
if i.estBlockUncomp == 0 {
|
||||
// Use first block for estimate...
|
||||
i.estBlockUncomp = int64(dLen)
|
||||
}
|
||||
err = i.add(startChunk, i.TotalUncompressed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
i.TotalUncompressed += int64(dLen)
|
||||
continue
|
||||
case chunkTypeUncompressedData:
|
||||
n2 := chunkLen - checksumSize
|
||||
if n2 > maxBlockSize {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
if i.estBlockUncomp == 0 {
|
||||
// Use first block for estimate...
|
||||
i.estBlockUncomp = int64(n2)
|
||||
}
|
||||
err = i.add(startChunk, i.TotalUncompressed)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
i.TotalUncompressed += int64(n2)
|
||||
continue
|
||||
case chunkTypeStreamIdentifier:
|
||||
// Section 4.1. Stream identifier (chunk type 0xff).
|
||||
if chunkLen != len(magicBody) {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
|
||||
if string(buf[:len(magicBody)]) != magicBody {
|
||||
if string(buf[:len(magicBody)]) != magicBodySnappy {
|
||||
return nil, ErrCorrupt
|
||||
}
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if chunkType <= 0x7f {
|
||||
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
|
||||
return nil, ErrUnsupported
|
||||
}
|
||||
if chunkLen > maxChunkSize {
|
||||
return nil, ErrUnsupported
|
||||
}
|
||||
// Section 4.4 Padding (chunk type 0xfe).
|
||||
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
|
||||
}
|
||||
}
|
||||
|
||||
// JSON returns the index as JSON text.
|
||||
func (i *Index) JSON() []byte {
|
||||
type offset struct {
|
||||
CompressedOffset int64 `json:"compressed"`
|
||||
UncompressedOffset int64 `json:"uncompressed"`
|
||||
}
|
||||
x := struct {
|
||||
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
|
||||
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
|
||||
Offsets []offset `json:"offsets"`
|
||||
EstBlockUncomp int64 `json:"est_block_uncompressed"`
|
||||
}{
|
||||
TotalUncompressed: i.TotalUncompressed,
|
||||
TotalCompressed: i.TotalCompressed,
|
||||
EstBlockUncomp: i.estBlockUncomp,
|
||||
}
|
||||
for _, v := range i.info {
|
||||
x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
|
||||
}
|
||||
b, _ := json.MarshalIndent(x, "", " ")
|
||||
return b
|
||||
}
|
||||
|
||||
// RemoveIndexHeaders will trim all headers and trailers from a given index.
|
||||
// This is expected to save 20 bytes.
|
||||
// These can be restored using RestoreIndexHeaders.
|
||||
// This removes a layer of security, but is the most compact representation.
|
||||
// Returns nil if headers contains errors.
|
||||
// The returned slice references the provided slice.
|
||||
func RemoveIndexHeaders(b []byte) []byte {
|
||||
const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4
|
||||
if len(b) <= save {
|
||||
return nil
|
||||
}
|
||||
if b[0] != ChunkTypeIndex {
|
||||
return nil
|
||||
}
|
||||
chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
|
||||
b = b[4:]
|
||||
|
||||
// Validate we have enough...
|
||||
if len(b) < chunkLen {
|
||||
return nil
|
||||
}
|
||||
b = b[:chunkLen]
|
||||
|
||||
if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
|
||||
return nil
|
||||
}
|
||||
b = b[len(S2IndexHeader):]
|
||||
if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) {
|
||||
return nil
|
||||
}
|
||||
b = bytes.TrimSuffix(b, []byte(S2IndexTrailer))
|
||||
|
||||
if len(b) < 4 {
|
||||
return nil
|
||||
}
|
||||
return b[:len(b)-4]
|
||||
}
|
||||
|
||||
// RestoreIndexHeaders will index restore headers removed by RemoveIndexHeaders.
|
||||
// No error checking is performed on the input.
|
||||
// If a 0 length slice is sent, it is returned without modification.
|
||||
func RestoreIndexHeaders(in []byte) []byte {
|
||||
if len(in) == 0 {
|
||||
return in
|
||||
}
|
||||
b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4)
|
||||
b = append(b, ChunkTypeIndex, 0, 0, 0)
|
||||
b = append(b, []byte(S2IndexHeader)...)
|
||||
b = append(b, in...)
|
||||
|
||||
var tmp [4]byte
|
||||
binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)+4+len(S2IndexTrailer)))
|
||||
b = append(b, tmp[:4]...)
|
||||
// Trailer
|
||||
b = append(b, []byte(S2IndexTrailer)...)
|
||||
|
||||
chunkLen := len(b) - skippableFrameHeader
|
||||
b[1] = uint8(chunkLen >> 0)
|
||||
b[2] = uint8(chunkLen >> 8)
|
||||
b[3] = uint8(chunkLen >> 16)
|
||||
return b
|
||||
}
|
585
vendor/github.com/klauspost/compress/s2/lz4convert.go
generated
vendored
585
vendor/github.com/klauspost/compress/s2/lz4convert.go
generated
vendored
@ -1,585 +0,0 @@
|
||||
// Copyright (c) 2022 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// LZ4Converter provides conversion from LZ4 blocks as defined here:
|
||||
// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
|
||||
type LZ4Converter struct {
|
||||
}
|
||||
|
||||
// ErrDstTooSmall is returned when provided destination is too small.
|
||||
var ErrDstTooSmall = errors.New("s2: destination too small")
|
||||
|
||||
// ConvertBlock will convert an LZ4 block and append it as an S2
|
||||
// block without block length to dst.
|
||||
// The uncompressed size is returned as well.
|
||||
// dst must have capacity to contain the entire compressed block.
|
||||
func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
|
||||
if len(src) == 0 {
|
||||
return dst, 0, nil
|
||||
}
|
||||
const debug = false
|
||||
const inline = true
|
||||
const lz4MinMatch = 4
|
||||
|
||||
s, d := 0, len(dst)
|
||||
dst = dst[:cap(dst)]
|
||||
if !debug && hasAmd64Asm {
|
||||
res, sz := cvtLZ4BlockAsm(dst[d:], src)
|
||||
if res < 0 {
|
||||
const (
|
||||
errCorrupt = -1
|
||||
errDstTooSmall = -2
|
||||
)
|
||||
switch res {
|
||||
case errCorrupt:
|
||||
return nil, 0, ErrCorrupt
|
||||
case errDstTooSmall:
|
||||
return nil, 0, ErrDstTooSmall
|
||||
default:
|
||||
return nil, 0, fmt.Errorf("unexpected result: %d", res)
|
||||
}
|
||||
}
|
||||
if d+sz > len(dst) {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
return dst[:d+sz], res, nil
|
||||
}
|
||||
|
||||
dLimit := len(dst) - 10
|
||||
var lastOffset uint16
|
||||
var uncompressed int
|
||||
if debug {
|
||||
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
|
||||
}
|
||||
|
||||
for {
|
||||
if s >= len(src) {
|
||||
return dst[:d], 0, ErrCorrupt
|
||||
}
|
||||
// Read literal info
|
||||
token := src[s]
|
||||
ll := int(token >> 4)
|
||||
ml := int(lz4MinMatch + (token & 0xf))
|
||||
|
||||
// If upper nibble is 15, literal length is extended
|
||||
if token >= 0xf0 {
|
||||
for {
|
||||
s++
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return dst[:d], 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
ll += int(val)
|
||||
if val != 255 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Skip past token
|
||||
if s+ll >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
s++
|
||||
if ll > 0 {
|
||||
if d+ll > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
if debug {
|
||||
fmt.Printf("emit %d literals\n", ll)
|
||||
}
|
||||
d += emitLiteralGo(dst[d:], src[s:s+ll])
|
||||
s += ll
|
||||
uncompressed += ll
|
||||
}
|
||||
|
||||
// Check if we are done...
|
||||
if s == len(src) && ml == lz4MinMatch {
|
||||
break
|
||||
}
|
||||
// 2 byte offset
|
||||
if s >= len(src)-2 {
|
||||
if debug {
|
||||
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
offset := binary.LittleEndian.Uint16(src[s:])
|
||||
s += 2
|
||||
if offset == 0 {
|
||||
if debug {
|
||||
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
if int(offset) > uncompressed {
|
||||
if debug {
|
||||
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
|
||||
if ml == lz4MinMatch+15 {
|
||||
for {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
s++
|
||||
ml += int(val)
|
||||
if val != 255 {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if offset == lastOffset {
|
||||
if debug {
|
||||
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
|
||||
}
|
||||
if !inline {
|
||||
d += emitRepeat16(dst[d:], offset, ml)
|
||||
} else {
|
||||
length := ml
|
||||
dst := dst[d:]
|
||||
for len(dst) > 5 {
|
||||
// Repeat offset, make length cheaper
|
||||
length -= 4
|
||||
if length <= 4 {
|
||||
dst[0] = uint8(length)<<2 | tagCopy1
|
||||
dst[1] = 0
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
if length < 8 && offset < 2048 {
|
||||
// Encode WITH offset
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
if length < (1<<8)+4 {
|
||||
length -= 4
|
||||
dst[2] = uint8(length)
|
||||
dst[1] = 0
|
||||
dst[0] = 5<<2 | tagCopy1
|
||||
d += 3
|
||||
break
|
||||
}
|
||||
if length < (1<<16)+(1<<8) {
|
||||
length -= 1 << 8
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 6<<2 | tagCopy1
|
||||
d += 4
|
||||
break
|
||||
}
|
||||
const maxRepeat = (1 << 24) - 1
|
||||
length -= 1 << 16
|
||||
left := 0
|
||||
if length > maxRepeat {
|
||||
left = length - maxRepeat + 4
|
||||
length = maxRepeat - 4
|
||||
}
|
||||
dst[4] = uint8(length >> 16)
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 7<<2 | tagCopy1
|
||||
if left > 0 {
|
||||
d += 5 + emitRepeat16(dst[5:], offset, left)
|
||||
break
|
||||
}
|
||||
d += 5
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if debug {
|
||||
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
|
||||
}
|
||||
if !inline {
|
||||
d += emitCopy16(dst[d:], offset, ml)
|
||||
} else {
|
||||
length := ml
|
||||
dst := dst[d:]
|
||||
for len(dst) > 5 {
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
off := 3
|
||||
if offset < 2048 {
|
||||
// emit 8 bytes as tagCopy1, rest as repeats.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
|
||||
length -= 8
|
||||
off = 2
|
||||
} else {
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
// Emit remaining as repeat value (minimum 4 bytes).
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = 59<<2 | tagCopy2
|
||||
length -= 60
|
||||
}
|
||||
// Emit remaining as repeats, at least 4 bytes remain.
|
||||
d += off + emitRepeat16(dst[off:], offset, length)
|
||||
break
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(length-1)<<2 | tagCopy2
|
||||
d += 3
|
||||
break
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
}
|
||||
lastOffset = offset
|
||||
}
|
||||
uncompressed += ml
|
||||
if d > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
}
|
||||
|
||||
return dst[:d], uncompressed, nil
|
||||
}
|
||||
|
||||
// ConvertBlockSnappy will convert an LZ4 block and append it
|
||||
// as a Snappy block without block length to dst.
|
||||
// The uncompressed size is returned as well.
|
||||
// dst must have capacity to contain the entire compressed block.
|
||||
func (l *LZ4Converter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
|
||||
if len(src) == 0 {
|
||||
return dst, 0, nil
|
||||
}
|
||||
const debug = false
|
||||
const lz4MinMatch = 4
|
||||
|
||||
s, d := 0, len(dst)
|
||||
dst = dst[:cap(dst)]
|
||||
// Use assembly when possible
|
||||
if !debug && hasAmd64Asm {
|
||||
res, sz := cvtLZ4BlockSnappyAsm(dst[d:], src)
|
||||
if res < 0 {
|
||||
const (
|
||||
errCorrupt = -1
|
||||
errDstTooSmall = -2
|
||||
)
|
||||
switch res {
|
||||
case errCorrupt:
|
||||
return nil, 0, ErrCorrupt
|
||||
case errDstTooSmall:
|
||||
return nil, 0, ErrDstTooSmall
|
||||
default:
|
||||
return nil, 0, fmt.Errorf("unexpected result: %d", res)
|
||||
}
|
||||
}
|
||||
if d+sz > len(dst) {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
return dst[:d+sz], res, nil
|
||||
}
|
||||
|
||||
dLimit := len(dst) - 10
|
||||
var uncompressed int
|
||||
if debug {
|
||||
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
|
||||
}
|
||||
|
||||
for {
|
||||
if s >= len(src) {
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
// Read literal info
|
||||
token := src[s]
|
||||
ll := int(token >> 4)
|
||||
ml := int(lz4MinMatch + (token & 0xf))
|
||||
|
||||
// If upper nibble is 15, literal length is extended
|
||||
if token >= 0xf0 {
|
||||
for {
|
||||
s++
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
ll += int(val)
|
||||
if val != 255 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Skip past token
|
||||
if s+ll >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
s++
|
||||
if ll > 0 {
|
||||
if d+ll > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
if debug {
|
||||
fmt.Printf("emit %d literals\n", ll)
|
||||
}
|
||||
d += emitLiteralGo(dst[d:], src[s:s+ll])
|
||||
s += ll
|
||||
uncompressed += ll
|
||||
}
|
||||
|
||||
// Check if we are done...
|
||||
if s == len(src) && ml == lz4MinMatch {
|
||||
break
|
||||
}
|
||||
// 2 byte offset
|
||||
if s >= len(src)-2 {
|
||||
if debug {
|
||||
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
offset := binary.LittleEndian.Uint16(src[s:])
|
||||
s += 2
|
||||
if offset == 0 {
|
||||
if debug {
|
||||
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
if int(offset) > uncompressed {
|
||||
if debug {
|
||||
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
|
||||
if ml == lz4MinMatch+15 {
|
||||
for {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
s++
|
||||
ml += int(val)
|
||||
if val != 255 {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if debug {
|
||||
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
|
||||
}
|
||||
length := ml
|
||||
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
|
||||
for length > 0 {
|
||||
if d >= dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
// Emit a length 64 copy, encoded as 3 bytes.
|
||||
dst[d+2] = uint8(offset >> 8)
|
||||
dst[d+1] = uint8(offset)
|
||||
dst[d+0] = 63<<2 | tagCopy2
|
||||
length -= 64
|
||||
d += 3
|
||||
continue
|
||||
}
|
||||
if length >= 12 || offset >= 2048 || length < 4 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[d+2] = uint8(offset >> 8)
|
||||
dst[d+1] = uint8(offset)
|
||||
dst[d+0] = uint8(length-1)<<2 | tagCopy2
|
||||
d += 3
|
||||
break
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[d+1] = uint8(offset)
|
||||
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
uncompressed += ml
|
||||
if d > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
}
|
||||
|
||||
return dst[:d], uncompressed, nil
|
||||
}
|
||||
|
||||
// emitRepeat writes a repeat chunk and returns the number of bytes written.
|
||||
// Length must be at least 4 and < 1<<24
|
||||
func emitRepeat16(dst []byte, offset uint16, length int) int {
|
||||
// Repeat offset, make length cheaper
|
||||
length -= 4
|
||||
if length <= 4 {
|
||||
dst[0] = uint8(length)<<2 | tagCopy1
|
||||
dst[1] = 0
|
||||
return 2
|
||||
}
|
||||
if length < 8 && offset < 2048 {
|
||||
// Encode WITH offset
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
|
||||
return 2
|
||||
}
|
||||
if length < (1<<8)+4 {
|
||||
length -= 4
|
||||
dst[2] = uint8(length)
|
||||
dst[1] = 0
|
||||
dst[0] = 5<<2 | tagCopy1
|
||||
return 3
|
||||
}
|
||||
if length < (1<<16)+(1<<8) {
|
||||
length -= 1 << 8
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 6<<2 | tagCopy1
|
||||
return 4
|
||||
}
|
||||
const maxRepeat = (1 << 24) - 1
|
||||
length -= 1 << 16
|
||||
left := 0
|
||||
if length > maxRepeat {
|
||||
left = length - maxRepeat + 4
|
||||
length = maxRepeat - 4
|
||||
}
|
||||
dst[4] = uint8(length >> 16)
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 7<<2 | tagCopy1
|
||||
if left > 0 {
|
||||
return 5 + emitRepeat16(dst[5:], offset, left)
|
||||
}
|
||||
return 5
|
||||
}
|
||||
|
||||
// emitCopy writes a copy chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= offset && offset <= math.MaxUint16
|
||||
// 4 <= length && length <= math.MaxUint32
|
||||
func emitCopy16(dst []byte, offset uint16, length int) int {
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
off := 3
|
||||
if offset < 2048 {
|
||||
// emit 8 bytes as tagCopy1, rest as repeats.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
|
||||
length -= 8
|
||||
off = 2
|
||||
} else {
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
// Emit remaining as repeat value (minimum 4 bytes).
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = 59<<2 | tagCopy2
|
||||
length -= 60
|
||||
}
|
||||
// Emit remaining as repeats, at least 4 bytes remain.
|
||||
return off + emitRepeat16(dst[off:], offset, length)
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(length-1)<<2 | tagCopy2
|
||||
return 3
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
return 2
|
||||
}
|
||||
|
||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 0 <= len(lit) && len(lit) <= math.MaxUint32
|
||||
func emitLiteralGo(dst, lit []byte) int {
|
||||
if len(lit) == 0 {
|
||||
return 0
|
||||
}
|
||||
i, n := 0, uint(len(lit)-1)
|
||||
switch {
|
||||
case n < 60:
|
||||
dst[0] = uint8(n)<<2 | tagLiteral
|
||||
i = 1
|
||||
case n < 1<<8:
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 60<<2 | tagLiteral
|
||||
i = 2
|
||||
case n < 1<<16:
|
||||
dst[2] = uint8(n >> 8)
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 61<<2 | tagLiteral
|
||||
i = 3
|
||||
case n < 1<<24:
|
||||
dst[3] = uint8(n >> 16)
|
||||
dst[2] = uint8(n >> 8)
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 62<<2 | tagLiteral
|
||||
i = 4
|
||||
default:
|
||||
dst[4] = uint8(n >> 24)
|
||||
dst[3] = uint8(n >> 16)
|
||||
dst[2] = uint8(n >> 8)
|
||||
dst[1] = uint8(n)
|
||||
dst[0] = 63<<2 | tagLiteral
|
||||
i = 5
|
||||
}
|
||||
return i + copy(dst[i:], lit)
|
||||
}
|
467
vendor/github.com/klauspost/compress/s2/lz4sconvert.go
generated
vendored
467
vendor/github.com/klauspost/compress/s2/lz4sconvert.go
generated
vendored
@ -1,467 +0,0 @@
|
||||
// Copyright (c) 2022 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package s2
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// LZ4sConverter provides conversion from LZ4s.
|
||||
// (Intel modified LZ4 Blocks)
|
||||
// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf
|
||||
// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format.
|
||||
// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData.
|
||||
// The LZ4s block returned by the Intel® QAT hardware can be used by an external
|
||||
// software post-processing to generate other compressed data formats.
|
||||
// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses
|
||||
// the same high-level formatting as LZ4 block format with the following encoding changes:
|
||||
// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte.
|
||||
// ONLY "Min match of 4 bytes" is supported.
|
||||
type LZ4sConverter struct {
|
||||
}
|
||||
|
||||
// ConvertBlock will convert an LZ4s block and append it as an S2
|
||||
// block without block length to dst.
|
||||
// The uncompressed size is returned as well.
|
||||
// dst must have capacity to contain the entire compressed block.
|
||||
func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
|
||||
if len(src) == 0 {
|
||||
return dst, 0, nil
|
||||
}
|
||||
const debug = false
|
||||
const inline = true
|
||||
const lz4MinMatch = 3
|
||||
|
||||
s, d := 0, len(dst)
|
||||
dst = dst[:cap(dst)]
|
||||
if !debug && hasAmd64Asm {
|
||||
res, sz := cvtLZ4sBlockAsm(dst[d:], src)
|
||||
if res < 0 {
|
||||
const (
|
||||
errCorrupt = -1
|
||||
errDstTooSmall = -2
|
||||
)
|
||||
switch res {
|
||||
case errCorrupt:
|
||||
return nil, 0, ErrCorrupt
|
||||
case errDstTooSmall:
|
||||
return nil, 0, ErrDstTooSmall
|
||||
default:
|
||||
return nil, 0, fmt.Errorf("unexpected result: %d", res)
|
||||
}
|
||||
}
|
||||
if d+sz > len(dst) {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
return dst[:d+sz], res, nil
|
||||
}
|
||||
|
||||
dLimit := len(dst) - 10
|
||||
var lastOffset uint16
|
||||
var uncompressed int
|
||||
if debug {
|
||||
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
|
||||
}
|
||||
|
||||
for {
|
||||
if s >= len(src) {
|
||||
return dst[:d], 0, ErrCorrupt
|
||||
}
|
||||
// Read literal info
|
||||
token := src[s]
|
||||
ll := int(token >> 4)
|
||||
ml := int(lz4MinMatch + (token & 0xf))
|
||||
|
||||
// If upper nibble is 15, literal length is extended
|
||||
if token >= 0xf0 {
|
||||
for {
|
||||
s++
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return dst[:d], 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
ll += int(val)
|
||||
if val != 255 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Skip past token
|
||||
if s+ll >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
s++
|
||||
if ll > 0 {
|
||||
if d+ll > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
if debug {
|
||||
fmt.Printf("emit %d literals\n", ll)
|
||||
}
|
||||
d += emitLiteralGo(dst[d:], src[s:s+ll])
|
||||
s += ll
|
||||
uncompressed += ll
|
||||
}
|
||||
|
||||
// Check if we are done...
|
||||
if ml == lz4MinMatch {
|
||||
if s == len(src) {
|
||||
break
|
||||
}
|
||||
// 0 bytes.
|
||||
continue
|
||||
}
|
||||
// 2 byte offset
|
||||
if s >= len(src)-2 {
|
||||
if debug {
|
||||
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
offset := binary.LittleEndian.Uint16(src[s:])
|
||||
s += 2
|
||||
if offset == 0 {
|
||||
if debug {
|
||||
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
if int(offset) > uncompressed {
|
||||
if debug {
|
||||
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
|
||||
if ml == lz4MinMatch+15 {
|
||||
for {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
s++
|
||||
ml += int(val)
|
||||
if val != 255 {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if offset == lastOffset {
|
||||
if debug {
|
||||
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
|
||||
}
|
||||
if !inline {
|
||||
d += emitRepeat16(dst[d:], offset, ml)
|
||||
} else {
|
||||
length := ml
|
||||
dst := dst[d:]
|
||||
for len(dst) > 5 {
|
||||
// Repeat offset, make length cheaper
|
||||
length -= 4
|
||||
if length <= 4 {
|
||||
dst[0] = uint8(length)<<2 | tagCopy1
|
||||
dst[1] = 0
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
if length < 8 && offset < 2048 {
|
||||
// Encode WITH offset
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
if length < (1<<8)+4 {
|
||||
length -= 4
|
||||
dst[2] = uint8(length)
|
||||
dst[1] = 0
|
||||
dst[0] = 5<<2 | tagCopy1
|
||||
d += 3
|
||||
break
|
||||
}
|
||||
if length < (1<<16)+(1<<8) {
|
||||
length -= 1 << 8
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 6<<2 | tagCopy1
|
||||
d += 4
|
||||
break
|
||||
}
|
||||
const maxRepeat = (1 << 24) - 1
|
||||
length -= 1 << 16
|
||||
left := 0
|
||||
if length > maxRepeat {
|
||||
left = length - maxRepeat + 4
|
||||
length = maxRepeat - 4
|
||||
}
|
||||
dst[4] = uint8(length >> 16)
|
||||
dst[3] = uint8(length >> 8)
|
||||
dst[2] = uint8(length >> 0)
|
||||
dst[1] = 0
|
||||
dst[0] = 7<<2 | tagCopy1
|
||||
if left > 0 {
|
||||
d += 5 + emitRepeat16(dst[5:], offset, left)
|
||||
break
|
||||
}
|
||||
d += 5
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if debug {
|
||||
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
|
||||
}
|
||||
if !inline {
|
||||
d += emitCopy16(dst[d:], offset, ml)
|
||||
} else {
|
||||
length := ml
|
||||
dst := dst[d:]
|
||||
for len(dst) > 5 {
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
off := 3
|
||||
if offset < 2048 {
|
||||
// emit 8 bytes as tagCopy1, rest as repeats.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
|
||||
length -= 8
|
||||
off = 2
|
||||
} else {
|
||||
// Emit a length 60 copy, encoded as 3 bytes.
|
||||
// Emit remaining as repeat value (minimum 4 bytes).
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = 59<<2 | tagCopy2
|
||||
length -= 60
|
||||
}
|
||||
// Emit remaining as repeats, at least 4 bytes remain.
|
||||
d += off + emitRepeat16(dst[off:], offset, length)
|
||||
break
|
||||
}
|
||||
if length >= 12 || offset >= 2048 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[2] = uint8(offset >> 8)
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(length-1)<<2 | tagCopy2
|
||||
d += 3
|
||||
break
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[1] = uint8(offset)
|
||||
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
}
|
||||
lastOffset = offset
|
||||
}
|
||||
uncompressed += ml
|
||||
if d > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
}
|
||||
|
||||
return dst[:d], uncompressed, nil
|
||||
}
|
||||
|
||||
// ConvertBlockSnappy will convert an LZ4s block and append it
|
||||
// as a Snappy block without block length to dst.
|
||||
// The uncompressed size is returned as well.
|
||||
// dst must have capacity to contain the entire compressed block.
|
||||
func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
|
||||
if len(src) == 0 {
|
||||
return dst, 0, nil
|
||||
}
|
||||
const debug = false
|
||||
const lz4MinMatch = 3
|
||||
|
||||
s, d := 0, len(dst)
|
||||
dst = dst[:cap(dst)]
|
||||
// Use assembly when possible
|
||||
if !debug && hasAmd64Asm {
|
||||
res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src)
|
||||
if res < 0 {
|
||||
const (
|
||||
errCorrupt = -1
|
||||
errDstTooSmall = -2
|
||||
)
|
||||
switch res {
|
||||
case errCorrupt:
|
||||
return nil, 0, ErrCorrupt
|
||||
case errDstTooSmall:
|
||||
return nil, 0, ErrDstTooSmall
|
||||
default:
|
||||
return nil, 0, fmt.Errorf("unexpected result: %d", res)
|
||||
}
|
||||
}
|
||||
if d+sz > len(dst) {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
return dst[:d+sz], res, nil
|
||||
}
|
||||
|
||||
dLimit := len(dst) - 10
|
||||
var uncompressed int
|
||||
if debug {
|
||||
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
|
||||
}
|
||||
|
||||
for {
|
||||
if s >= len(src) {
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
// Read literal info
|
||||
token := src[s]
|
||||
ll := int(token >> 4)
|
||||
ml := int(lz4MinMatch + (token & 0xf))
|
||||
|
||||
// If upper nibble is 15, literal length is extended
|
||||
if token >= 0xf0 {
|
||||
for {
|
||||
s++
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
ll += int(val)
|
||||
if val != 255 {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
// Skip past token
|
||||
if s+ll >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
s++
|
||||
if ll > 0 {
|
||||
if d+ll > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
if debug {
|
||||
fmt.Printf("emit %d literals\n", ll)
|
||||
}
|
||||
d += emitLiteralGo(dst[d:], src[s:s+ll])
|
||||
s += ll
|
||||
uncompressed += ll
|
||||
}
|
||||
|
||||
// Check if we are done...
|
||||
if ml == lz4MinMatch {
|
||||
if s == len(src) {
|
||||
break
|
||||
}
|
||||
// 0 bytes.
|
||||
continue
|
||||
}
|
||||
// 2 byte offset
|
||||
if s >= len(src)-2 {
|
||||
if debug {
|
||||
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
offset := binary.LittleEndian.Uint16(src[s:])
|
||||
s += 2
|
||||
if offset == 0 {
|
||||
if debug {
|
||||
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
if int(offset) > uncompressed {
|
||||
if debug {
|
||||
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
|
||||
if ml == lz4MinMatch+15 {
|
||||
for {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
val := src[s]
|
||||
s++
|
||||
ml += int(val)
|
||||
if val != 255 {
|
||||
if s >= len(src) {
|
||||
if debug {
|
||||
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
|
||||
}
|
||||
return nil, 0, ErrCorrupt
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if debug {
|
||||
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
|
||||
}
|
||||
length := ml
|
||||
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
|
||||
for length > 0 {
|
||||
if d >= dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
|
||||
// Offset no more than 2 bytes.
|
||||
if length > 64 {
|
||||
// Emit a length 64 copy, encoded as 3 bytes.
|
||||
dst[d+2] = uint8(offset >> 8)
|
||||
dst[d+1] = uint8(offset)
|
||||
dst[d+0] = 63<<2 | tagCopy2
|
||||
length -= 64
|
||||
d += 3
|
||||
continue
|
||||
}
|
||||
if length >= 12 || offset >= 2048 || length < 4 {
|
||||
// Emit the remaining copy, encoded as 3 bytes.
|
||||
dst[d+2] = uint8(offset >> 8)
|
||||
dst[d+1] = uint8(offset)
|
||||
dst[d+0] = uint8(length-1)<<2 | tagCopy2
|
||||
d += 3
|
||||
break
|
||||
}
|
||||
// Emit the remaining copy, encoded as 2 bytes.
|
||||
dst[d+1] = uint8(offset)
|
||||
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
|
||||
d += 2
|
||||
break
|
||||
}
|
||||
uncompressed += ml
|
||||
if d > dLimit {
|
||||
return nil, 0, ErrDstTooSmall
|
||||
}
|
||||
}
|
||||
|
||||
return dst[:d], uncompressed, nil
|
||||
}
|
1062
vendor/github.com/klauspost/compress/s2/reader.go
generated
vendored
1062
vendor/github.com/klauspost/compress/s2/reader.go
generated
vendored
File diff suppressed because it is too large
Load Diff
143
vendor/github.com/klauspost/compress/s2/s2.go
generated
vendored
143
vendor/github.com/klauspost/compress/s2/s2.go
generated
vendored
@ -1,143 +0,0 @@
|
||||
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
|
||||
// Copyright (c) 2019 Klaus Post. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package s2 implements the S2 compression format.
|
||||
//
|
||||
// S2 is an extension of Snappy. Similar to Snappy S2 is aimed for high throughput,
|
||||
// which is why it features concurrent compression for bigger payloads.
|
||||
//
|
||||
// Decoding is compatible with Snappy compressed content,
|
||||
// but content compressed with S2 cannot be decompressed by Snappy.
|
||||
//
|
||||
// For more information on Snappy/S2 differences see README in: https://github.com/klauspost/compress/tree/master/s2
|
||||
//
|
||||
// There are actually two S2 formats: block and stream. They are related,
|
||||
// but different: trying to decompress block-compressed data as a S2 stream
|
||||
// will fail, and vice versa. The block format is the Decode and Encode
|
||||
// functions and the stream format is the Reader and Writer types.
|
||||
//
|
||||
// A "better" compression option is available. This will trade some compression
|
||||
// speed
|
||||
//
|
||||
// The block format, the more common case, is used when the complete size (the
|
||||
// number of bytes) of the original data is known upfront, at the time
|
||||
// compression starts. The stream format, also known as the framing format, is
|
||||
// for when that isn't always true.
|
||||
//
|
||||
// Blocks to not offer much data protection, so it is up to you to
|
||||
// add data validation of decompressed blocks.
|
||||
//
|
||||
// Streams perform CRC validation of the decompressed data.
|
||||
// Stream compression will also be performed on multiple CPU cores concurrently
|
||||
// significantly improving throughput.
|
||||
package s2
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"hash/crc32"
|
||||
)
|
||||
|
||||
/*
|
||||
Each encoded block begins with the varint-encoded length of the decoded data,
|
||||
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
|
||||
first byte of each chunk is broken into its 2 least and 6 most significant bits
|
||||
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
|
||||
Zero means a literal tag. All other values mean a copy tag.
|
||||
|
||||
For literal tags:
|
||||
- If m < 60, the next 1 + m bytes are literal bytes.
|
||||
- Otherwise, let n be the little-endian unsigned integer denoted by the next
|
||||
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
|
||||
|
||||
For copy tags, length bytes are copied from offset bytes ago, in the style of
|
||||
Lempel-Ziv compression algorithms. In particular:
|
||||
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
|
||||
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
|
||||
of the offset. The next byte is bits 0-7 of the offset.
|
||||
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
|
||||
The length is 1 + m. The offset is the little-endian unsigned integer
|
||||
denoted by the next 2 bytes.
|
||||
- For l == 3, the offset ranges in [0, 1<<32) and the length in
|
||||
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
|
||||
integer denoted by the next 4 bytes.
|
||||
*/
|
||||
const (
|
||||
tagLiteral = 0x00
|
||||
tagCopy1 = 0x01
|
||||
tagCopy2 = 0x02
|
||||
tagCopy4 = 0x03
|
||||
)
|
||||
|
||||
const (
|
||||
checksumSize = 4
|
||||
chunkHeaderSize = 4
|
||||
magicChunk = "\xff\x06\x00\x00" + magicBody
|
||||
magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy
|
||||
magicBodySnappy = "sNaPpY"
|
||||
magicBody = "S2sTwO"
|
||||
|
||||
// maxBlockSize is the maximum size of the input to encodeBlock.
|
||||
//
|
||||
// For the framing format (Writer type instead of Encode function),
|
||||
// this is the maximum uncompressed size of a block.
|
||||
maxBlockSize = 4 << 20
|
||||
|
||||
// minBlockSize is the minimum size of block setting when creating a writer.
|
||||
minBlockSize = 4 << 10
|
||||
|
||||
skippableFrameHeader = 4
|
||||
maxChunkSize = 1<<24 - 1 // 16777215
|
||||
|
||||
// Default block size
|
||||
defaultBlockSize = 1 << 20
|
||||
|
||||
// maxSnappyBlockSize is the maximum snappy block size.
|
||||
maxSnappyBlockSize = 1 << 16
|
||||
|
||||
obufHeaderLen = checksumSize + chunkHeaderSize
|
||||
)
|
||||
|
||||
const (
|
||||
chunkTypeCompressedData = 0x00
|
||||
chunkTypeUncompressedData = 0x01
|
||||
ChunkTypeIndex = 0x99
|
||||
chunkTypePadding = 0xfe
|
||||
chunkTypeStreamIdentifier = 0xff
|
||||
)
|
||||
|
||||
var crcTable = crc32.MakeTable(crc32.Castagnoli)
|
||||
|
||||
// crc implements the checksum specified in section 3 of
|
||||
// https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
func crc(b []byte) uint32 {
|
||||
c := crc32.Update(0, crcTable, b)
|
||||
return c>>15 | c<<17 + 0xa282ead8
|
||||
}
|
||||
|
||||
// literalExtraSize returns the extra size of encoding n literals.
|
||||
// n should be >= 0 and <= math.MaxUint32.
|
||||
func literalExtraSize(n int64) int64 {
|
||||
if n == 0 {
|
||||
return 0
|
||||
}
|
||||
switch {
|
||||
case n < 60:
|
||||
return 1
|
||||
case n < 1<<8:
|
||||
return 2
|
||||
case n < 1<<16:
|
||||
return 3
|
||||
case n < 1<<24:
|
||||
return 4
|
||||
default:
|
||||
return 5
|
||||
}
|
||||
}
|
||||
|
||||
type byter interface {
|
||||
Bytes() []byte
|
||||
}
|
||||
|
||||
var _ byter = &bytes.Buffer{}
|
1020
vendor/github.com/klauspost/compress/s2/writer.go
generated
vendored
1020
vendor/github.com/klauspost/compress/s2/writer.go
generated
vendored
File diff suppressed because it is too large
Load Diff
2
vendor/github.com/klauspost/compress/s2sx.mod
generated
vendored
2
vendor/github.com/klauspost/compress/s2sx.mod
generated
vendored
@ -1,4 +1,4 @@
|
||||
module github.com/klauspost/compress
|
||||
|
||||
go 1.16
|
||||
go 1.19
|
||||
|
||||
|
2
vendor/github.com/klauspost/compress/zstd/README.md
generated
vendored
2
vendor/github.com/klauspost/compress/zstd/README.md
generated
vendored
@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
|
||||
|
||||
## Decompressor
|
||||
|
||||
Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
|
||||
Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
|
||||
|
||||
This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
|
||||
kindly supplied by [fuzzit.dev](https://fuzzit.dev/).
|
||||
|
56
vendor/github.com/klauspost/compress/zstd/decodeheader.go
generated
vendored
56
vendor/github.com/klauspost/compress/zstd/decodeheader.go
generated
vendored
@ -95,42 +95,54 @@ type Header struct {
|
||||
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
|
||||
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
|
||||
func (h *Header) Decode(in []byte) error {
|
||||
_, err := h.DecodeAndStrip(in)
|
||||
return err
|
||||
}
|
||||
|
||||
// DecodeAndStrip will decode the header from the beginning of the stream
|
||||
// and on success return the remaining bytes.
|
||||
// This will decode the frame header and the first block header if enough bytes are provided.
|
||||
// It is recommended to provide at least HeaderMaxSize bytes.
|
||||
// If the frame header cannot be read an error will be returned.
|
||||
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
|
||||
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
|
||||
func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
|
||||
*h = Header{}
|
||||
if len(in) < 4 {
|
||||
return io.ErrUnexpectedEOF
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
h.HeaderSize += 4
|
||||
b, in := in[:4], in[4:]
|
||||
if string(b) != frameMagic {
|
||||
if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
|
||||
return ErrMagicMismatch
|
||||
return nil, ErrMagicMismatch
|
||||
}
|
||||
if len(in) < 4 {
|
||||
return io.ErrUnexpectedEOF
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
h.HeaderSize += 4
|
||||
h.Skippable = true
|
||||
h.SkippableID = int(b[0] & 0xf)
|
||||
h.SkippableSize = binary.LittleEndian.Uint32(in)
|
||||
return nil
|
||||
return in[4:], nil
|
||||
}
|
||||
|
||||
// Read Window_Descriptor
|
||||
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
|
||||
if len(in) < 1 {
|
||||
return io.ErrUnexpectedEOF
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
fhd, in := in[0], in[1:]
|
||||
h.HeaderSize++
|
||||
h.SingleSegment = fhd&(1<<5) != 0
|
||||
h.HasCheckSum = fhd&(1<<2) != 0
|
||||
if fhd&(1<<3) != 0 {
|
||||
return errors.New("reserved bit set on frame header")
|
||||
return nil, errors.New("reserved bit set on frame header")
|
||||
}
|
||||
|
||||
if !h.SingleSegment {
|
||||
if len(in) < 1 {
|
||||
return io.ErrUnexpectedEOF
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
var wd byte
|
||||
wd, in = in[0], in[1:]
|
||||
@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error {
|
||||
size = 4
|
||||
}
|
||||
if len(in) < int(size) {
|
||||
return io.ErrUnexpectedEOF
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
b, in = in[:size], in[size:]
|
||||
h.HeaderSize += int(size)
|
||||
@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error {
|
||||
if fcsSize > 0 {
|
||||
h.HasFCS = true
|
||||
if len(in) < fcsSize {
|
||||
return io.ErrUnexpectedEOF
|
||||
return nil, io.ErrUnexpectedEOF
|
||||
}
|
||||
b, in = in[:fcsSize], in[fcsSize:]
|
||||
h.HeaderSize += int(fcsSize)
|
||||
@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error {
|
||||
|
||||
// Frame Header done, we will not fail from now on.
|
||||
if len(in) < 3 {
|
||||
return nil
|
||||
return in, nil
|
||||
}
|
||||
tmp := in[:3]
|
||||
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
|
||||
@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error {
|
||||
cSize := int(bh >> 3)
|
||||
switch blockType {
|
||||
case blockTypeReserved:
|
||||
return nil
|
||||
return in, nil
|
||||
case blockTypeRLE:
|
||||
h.FirstBlock.Compressed = true
|
||||
h.FirstBlock.DecompressedSize = cSize
|
||||
@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error {
|
||||
}
|
||||
|
||||
h.FirstBlock.OK = true
|
||||
return nil
|
||||
return in, nil
|
||||
}
|
||||
|
||||
// AppendTo will append the encoded header to the dst slice.
|
||||
// There is no error checking performed on the header values.
|
||||
func (h *Header) AppendTo(dst []byte) ([]byte, error) {
|
||||
if h.Skippable {
|
||||
magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
|
||||
magic[0] |= byte(h.SkippableID & 0xf)
|
||||
dst = append(dst, magic[:]...)
|
||||
f := h.SkippableSize
|
||||
return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
|
||||
}
|
||||
f := frameHeader{
|
||||
ContentSize: h.FrameContentSize,
|
||||
WindowSize: uint32(h.WindowSize),
|
||||
SingleSegment: h.SingleSegment,
|
||||
Checksum: h.HasCheckSum,
|
||||
DictID: h.DictionaryID,
|
||||
}
|
||||
return f.appendTo(dst), nil
|
||||
}
|
||||
|
82
vendor/github.com/klauspost/compress/zstd/enc_best.go
generated
vendored
82
vendor/github.com/klauspost/compress/zstd/enc_best.go
generated
vendored
@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
|
||||
if m.rep < 0 {
|
||||
ofc = ofCode(uint32(m.s-m.offset) + 3)
|
||||
} else {
|
||||
ofc = ofCode(uint32(m.rep))
|
||||
ofc = ofCode(uint32(m.rep) & 3)
|
||||
}
|
||||
// Cost, excluding
|
||||
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
|
||||
@ -197,17 +197,10 @@ encodeLoop:
|
||||
|
||||
// Set m to a match at offset if it looks like that will improve compression.
|
||||
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
|
||||
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
|
||||
delta := s - offset
|
||||
if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
|
||||
return
|
||||
}
|
||||
if debugAsserts {
|
||||
if offset <= 0 {
|
||||
panic(offset)
|
||||
}
|
||||
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
|
||||
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
|
||||
}
|
||||
}
|
||||
// Try to quick reject if we already have a long match.
|
||||
if m.length > 16 {
|
||||
left := len(src) - int(m.s+m.length)
|
||||
@ -226,8 +219,10 @@ encodeLoop:
|
||||
}
|
||||
}
|
||||
l := 4 + e.matchlen(s+4, offset+4, src)
|
||||
if rep < 0 {
|
||||
if m.rep <= 0 {
|
||||
// Extend candidate match backwards as far as possible.
|
||||
// Do not extend repeats as we can assume they are optimal
|
||||
// and offsets change if s == nextEmit.
|
||||
tMin := s - e.maxMatchOff
|
||||
if tMin < 0 {
|
||||
tMin = 0
|
||||
@ -238,7 +233,14 @@ encodeLoop:
|
||||
l++
|
||||
}
|
||||
}
|
||||
|
||||
if debugAsserts {
|
||||
if offset >= s {
|
||||
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
|
||||
}
|
||||
if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
|
||||
panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
|
||||
}
|
||||
}
|
||||
cand := match{offset: offset, s: s, length: l, rep: rep}
|
||||
cand.estBits(bitsPerByte)
|
||||
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
|
||||
@ -281,6 +283,7 @@ encodeLoop:
|
||||
// Load next and check...
|
||||
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
|
||||
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
|
||||
index0 := s + 1
|
||||
|
||||
// Look far ahead, unless we have a really long match already...
|
||||
if best.length < goodEnough {
|
||||
@ -334,41 +337,45 @@ encodeLoop:
|
||||
}
|
||||
|
||||
if debugAsserts {
|
||||
if best.offset >= best.s {
|
||||
panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
|
||||
}
|
||||
if best.s < nextEmit {
|
||||
panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
|
||||
}
|
||||
if best.offset < s-e.maxMatchOff {
|
||||
panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
|
||||
}
|
||||
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
|
||||
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
|
||||
}
|
||||
}
|
||||
|
||||
// We have a match, we can store the forward value
|
||||
s = best.s
|
||||
if best.rep > 0 {
|
||||
var seq seq
|
||||
seq.matchLen = uint32(best.length - zstdMinMatch)
|
||||
if debugAsserts && s <= nextEmit {
|
||||
panic("s <= nextEmit")
|
||||
}
|
||||
addLiterals(&seq, best.s)
|
||||
|
||||
// Repeat. If bit 4 is set, this is a non-lit repeat.
|
||||
seq.offset = uint32(best.rep & 3)
|
||||
if debugSequences {
|
||||
println("repeat sequence", seq, "next s:", s)
|
||||
println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
|
||||
}
|
||||
blk.sequences = append(blk.sequences, seq)
|
||||
|
||||
// Index old s + 1 -> s - 1
|
||||
index0 := s + 1
|
||||
s = best.s + best.length
|
||||
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
if debugEncoder {
|
||||
println("repeat ended", s, best.length)
|
||||
}
|
||||
break encodeLoop
|
||||
}
|
||||
|
||||
// Index skipped...
|
||||
end := s
|
||||
if s > sLimit+4 {
|
||||
end = sLimit + 4
|
||||
}
|
||||
off := index0 + e.cur
|
||||
for index0 < s {
|
||||
for index0 < end {
|
||||
cv0 := load6432(src, index0)
|
||||
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
|
||||
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
|
||||
@ -377,6 +384,7 @@ encodeLoop:
|
||||
off++
|
||||
index0++
|
||||
}
|
||||
|
||||
switch best.rep {
|
||||
case 2, 4 | 1:
|
||||
offset1, offset2 = offset2, offset1
|
||||
@ -385,13 +393,17 @@ encodeLoop:
|
||||
case 4 | 3:
|
||||
offset1, offset2, offset3 = offset1-1, offset1, offset2
|
||||
}
|
||||
if s >= sLimit {
|
||||
if debugEncoder {
|
||||
println("repeat ended", s, best.length)
|
||||
}
|
||||
break encodeLoop
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// A 4-byte match has been found. Update recent offsets.
|
||||
// We'll later see if more than 4 bytes.
|
||||
index0 := s + 1
|
||||
s = best.s
|
||||
t := best.offset
|
||||
offset1, offset2, offset3 = s-t, offset1, offset2
|
||||
|
||||
@ -418,19 +430,25 @@ encodeLoop:
|
||||
}
|
||||
blk.sequences = append(blk.sequences, seq)
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
break encodeLoop
|
||||
|
||||
// Index old s + 1 -> s - 1 or sLimit
|
||||
end := s
|
||||
if s > sLimit-4 {
|
||||
end = sLimit - 4
|
||||
}
|
||||
|
||||
// Index old s + 1 -> s - 1
|
||||
for index0 < s {
|
||||
off := index0 + e.cur
|
||||
for index0 < end {
|
||||
cv0 := load6432(src, index0)
|
||||
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
|
||||
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
|
||||
off := index0 + e.cur
|
||||
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
|
||||
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
|
||||
index0++
|
||||
off++
|
||||
}
|
||||
if s >= sLimit {
|
||||
break encodeLoop
|
||||
}
|
||||
}
|
||||
|
||||
|
17
vendor/github.com/klauspost/compress/zstd/enc_better.go
generated
vendored
17
vendor/github.com/klauspost/compress/zstd/enc_better.go
generated
vendored
@ -145,7 +145,7 @@ encodeLoop:
|
||||
var t int32
|
||||
// We allow the encoder to optionally turn off repeat offsets across blocks
|
||||
canRepeat := len(blk.sequences) > 2
|
||||
var matched int32
|
||||
var matched, index0 int32
|
||||
|
||||
for {
|
||||
if debugAsserts && canRepeat && offset1 == 0 {
|
||||
@ -162,6 +162,7 @@ encodeLoop:
|
||||
off := s + e.cur
|
||||
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
|
||||
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
|
||||
index0 = s + 1
|
||||
|
||||
if canRepeat {
|
||||
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
|
||||
@ -258,7 +259,6 @@ encodeLoop:
|
||||
}
|
||||
blk.sequences = append(blk.sequences, seq)
|
||||
|
||||
index0 := s + repOff2
|
||||
s += lenght + repOff2
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
@ -498,15 +498,15 @@ encodeLoop:
|
||||
}
|
||||
|
||||
// Index match start+1 (long) -> s - 1
|
||||
index0 := s - l + 1
|
||||
off := index0 + e.cur
|
||||
for index0 < s-1 {
|
||||
cv0 := load6432(src, index0)
|
||||
cv1 := cv0 >> 8
|
||||
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
|
||||
off := index0 + e.cur
|
||||
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
|
||||
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
|
||||
index0 += 2
|
||||
off += 2
|
||||
}
|
||||
|
||||
cv = load6432(src, s)
|
||||
@ -672,7 +672,7 @@ encodeLoop:
|
||||
var t int32
|
||||
// We allow the encoder to optionally turn off repeat offsets across blocks
|
||||
canRepeat := len(blk.sequences) > 2
|
||||
var matched int32
|
||||
var matched, index0 int32
|
||||
|
||||
for {
|
||||
if debugAsserts && canRepeat && offset1 == 0 {
|
||||
@ -691,6 +691,7 @@ encodeLoop:
|
||||
e.markLongShardDirty(nextHashL)
|
||||
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
|
||||
e.markShortShardDirty(nextHashS)
|
||||
index0 = s + 1
|
||||
|
||||
if canRepeat {
|
||||
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
|
||||
@ -726,7 +727,6 @@ encodeLoop:
|
||||
blk.sequences = append(blk.sequences, seq)
|
||||
|
||||
// Index match start+1 (long) -> s - 1
|
||||
index0 := s + repOff
|
||||
s += lenght + repOff
|
||||
|
||||
nextEmit = s
|
||||
@ -790,7 +790,6 @@ encodeLoop:
|
||||
}
|
||||
blk.sequences = append(blk.sequences, seq)
|
||||
|
||||
index0 := s + repOff2
|
||||
s += lenght + repOff2
|
||||
nextEmit = s
|
||||
if s >= sLimit {
|
||||
@ -1024,18 +1023,18 @@ encodeLoop:
|
||||
}
|
||||
|
||||
// Index match start+1 (long) -> s - 1
|
||||
index0 := s - l + 1
|
||||
off := index0 + e.cur
|
||||
for index0 < s-1 {
|
||||
cv0 := load6432(src, index0)
|
||||
cv1 := cv0 >> 8
|
||||
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
|
||||
off := index0 + e.cur
|
||||
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
|
||||
e.markLongShardDirty(h0)
|
||||
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
|
||||
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
|
||||
e.markShortShardDirty(h1)
|
||||
index0 += 2
|
||||
off += 2
|
||||
}
|
||||
|
||||
cv = load6432(src, s)
|
||||
|
6
vendor/github.com/klauspost/compress/zstd/encoder_options.go
generated
vendored
6
vendor/github.com/klauspost/compress/zstd/encoder_options.go
generated
vendored
@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption {
|
||||
// The value must be a power of two between MinWindowSize and MaxWindowSize.
|
||||
// A larger value will enable better compression but allocate more memory and,
|
||||
// for above-default values, take considerably longer.
|
||||
// The default value is determined by the compression level.
|
||||
// The default value is determined by the compression level and max 8MB.
|
||||
func WithWindowSize(n int) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
switch {
|
||||
@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
|
||||
case SpeedDefault:
|
||||
o.windowSize = 8 << 20
|
||||
case SpeedBetterCompression:
|
||||
o.windowSize = 16 << 20
|
||||
o.windowSize = 8 << 20
|
||||
case SpeedBestCompression:
|
||||
o.windowSize = 32 << 20
|
||||
o.windowSize = 8 << 20
|
||||
}
|
||||
}
|
||||
if !o.customALEntropy {
|
||||
|
2
vendor/github.com/klauspost/compress/zstd/frameenc.go
generated
vendored
2
vendor/github.com/klauspost/compress/zstd/frameenc.go
generated
vendored
@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) []byte {
|
||||
if f.SingleSegment {
|
||||
dst = append(dst, uint8(f.ContentSize))
|
||||
}
|
||||
// Unless SingleSegment is set, framessizes < 256 are nto stored.
|
||||
// Unless SingleSegment is set, framessizes < 256 are not stored.
|
||||
case 1:
|
||||
f.ContentSize -= 256
|
||||
dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))
|
||||
|
11
vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
generated
vendored
11
vendor/github.com/klauspost/compress/zstd/fse_decoder_generic.go
generated
vendored
@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error {
|
||||
if v == -1 {
|
||||
s.dt[highThreshold].setAddBits(uint8(i))
|
||||
highThreshold--
|
||||
symbolNext[i] = 1
|
||||
} else {
|
||||
symbolNext[i] = uint16(v)
|
||||
v = 1
|
||||
}
|
||||
symbolNext[i] = uint16(v)
|
||||
}
|
||||
}
|
||||
|
||||
@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error {
|
||||
for ss, v := range s.norm[:s.symbolLen] {
|
||||
for i := 0; i < int(v); i++ {
|
||||
s.dt[position].setAddBits(uint8(ss))
|
||||
position = (position + step) & tableMask
|
||||
for position > highThreshold {
|
||||
for {
|
||||
// lowprob area
|
||||
position = (position + step) & tableMask
|
||||
if position <= highThreshold {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
136
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
136
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
|
||||
|
||||
// Update Literal Length State
|
||||
MOVBQZX DI, R14
|
||||
SHRQ $0x10, DI
|
||||
MOVWQZX DI, DI
|
||||
SHRL $0x10, DI
|
||||
LEAQ (BX)(R14*1), CX
|
||||
MOVQ DX, R15
|
||||
MOVQ CX, BX
|
||||
@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
|
||||
|
||||
// Update Match Length State
|
||||
MOVBQZX R8, R14
|
||||
SHRQ $0x10, R8
|
||||
MOVWQZX R8, R8
|
||||
SHRL $0x10, R8
|
||||
LEAQ (BX)(R14*1), CX
|
||||
MOVQ DX, R15
|
||||
MOVQ CX, BX
|
||||
@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
|
||||
|
||||
// Update Offset State
|
||||
MOVBQZX R9, R14
|
||||
SHRQ $0x10, R9
|
||||
MOVWQZX R9, R9
|
||||
SHRL $0x10, R9
|
||||
LEAQ (BX)(R14*1), CX
|
||||
MOVQ DX, R15
|
||||
MOVQ CX, BX
|
||||
@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
|
||||
|
||||
// Update Literal Length State
|
||||
MOVBQZX DI, R14
|
||||
SHRQ $0x10, DI
|
||||
MOVWQZX DI, DI
|
||||
SHRL $0x10, DI
|
||||
LEAQ (BX)(R14*1), CX
|
||||
MOVQ DX, R15
|
||||
MOVQ CX, BX
|
||||
@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
|
||||
|
||||
// Update Match Length State
|
||||
MOVBQZX R8, R14
|
||||
SHRQ $0x10, R8
|
||||
MOVWQZX R8, R8
|
||||
SHRL $0x10, R8
|
||||
LEAQ (BX)(R14*1), CX
|
||||
MOVQ DX, R15
|
||||
MOVQ CX, BX
|
||||
@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
|
||||
|
||||
// Update Offset State
|
||||
MOVBQZX R9, R14
|
||||
SHRQ $0x10, R9
|
||||
MOVWQZX R9, R9
|
||||
SHRL $0x10, R9
|
||||
LEAQ (BX)(R14*1), CX
|
||||
MOVQ DX, R15
|
||||
MOVQ CX, BX
|
||||
@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
|
||||
BZHIQ R14, R15, R15
|
||||
|
||||
// Update Offset State
|
||||
BZHIQ R8, R15, CX
|
||||
SHRXQ R8, R15, R15
|
||||
MOVQ $0x00001010, R14
|
||||
BEXTRQ R14, R8, R8
|
||||
ADDQ CX, R8
|
||||
BZHIQ R8, R15, CX
|
||||
SHRXQ R8, R15, R15
|
||||
SHRL $0x10, R8
|
||||
ADDQ CX, R8
|
||||
|
||||
// Load ctx.ofTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
|
||||
MOVQ (CX)(R8*8), R8
|
||||
|
||||
// Update Match Length State
|
||||
BZHIQ DI, R15, CX
|
||||
SHRXQ DI, R15, R15
|
||||
MOVQ $0x00001010, R14
|
||||
BEXTRQ R14, DI, DI
|
||||
ADDQ CX, DI
|
||||
BZHIQ DI, R15, CX
|
||||
SHRXQ DI, R15, R15
|
||||
SHRL $0x10, DI
|
||||
ADDQ CX, DI
|
||||
|
||||
// Load ctx.mlTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end:
|
||||
MOVQ (CX)(DI*8), DI
|
||||
|
||||
// Update Literal Length State
|
||||
BZHIQ SI, R15, CX
|
||||
MOVQ $0x00001010, R14
|
||||
BEXTRQ R14, SI, SI
|
||||
ADDQ CX, SI
|
||||
BZHIQ SI, R15, CX
|
||||
SHRL $0x10, SI
|
||||
ADDQ CX, SI
|
||||
|
||||
// Load ctx.llTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -1032,11 +1023,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
|
||||
BZHIQ R14, R15, R15
|
||||
|
||||
// Update Offset State
|
||||
BZHIQ R8, R15, CX
|
||||
SHRXQ R8, R15, R15
|
||||
MOVQ $0x00001010, R14
|
||||
BEXTRQ R14, R8, R8
|
||||
ADDQ CX, R8
|
||||
BZHIQ R8, R15, CX
|
||||
SHRXQ R8, R15, R15
|
||||
SHRL $0x10, R8
|
||||
ADDQ CX, R8
|
||||
|
||||
// Load ctx.ofTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
|
||||
MOVQ (CX)(R8*8), R8
|
||||
|
||||
// Update Match Length State
|
||||
BZHIQ DI, R15, CX
|
||||
SHRXQ DI, R15, R15
|
||||
MOVQ $0x00001010, R14
|
||||
BEXTRQ R14, DI, DI
|
||||
ADDQ CX, DI
|
||||
BZHIQ DI, R15, CX
|
||||
SHRXQ DI, R15, R15
|
||||
SHRL $0x10, DI
|
||||
ADDQ CX, DI
|
||||
|
||||
// Load ctx.mlTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end:
|
||||
MOVQ (CX)(DI*8), DI
|
||||
|
||||
// Update Literal Length State
|
||||
BZHIQ SI, R15, CX
|
||||
MOVQ $0x00001010, R14
|
||||
BEXTRQ R14, SI, SI
|
||||
ADDQ CX, SI
|
||||
BZHIQ SI, R15, CX
|
||||
SHRL $0x10, SI
|
||||
ADDQ CX, SI
|
||||
|
||||
// Load ctx.llTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
|
||||
|
||||
// Update Literal Length State
|
||||
MOVBQZX DI, R13
|
||||
SHRQ $0x10, DI
|
||||
MOVWQZX DI, DI
|
||||
SHRL $0x10, DI
|
||||
LEAQ (BX)(R13*1), CX
|
||||
MOVQ DX, R14
|
||||
MOVQ CX, BX
|
||||
@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
|
||||
|
||||
// Update Match Length State
|
||||
MOVBQZX R8, R13
|
||||
SHRQ $0x10, R8
|
||||
MOVWQZX R8, R8
|
||||
SHRL $0x10, R8
|
||||
LEAQ (BX)(R13*1), CX
|
||||
MOVQ DX, R14
|
||||
MOVQ CX, BX
|
||||
@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
|
||||
|
||||
// Update Offset State
|
||||
MOVBQZX R9, R13
|
||||
SHRQ $0x10, R9
|
||||
MOVWQZX R9, R9
|
||||
SHRL $0x10, R9
|
||||
LEAQ (BX)(R13*1), CX
|
||||
MOVQ DX, R14
|
||||
MOVQ CX, BX
|
||||
@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
|
||||
BZHIQ R13, R14, R14
|
||||
|
||||
// Update Offset State
|
||||
BZHIQ R8, R14, CX
|
||||
SHRXQ R8, R14, R14
|
||||
MOVQ $0x00001010, R13
|
||||
BEXTRQ R13, R8, R8
|
||||
ADDQ CX, R8
|
||||
BZHIQ R8, R14, CX
|
||||
SHRXQ R8, R14, R14
|
||||
SHRL $0x10, R8
|
||||
ADDQ CX, R8
|
||||
|
||||
// Load ctx.ofTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
|
||||
MOVQ (CX)(R8*8), R8
|
||||
|
||||
// Update Match Length State
|
||||
BZHIQ DI, R14, CX
|
||||
SHRXQ DI, R14, R14
|
||||
MOVQ $0x00001010, R13
|
||||
BEXTRQ R13, DI, DI
|
||||
ADDQ CX, DI
|
||||
BZHIQ DI, R14, CX
|
||||
SHRXQ DI, R14, R14
|
||||
SHRL $0x10, DI
|
||||
ADDQ CX, DI
|
||||
|
||||
// Load ctx.mlTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
|
||||
MOVQ (CX)(DI*8), DI
|
||||
|
||||
// Update Literal Length State
|
||||
BZHIQ SI, R14, CX
|
||||
MOVQ $0x00001010, R13
|
||||
BEXTRQ R13, SI, SI
|
||||
ADDQ CX, SI
|
||||
BZHIQ SI, R14, CX
|
||||
SHRL $0x10, SI
|
||||
ADDQ CX, SI
|
||||
|
||||
// Load ctx.llTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
|
||||
|
||||
// Update Literal Length State
|
||||
MOVBQZX DI, R13
|
||||
SHRQ $0x10, DI
|
||||
MOVWQZX DI, DI
|
||||
SHRL $0x10, DI
|
||||
LEAQ (BX)(R13*1), CX
|
||||
MOVQ DX, R14
|
||||
MOVQ CX, BX
|
||||
@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
|
||||
|
||||
// Update Match Length State
|
||||
MOVBQZX R8, R13
|
||||
SHRQ $0x10, R8
|
||||
MOVWQZX R8, R8
|
||||
SHRL $0x10, R8
|
||||
LEAQ (BX)(R13*1), CX
|
||||
MOVQ DX, R14
|
||||
MOVQ CX, BX
|
||||
@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
|
||||
|
||||
// Update Offset State
|
||||
MOVBQZX R9, R13
|
||||
SHRQ $0x10, R9
|
||||
MOVWQZX R9, R9
|
||||
SHRL $0x10, R9
|
||||
LEAQ (BX)(R13*1), CX
|
||||
MOVQ DX, R14
|
||||
MOVQ CX, BX
|
||||
@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
|
||||
BZHIQ R13, R14, R14
|
||||
|
||||
// Update Offset State
|
||||
BZHIQ R8, R14, CX
|
||||
SHRXQ R8, R14, R14
|
||||
MOVQ $0x00001010, R13
|
||||
BEXTRQ R13, R8, R8
|
||||
ADDQ CX, R8
|
||||
BZHIQ R8, R14, CX
|
||||
SHRXQ R8, R14, R14
|
||||
SHRL $0x10, R8
|
||||
ADDQ CX, R8
|
||||
|
||||
// Load ctx.ofTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
|
||||
MOVQ (CX)(R8*8), R8
|
||||
|
||||
// Update Match Length State
|
||||
BZHIQ DI, R14, CX
|
||||
SHRXQ DI, R14, R14
|
||||
MOVQ $0x00001010, R13
|
||||
BEXTRQ R13, DI, DI
|
||||
ADDQ CX, DI
|
||||
BZHIQ DI, R14, CX
|
||||
SHRXQ DI, R14, R14
|
||||
SHRL $0x10, DI
|
||||
ADDQ CX, DI
|
||||
|
||||
// Load ctx.mlTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
|
||||
MOVQ (CX)(DI*8), DI
|
||||
|
||||
// Update Literal Length State
|
||||
BZHIQ SI, R14, CX
|
||||
MOVQ $0x00001010, R13
|
||||
BEXTRQ R13, SI, SI
|
||||
ADDQ CX, SI
|
||||
BZHIQ SI, R14, CX
|
||||
SHRL $0x10, SI
|
||||
ADDQ CX, SI
|
||||
|
||||
// Load ctx.llTable
|
||||
MOVQ ctx+16(FP), CX
|
||||
|
24
vendor/github.com/klauspost/cpuid/v2/.gitignore
generated
vendored
24
vendor/github.com/klauspost/cpuid/v2/.gitignore
generated
vendored
@ -1,24 +0,0 @@
|
||||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
74
vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
generated
vendored
74
vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
generated
vendored
@ -1,74 +0,0 @@
|
||||
# This is an example goreleaser.yaml file with some sane defaults.
|
||||
# Make sure to check the documentation at http://goreleaser.com
|
||||
|
||||
builds:
|
||||
-
|
||||
id: "cpuid"
|
||||
binary: cpuid
|
||||
main: ./cmd/cpuid/main.go
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
flags:
|
||||
- -ldflags=-s -w
|
||||
goos:
|
||||
- aix
|
||||
- linux
|
||||
- freebsd
|
||||
- netbsd
|
||||
- windows
|
||||
- darwin
|
||||
goarch:
|
||||
- 386
|
||||
- amd64
|
||||
- arm64
|
||||
goarm:
|
||||
- 7
|
||||
|
||||
archives:
|
||||
-
|
||||
id: cpuid
|
||||
name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
|
||||
replacements:
|
||||
aix: AIX
|
||||
darwin: OSX
|
||||
linux: Linux
|
||||
windows: Windows
|
||||
386: i386
|
||||
amd64: x86_64
|
||||
freebsd: FreeBSD
|
||||
netbsd: NetBSD
|
||||
format_overrides:
|
||||
- goos: windows
|
||||
format: zip
|
||||
files:
|
||||
- LICENSE
|
||||
checksum:
|
||||
name_template: 'checksums.txt'
|
||||
snapshot:
|
||||
name_template: "{{ .Tag }}-next"
|
||||
changelog:
|
||||
sort: asc
|
||||
filters:
|
||||
exclude:
|
||||
- '^doc:'
|
||||
- '^docs:'
|
||||
- '^test:'
|
||||
- '^tests:'
|
||||
- '^Update\sREADME.md'
|
||||
|
||||
nfpms:
|
||||
-
|
||||
file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
|
||||
vendor: Klaus Post
|
||||
homepage: https://github.com/klauspost/cpuid
|
||||
maintainer: Klaus Post <klauspost@gmail.com>
|
||||
description: CPUID Tool
|
||||
license: BSD 3-Clause
|
||||
formats:
|
||||
- deb
|
||||
- rpm
|
||||
replacements:
|
||||
darwin: Darwin
|
||||
linux: Linux
|
||||
freebsd: FreeBSD
|
||||
amd64: x86_64
|
35
vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
generated
vendored
35
vendor/github.com/klauspost/cpuid/v2/CONTRIBUTING.txt
generated
vendored
@ -1,35 +0,0 @@
|
||||
Developer Certificate of Origin
|
||||
Version 1.1
|
||||
|
||||
Copyright (C) 2015- Klaus Post & Contributors.
|
||||
Email: klauspost@gmail.com
|
||||
|
||||
Everyone is permitted to copy and distribute verbatim copies of this
|
||||
license document, but changing it is not allowed.
|
||||
|
||||
|
||||
Developer's Certificate of Origin 1.1
|
||||
|
||||
By making a contribution to this project, I certify that:
|
||||
|
||||
(a) The contribution was created in whole or in part by me and I
|
||||
have the right to submit it under the open source license
|
||||
indicated in the file; or
|
||||
|
||||
(b) The contribution is based upon previous work that, to the best
|
||||
of my knowledge, is covered under an appropriate open source
|
||||
license and I have the right under that license to submit that
|
||||
work with modifications, whether created in whole or in part
|
||||
by me, under the same open source license (unless I am
|
||||
permitted to submit under a different license), as indicated
|
||||
in the file; or
|
||||
|
||||
(c) The contribution was provided directly to me by some other
|
||||
person who certified (a), (b) or (c) and I have not modified
|
||||
it.
|
||||
|
||||
(d) I understand and agree that this project and the contribution
|
||||
are public and that a record of the contribution (including all
|
||||
personal information I submit with it, including my sign-off) is
|
||||
maintained indefinitely and may be redistributed consistent with
|
||||
this project or the open source license(s) involved.
|
22
vendor/github.com/klauspost/cpuid/v2/LICENSE
generated
vendored
22
vendor/github.com/klauspost/cpuid/v2/LICENSE
generated
vendored
@ -1,22 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Klaus Post
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
497
vendor/github.com/klauspost/cpuid/v2/README.md
generated
vendored
497
vendor/github.com/klauspost/cpuid/v2/README.md
generated
vendored
@ -1,497 +0,0 @@
|
||||
# cpuid
|
||||
Package cpuid provides information about the CPU running the current program.
|
||||
|
||||
CPU features are detected on startup, and kept for fast access through the life of the application.
|
||||
Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
|
||||
|
||||
You can access the CPU information by accessing the shared CPU variable of the cpuid library.
|
||||
|
||||
Package home: https://github.com/klauspost/cpuid
|
||||
|
||||
[](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
|
||||
[](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
|
||||
|
||||
## installing
|
||||
|
||||
`go get -u github.com/klauspost/cpuid/v2` using modules.
|
||||
Drop `v2` for others.
|
||||
|
||||
Installing binary:
|
||||
|
||||
`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
|
||||
|
||||
Or download binaries from release page: https://github.com/klauspost/cpuid/releases
|
||||
|
||||
### Homebrew
|
||||
|
||||
For macOS/Linux users, you can install via [brew](https://brew.sh/)
|
||||
|
||||
```sh
|
||||
$ brew install cpuid
|
||||
```
|
||||
|
||||
## example
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
. "github.com/klauspost/cpuid/v2"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Print basic CPU information:
|
||||
fmt.Println("Name:", CPU.BrandName)
|
||||
fmt.Println("PhysicalCores:", CPU.PhysicalCores)
|
||||
fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
|
||||
fmt.Println("LogicalCores:", CPU.LogicalCores)
|
||||
fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
|
||||
fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
|
||||
fmt.Println("Cacheline bytes:", CPU.CacheLine)
|
||||
fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
|
||||
fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
|
||||
fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
|
||||
fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
|
||||
fmt.Println("Frequency", CPU.Hz, "hz")
|
||||
|
||||
// Test if we have these specific features:
|
||||
if CPU.Supports(SSE, SSE2) {
|
||||
fmt.Println("We have Streaming SIMD 2 Extensions")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Sample output:
|
||||
```
|
||||
>go run main.go
|
||||
Name: AMD Ryzen 9 3950X 16-Core Processor
|
||||
PhysicalCores: 16
|
||||
ThreadsPerCore: 2
|
||||
LogicalCores: 32
|
||||
Family 23 Model: 113 Vendor ID: AMD
|
||||
Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
|
||||
Cacheline bytes: 64
|
||||
L1 Data Cache: 32768 bytes
|
||||
L1 Instruction Cache: 32768 bytes
|
||||
L2 Cache: 524288 bytes
|
||||
L3 Cache: 16777216 bytes
|
||||
Frequency 0 hz
|
||||
We have Streaming SIMD 2 Extensions
|
||||
```
|
||||
|
||||
# usage
|
||||
|
||||
The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
|
||||
A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.
|
||||
|
||||
To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc.
|
||||
This can be using with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported.
|
||||
|
||||
Note that for some cpu/os combinations some features will not be detected.
|
||||
`amd64` has rather good support and should work reliably on all platforms.
|
||||
|
||||
Note that hypervisors may not pass through all CPU features through to the guest OS,
|
||||
so even if your host supports a feature it may not be visible on guests.
|
||||
|
||||
## arm64 feature detection
|
||||
|
||||
Not all operating systems provide ARM features directly
|
||||
and there is no safe way to do so for the rest.
|
||||
|
||||
Currently `arm64/linux` and `arm64/freebsd` should be quite reliable.
|
||||
`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
|
||||
|
||||
A `DetectARM()` can be used if you are able to control your deployment,
|
||||
it will detect CPU features, but may crash if the OS doesn't intercept the calls.
|
||||
A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
|
||||
|
||||
Note that currently only features are detected on ARM,
|
||||
no additional information is currently available.
|
||||
|
||||
## flags
|
||||
|
||||
It is possible to add flags that affects cpu detection.
|
||||
|
||||
For this the `Flags()` command is provided.
|
||||
|
||||
This must be called *before* `flag.Parse()` AND after the flags have been parsed `Detect()` must be called.
|
||||
|
||||
This means that any detection used in `init()` functions will not contain these flags.
|
||||
|
||||
Example:
|
||||
|
||||
```Go
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/klauspost/cpuid/v2"
|
||||
)
|
||||
|
||||
func main() {
|
||||
cpuid.Flags()
|
||||
flag.Parse()
|
||||
cpuid.Detect()
|
||||
|
||||
// Test if we have these specific features:
|
||||
if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
|
||||
fmt.Println("We have Streaming SIMD 2 Extensions")
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## commandline
|
||||
|
||||
Download as binary from: https://github.com/klauspost/cpuid/releases
|
||||
|
||||
Install from source:
|
||||
|
||||
`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
|
||||
|
||||
### Example
|
||||
|
||||
```
|
||||
λ cpuid
|
||||
Name: AMD Ryzen 9 3950X 16-Core Processor
|
||||
Vendor String: AuthenticAMD
|
||||
Vendor ID: AMD
|
||||
PhysicalCores: 16
|
||||
Threads Per Core: 2
|
||||
Logical Cores: 32
|
||||
CPU Family 23 Model: 113
|
||||
Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
|
||||
Microarchitecture level: 3
|
||||
Cacheline bytes: 64
|
||||
L1 Instruction Cache: 32768 bytes
|
||||
L1 Data Cache: 32768 bytes
|
||||
L2 Cache: 524288 bytes
|
||||
L3 Cache: 16777216 bytes
|
||||
|
||||
```
|
||||
### JSON Output:
|
||||
|
||||
```
|
||||
λ cpuid --json
|
||||
{
|
||||
"BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
|
||||
"VendorID": 2,
|
||||
"VendorString": "AuthenticAMD",
|
||||
"PhysicalCores": 16,
|
||||
"ThreadsPerCore": 2,
|
||||
"LogicalCores": 32,
|
||||
"Family": 23,
|
||||
"Model": 113,
|
||||
"CacheLine": 64,
|
||||
"Hz": 0,
|
||||
"BoostFreq": 0,
|
||||
"Cache": {
|
||||
"L1I": 32768,
|
||||
"L1D": 32768,
|
||||
"L2": 524288,
|
||||
"L3": 16777216
|
||||
},
|
||||
"SGX": {
|
||||
"Available": false,
|
||||
"LaunchControl": false,
|
||||
"SGX1Supported": false,
|
||||
"SGX2Supported": false,
|
||||
"MaxEnclaveSizeNot64": 0,
|
||||
"MaxEnclaveSize64": 0,
|
||||
"EPCSections": null
|
||||
},
|
||||
"Features": [
|
||||
"ADX",
|
||||
"AESNI",
|
||||
"AVX",
|
||||
"AVX2",
|
||||
"BMI1",
|
||||
"BMI2",
|
||||
"CLMUL",
|
||||
"CLZERO",
|
||||
"CMOV",
|
||||
"CMPXCHG8",
|
||||
"CPBOOST",
|
||||
"CX16",
|
||||
"F16C",
|
||||
"FMA3",
|
||||
"FXSR",
|
||||
"FXSROPT",
|
||||
"HTT",
|
||||
"HYPERVISOR",
|
||||
"LAHF",
|
||||
"LZCNT",
|
||||
"MCAOVERFLOW",
|
||||
"MMX",
|
||||
"MMXEXT",
|
||||
"MOVBE",
|
||||
"NX",
|
||||
"OSXSAVE",
|
||||
"POPCNT",
|
||||
"RDRAND",
|
||||
"RDSEED",
|
||||
"RDTSCP",
|
||||
"SCE",
|
||||
"SHA",
|
||||
"SSE",
|
||||
"SSE2",
|
||||
"SSE3",
|
||||
"SSE4",
|
||||
"SSE42",
|
||||
"SSE4A",
|
||||
"SSSE3",
|
||||
"SUCCOR",
|
||||
"X87",
|
||||
"XSAVE"
|
||||
],
|
||||
"X64Level": 3
|
||||
}
|
||||
```
|
||||
|
||||
### Check CPU microarch level
|
||||
|
||||
```
|
||||
λ cpuid --check-level=3
|
||||
2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
|
||||
2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
|
||||
Exit Code 0
|
||||
|
||||
λ cpuid --check-level=4
|
||||
2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
|
||||
2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
|
||||
Exit Code 1
|
||||
```
|
||||
|
||||
|
||||
## Available flags
|
||||
|
||||
### x86 & amd64
|
||||
|
||||
| Feature Flag | Description |
|
||||
|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
||||
| ADX | Intel ADX (Multi-Precision Add-Carry Instruction Extensions) |
|
||||
| AESNI | Advanced Encryption Standard New Instructions |
|
||||
| AMD3DNOW | AMD 3DNOW |
|
||||
| AMD3DNOWEXT | AMD 3DNowExt |
|
||||
| AMXBF16 | Tile computational operations on BFLOAT16 numbers |
|
||||
| AMXINT8 | Tile computational operations on 8-bit integers |
|
||||
| AMXFP16 | Tile computational operations on FP16 numbers |
|
||||
| AMXTILE | Tile architecture |
|
||||
| APX_F | Intel APX |
|
||||
| AVX | AVX functions |
|
||||
| AVX10 | If set the Intel AVX10 Converged Vector ISA is supported |
|
||||
| AVX10_128 | If set indicates that AVX10 128-bit vector support is present |
|
||||
| AVX10_256 | If set indicates that AVX10 256-bit vector support is present |
|
||||
| AVX10_512 | If set indicates that AVX10 512-bit vector support is present |
|
||||
| AVX2 | AVX2 functions |
|
||||
| AVX512BF16 | AVX-512 BFLOAT16 Instructions |
|
||||
| AVX512BITALG | AVX-512 Bit Algorithms |
|
||||
| AVX512BW | AVX-512 Byte and Word Instructions |
|
||||
| AVX512CD | AVX-512 Conflict Detection Instructions |
|
||||
| AVX512DQ | AVX-512 Doubleword and Quadword Instructions |
|
||||
| AVX512ER | AVX-512 Exponential and Reciprocal Instructions |
|
||||
| AVX512F | AVX-512 Foundation |
|
||||
| AVX512FP16 | AVX-512 FP16 Instructions |
|
||||
| AVX512IFMA | AVX-512 Integer Fused Multiply-Add Instructions |
|
||||
| AVX512PF | AVX-512 Prefetch Instructions |
|
||||
| AVX512VBMI | AVX-512 Vector Bit Manipulation Instructions |
|
||||
| AVX512VBMI2 | AVX-512 Vector Bit Manipulation Instructions, Version 2 |
|
||||
| AVX512VL | AVX-512 Vector Length Extensions |
|
||||
| AVX512VNNI | AVX-512 Vector Neural Network Instructions |
|
||||
| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q |
|
||||
| AVX512VPOPCNTDQ | AVX-512 Vector Population Count Doubleword and Quadword |
|
||||
| AVXIFMA | AVX-IFMA instructions |
|
||||
| AVXNECONVERT | AVX-NE-CONVERT instructions |
|
||||
| AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one |
|
||||
| AVXVNNI | AVX (VEX encoded) VNNI neural network instructions |
|
||||
| AVXVNNIINT8 | AVX-VNNI-INT8 instructions |
|
||||
| BHI_CTRL | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 |
|
||||
| BMI1 | Bit Manipulation Instruction Set 1 |
|
||||
| BMI2 | Bit Manipulation Instruction Set 2 |
|
||||
| CETIBT | Intel CET Indirect Branch Tracking |
|
||||
| CETSS | Intel CET Shadow Stack |
|
||||
| CLDEMOTE | Cache Line Demote |
|
||||
| CLMUL | Carry-less Multiplication |
|
||||
| CLZERO | CLZERO instruction supported |
|
||||
| CMOV | i686 CMOV |
|
||||
| CMPCCXADD | CMPCCXADD instructions |
|
||||
| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB |
|
||||
| CMPXCHG8 | CMPXCHG8 instruction |
|
||||
| CPBOOST | Core Performance Boost |
|
||||
| CPPC | AMD: Collaborative Processor Performance Control |
|
||||
| CX16 | CMPXCHG16B Instruction |
|
||||
| EFER_LMSLE_UNS | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ |
|
||||
| ENQCMD | Enqueue Command |
|
||||
| ERMS | Enhanced REP MOVSB/STOSB |
|
||||
| F16C | Half-precision floating-point conversion |
|
||||
| FLUSH_L1D | Flush L1D cache |
|
||||
| FMA3 | Intel FMA 3. Does not imply AVX. |
|
||||
| FMA4 | Bulldozer FMA4 functions |
|
||||
| FP128 | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide |
|
||||
| FP256 | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide |
|
||||
| FSRM | Fast Short Rep Mov |
|
||||
| FXSR | FXSAVE, FXRESTOR instructions, CR4 bit 9 |
|
||||
| FXSROPT | FXSAVE/FXRSTOR optimizations |
|
||||
| GFNI | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. |
|
||||
| HLE | Hardware Lock Elision |
|
||||
| HRESET | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR |
|
||||
| HTT | Hyperthreading (enabled) |
|
||||
| HWA | Hardware assert supported. Indicates support for MSRC001_10 |
|
||||
| HYBRID_CPU | This part has CPUs of more than one type. |
|
||||
| HYPERVISOR | This bit has been reserved by Intel & AMD for use by hypervisors |
|
||||
| IA32_ARCH_CAP | IA32_ARCH_CAPABILITIES MSR (Intel) |
|
||||
| IA32_CORE_CAP | IA32_CORE_CAPABILITIES MSR |
|
||||
| IBPB | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) |
|
||||
| IBRS | AMD: Indirect Branch Restricted Speculation |
|
||||
| IBRS_PREFERRED | AMD: IBRS is preferred over software solution |
|
||||
| IBRS_PROVIDES_SMP | AMD: IBRS provides Same Mode Protection |
|
||||
| IBS | Instruction Based Sampling (AMD) |
|
||||
| IBSBRNTRGT | Instruction Based Sampling Feature (AMD) |
|
||||
| IBSFETCHSAM | Instruction Based Sampling Feature (AMD) |
|
||||
| IBSFFV | Instruction Based Sampling Feature (AMD) |
|
||||
| IBSOPCNT | Instruction Based Sampling Feature (AMD) |
|
||||
| IBSOPCNTEXT | Instruction Based Sampling Feature (AMD) |
|
||||
| IBSOPSAM | Instruction Based Sampling Feature (AMD) |
|
||||
| IBSRDWROPCNT | Instruction Based Sampling Feature (AMD) |
|
||||
| IBSRIPINVALIDCHK | Instruction Based Sampling Feature (AMD) |
|
||||
| IBS_FETCH_CTLX | AMD: IBS fetch control extended MSR supported |
|
||||
| IBS_OPDATA4 | AMD: IBS op data 4 MSR supported |
|
||||
| IBS_OPFUSE | AMD: Indicates support for IbsOpFuse |
|
||||
| IBS_PREVENTHOST | Disallowing IBS use by the host supported |
|
||||
| IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 |
|
||||
| IDPRED_CTRL | IPRED_DIS |
|
||||
| INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
|
||||
| INVLPGB | NVLPGB and TLBSYNC instruction supported |
|
||||
| KEYLOCKER | Key locker |
|
||||
| KEYLOCKERW | Key locker wide |
|
||||
| LAHF | LAHF/SAHF in long mode |
|
||||
| LAM | If set, CPU supports Linear Address Masking |
|
||||
| LBRVIRT | LBR virtualization |
|
||||
| LZCNT | LZCNT instruction |
|
||||
| MCAOVERFLOW | MCA overflow recovery support. |
|
||||
| MCDT_NO | Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. |
|
||||
| MCOMMIT | MCOMMIT instruction supported |
|
||||
| MD_CLEAR | VERW clears CPU buffers |
|
||||
| MMX | standard MMX |
|
||||
| MMXEXT | SSE integer functions or AMD MMX ext |
|
||||
| MOVBE | MOVBE instruction (big-endian) |
|
||||
| MOVDIR64B | Move 64 Bytes as Direct Store |
|
||||
| MOVDIRI | Move Doubleword as Direct Store |
|
||||
| MOVSB_ZL | Fast Zero-Length MOVSB |
|
||||
| MPX | Intel MPX (Memory Protection Extensions) |
|
||||
| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD |
|
||||
| MSRIRC | Instruction Retired Counter MSR available |
|
||||
| MSRLIST | Read/Write List of Model Specific Registers |
|
||||
| MSR_PAGEFLUSH | Page Flush MSR available |
|
||||
| NRIPS | Indicates support for NRIP save on VMEXIT |
|
||||
| NX | NX (No-Execute) bit |
|
||||
| OSXSAVE | XSAVE enabled by OS |
|
||||
| PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption |
|
||||
| POPCNT | POPCNT instruction |
|
||||
| PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled |
|
||||
| PREFETCHI | PREFETCHIT0/1 instructions |
|
||||
| PSFD | Predictive Store Forward Disable |
|
||||
| RDPRU | RDPRU instruction supported |
|
||||
| RDRAND | RDRAND instruction is available |
|
||||
| RDSEED | RDSEED instruction is available |
|
||||
| RDTSCP | RDTSCP Instruction |
|
||||
| RRSBA_CTRL | Restricted RSB Alternate |
|
||||
| RTM | Restricted Transactional Memory |
|
||||
| RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. |
|
||||
| SERIALIZE | Serialize Instruction Execution |
|
||||
| SEV | AMD Secure Encrypted Virtualization supported |
|
||||
| SEV_64BIT | AMD SEV guest execution only allowed from a 64-bit host |
|
||||
| SEV_ALTERNATIVE | AMD SEV Alternate Injection supported |
|
||||
| SEV_DEBUGSWAP | Full debug state swap supported for SEV-ES guests |
|
||||
| SEV_ES | AMD SEV Encrypted State supported |
|
||||
| SEV_RESTRICTED | AMD SEV Restricted Injection supported |
|
||||
| SEV_SNP | AMD SEV Secure Nested Paging supported |
|
||||
| SGX | Software Guard Extensions |
|
||||
| SGXLC | Software Guard Extensions Launch Control |
|
||||
| SHA | Intel SHA Extensions |
|
||||
| SME | AMD Secure Memory Encryption supported |
|
||||
| SME_COHERENT | AMD Hardware cache coherency across encryption domains enforced |
|
||||
| SPEC_CTRL_SSBD | Speculative Store Bypass Disable |
|
||||
| SRBDS_CTRL | SRBDS mitigation MSR available |
|
||||
| SSE | SSE functions |
|
||||
| SSE2 | P4 SSE functions |
|
||||
| SSE3 | Prescott SSE3 functions |
|
||||
| SSE4 | Penryn SSE4.1 functions |
|
||||
| SSE42 | Nehalem SSE4.2 functions |
|
||||
| SSE4A | AMD Barcelona microarchitecture SSE4a instructions |
|
||||
| SSSE3 | Conroe SSSE3 functions |
|
||||
| STIBP | Single Thread Indirect Branch Predictors |
|
||||
| STIBP_ALWAYSON | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On |
|
||||
| STOSB_SHORT | Fast short STOSB |
|
||||
| SUCCOR | Software uncorrectable error containment and recovery capability. |
|
||||
| SVM | AMD Secure Virtual Machine |
|
||||
| SVMDA | Indicates support for the SVM decode assists. |
|
||||
| SVMFBASID | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control |
|
||||
| SVML | AMD SVM lock. Indicates support for SVM-Lock. |
|
||||
| SVMNP | AMD SVM nested paging |
|
||||
| SVMPF | SVM pause intercept filter. Indicates support for the pause intercept filter |
|
||||
| SVMPFT | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold |
|
||||
| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. |
|
||||
| SYSEE | SYSENTER and SYSEXIT instructions |
|
||||
| TBM | AMD Trailing Bit Manipulation |
|
||||
| TDX_GUEST | Intel Trust Domain Extensions Guest |
|
||||
| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations |
|
||||
| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. |
|
||||
| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. |
|
||||
| TSCRATEMSR | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 |
|
||||
| TSXLDTRK | Intel TSX Suspend Load Address Tracking |
|
||||
| VAES | Vector AES. AVX(512) versions requires additional checks. |
|
||||
| VMCBCLEAN | VMCB clean bits. Indicates support for VMCB clean bits. |
|
||||
| VMPL | AMD VM Permission Levels supported |
|
||||
| VMSA_REGPROT | AMD VMSA Register Protection supported |
|
||||
| VMX | Virtual Machine Extensions |
|
||||
| VPCLMULQDQ | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. |
|
||||
| VTE | AMD Virtual Transparent Encryption supported |
|
||||
| WAITPKG | TPAUSE, UMONITOR, UMWAIT |
|
||||
| WBNOINVD | Write Back and Do Not Invalidate Cache |
|
||||
| WRMSRNS | Non-Serializing Write to Model Specific Register |
|
||||
| X87 | FPU |
|
||||
| XGETBV1 | Supports XGETBV with ECX = 1 |
|
||||
| XOP | Bulldozer XOP functions |
|
||||
| XSAVE | XSAVE, XRESTOR, XSETBV, XGETBV |
|
||||
| XSAVEC | Supports XSAVEC and the compacted form of XRSTOR. |
|
||||
| XSAVEOPT | XSAVEOPT available |
|
||||
| XSAVES | Supports XSAVES/XRSTORS and IA32_XSS |
|
||||
|
||||
# ARM features:
|
||||
|
||||
| Feature Flag | Description |
|
||||
|--------------|------------------------------------------------------------------|
|
||||
| AESARM | AES instructions |
|
||||
| ARMCPUID | Some CPU ID registers readable at user-level |
|
||||
| ASIMD | Advanced SIMD |
|
||||
| ASIMDDP | SIMD Dot Product |
|
||||
| ASIMDHP | Advanced SIMD half-precision floating point |
|
||||
| ASIMDRDM | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) |
|
||||
| ATOMICS | Large System Extensions (LSE) |
|
||||
| CRC32 | CRC32/CRC32C instructions |
|
||||
| DCPOP | Data cache clean to Point of Persistence (DC CVAP) |
|
||||
| EVTSTRM | Generic timer |
|
||||
| FCMA | Floatin point complex number addition and multiplication |
|
||||
| FP | Single-precision and double-precision floating point |
|
||||
| FPHP | Half-precision floating point |
|
||||
| GPA | Generic Pointer Authentication |
|
||||
| JSCVT | Javascript-style double->int convert (FJCVTZS) |
|
||||
| LRCPC | Weaker release consistency (LDAPR, etc) |
|
||||
| PMULL | Polynomial Multiply instructions (PMULL/PMULL2) |
|
||||
| SHA1 | SHA-1 instructions (SHA1C, etc) |
|
||||
| SHA2 | SHA-2 instructions (SHA256H, etc) |
|
||||
| SHA3 | SHA-3 instructions (EOR3, RAXI, XAR, BCAX) |
|
||||
| SHA512 | SHA512 instructions |
|
||||
| SM3 | SM3 instructions |
|
||||
| SM4 | SM4 instructions |
|
||||
| SVE | Scalable Vector Extension |
|
||||
|
||||
# license
|
||||
|
||||
This code is published under an MIT license. See LICENSE file for more information.
|
1514
vendor/github.com/klauspost/cpuid/v2/cpuid.go
generated
vendored
1514
vendor/github.com/klauspost/cpuid/v2/cpuid.go
generated
vendored
File diff suppressed because it is too large
Load Diff
47
vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
generated
vendored
47
vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
generated
vendored
@ -1,47 +0,0 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//+build 386,!gccgo,!noasm,!appengine
|
||||
|
||||
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·asmCpuid(SB), 7, $0
|
||||
XORL CX, CX
|
||||
MOVL op+0(FP), AX
|
||||
CPUID
|
||||
MOVL AX, eax+4(FP)
|
||||
MOVL BX, ebx+8(FP)
|
||||
MOVL CX, ecx+12(FP)
|
||||
MOVL DX, edx+16(FP)
|
||||
RET
|
||||
|
||||
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·asmCpuidex(SB), 7, $0
|
||||
MOVL op+0(FP), AX
|
||||
MOVL op2+4(FP), CX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
|
||||
// func xgetbv(index uint32) (eax, edx uint32)
|
||||
TEXT ·asmXgetbv(SB), 7, $0
|
||||
MOVL index+0(FP), CX
|
||||
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
|
||||
MOVL AX, eax+4(FP)
|
||||
MOVL DX, edx+8(FP)
|
||||
RET
|
||||
|
||||
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·asmRdtscpAsm(SB), 7, $0
|
||||
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
|
||||
MOVL AX, eax+0(FP)
|
||||
MOVL BX, ebx+4(FP)
|
||||
MOVL CX, ecx+8(FP)
|
||||
MOVL DX, edx+12(FP)
|
||||
RET
|
||||
|
||||
// func asmDarwinHasAVX512() bool
|
||||
TEXT ·asmDarwinHasAVX512(SB), 7, $0
|
||||
MOVL $0, eax+0(FP)
|
||||
RET
|
72
vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
generated
vendored
72
vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
generated
vendored
@ -1,72 +0,0 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//+build amd64,!gccgo,!noasm,!appengine
|
||||
|
||||
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·asmCpuid(SB), 7, $0
|
||||
XORQ CX, CX
|
||||
MOVL op+0(FP), AX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
|
||||
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·asmCpuidex(SB), 7, $0
|
||||
MOVL op+0(FP), AX
|
||||
MOVL op2+4(FP), CX
|
||||
CPUID
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL BX, ebx+12(FP)
|
||||
MOVL CX, ecx+16(FP)
|
||||
MOVL DX, edx+20(FP)
|
||||
RET
|
||||
|
||||
// func asmXgetbv(index uint32) (eax, edx uint32)
|
||||
TEXT ·asmXgetbv(SB), 7, $0
|
||||
MOVL index+0(FP), CX
|
||||
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
|
||||
MOVL AX, eax+8(FP)
|
||||
MOVL DX, edx+12(FP)
|
||||
RET
|
||||
|
||||
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
|
||||
TEXT ·asmRdtscpAsm(SB), 7, $0
|
||||
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
|
||||
MOVL AX, eax+0(FP)
|
||||
MOVL BX, ebx+4(FP)
|
||||
MOVL CX, ecx+8(FP)
|
||||
MOVL DX, edx+12(FP)
|
||||
RET
|
||||
|
||||
// From https://go-review.googlesource.com/c/sys/+/285572/
|
||||
// func asmDarwinHasAVX512() bool
|
||||
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
|
||||
MOVB $0, ret+0(FP) // default to false
|
||||
|
||||
#ifdef GOOS_darwin // return if not darwin
|
||||
#ifdef GOARCH_amd64 // return if not amd64
|
||||
// These values from:
|
||||
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
|
||||
#define commpage64_base_address 0x00007fffffe00000
|
||||
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
|
||||
#define commpage64_version (commpage64_base_address+0x01E)
|
||||
#define hasAVX512F 0x0000004000000000
|
||||
MOVQ $commpage64_version, BX
|
||||
MOVW (BX), AX
|
||||
CMPW AX, $13 // versions < 13 do not support AVX512
|
||||
JL no_avx512
|
||||
MOVQ $commpage64_cpu_capabilities64, BX
|
||||
MOVQ (BX), AX
|
||||
MOVQ $hasAVX512F, CX
|
||||
ANDQ CX, AX
|
||||
JZ no_avx512
|
||||
MOVB $1, ret+0(FP)
|
||||
|
||||
no_avx512:
|
||||
#endif
|
||||
#endif
|
||||
RET
|
||||
|
26
vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
generated
vendored
26
vendor/github.com/klauspost/cpuid/v2/cpuid_arm64.s
generated
vendored
@ -1,26 +0,0 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//+build arm64,!gccgo,!noasm,!appengine
|
||||
|
||||
// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
|
||||
|
||||
// func getMidr
|
||||
TEXT ·getMidr(SB), 7, $0
|
||||
WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */
|
||||
MOVD R0, midr+0(FP)
|
||||
RET
|
||||
|
||||
// func getProcFeatures
|
||||
TEXT ·getProcFeatures(SB), 7, $0
|
||||
WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */
|
||||
MOVD R0, procFeatures+0(FP)
|
||||
RET
|
||||
|
||||
// func getInstAttributes
|
||||
TEXT ·getInstAttributes(SB), 7, $0
|
||||
WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
|
||||
WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
|
||||
MOVD R0, instAttrReg0+0(FP)
|
||||
MOVD R1, instAttrReg1+8(FP)
|
||||
RET
|
||||
|
247
vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
generated
vendored
247
vendor/github.com/klauspost/cpuid/v2/detect_arm64.go
generated
vendored
@ -1,247 +0,0 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//go:build arm64 && !gccgo && !noasm && !appengine
|
||||
// +build arm64,!gccgo,!noasm,!appengine
|
||||
|
||||
package cpuid
|
||||
|
||||
import "runtime"
|
||||
|
||||
func getMidr() (midr uint64)
|
||||
func getProcFeatures() (procFeatures uint64)
|
||||
func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
|
||||
|
||||
func initCPU() {
|
||||
cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
|
||||
cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
|
||||
xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
|
||||
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
|
||||
}
|
||||
|
||||
func addInfo(c *CPUInfo, safe bool) {
|
||||
// Seems to be safe to assume on ARM64
|
||||
c.CacheLine = 64
|
||||
detectOS(c)
|
||||
|
||||
// ARM64 disabled since it may crash if interrupt is not intercepted by OS.
|
||||
if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
|
||||
return
|
||||
}
|
||||
midr := getMidr()
|
||||
|
||||
// MIDR_EL1 - Main ID Register
|
||||
// https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
|
||||
// x--------------------------------------------------x
|
||||
// | Name | bits | visible |
|
||||
// |--------------------------------------------------|
|
||||
// | Implementer | [31-24] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | Variant | [23-20] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | Architecture | [19-16] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | PartNum | [15-4] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | Revision | [3-0] | y |
|
||||
// x--------------------------------------------------x
|
||||
|
||||
switch (midr >> 24) & 0xff {
|
||||
case 0xC0:
|
||||
c.VendorString = "Ampere Computing"
|
||||
c.VendorID = Ampere
|
||||
case 0x41:
|
||||
c.VendorString = "Arm Limited"
|
||||
c.VendorID = ARM
|
||||
case 0x42:
|
||||
c.VendorString = "Broadcom Corporation"
|
||||
c.VendorID = Broadcom
|
||||
case 0x43:
|
||||
c.VendorString = "Cavium Inc"
|
||||
c.VendorID = Cavium
|
||||
case 0x44:
|
||||
c.VendorString = "Digital Equipment Corporation"
|
||||
c.VendorID = DEC
|
||||
case 0x46:
|
||||
c.VendorString = "Fujitsu Ltd"
|
||||
c.VendorID = Fujitsu
|
||||
case 0x49:
|
||||
c.VendorString = "Infineon Technologies AG"
|
||||
c.VendorID = Infineon
|
||||
case 0x4D:
|
||||
c.VendorString = "Motorola or Freescale Semiconductor Inc"
|
||||
c.VendorID = Motorola
|
||||
case 0x4E:
|
||||
c.VendorString = "NVIDIA Corporation"
|
||||
c.VendorID = NVIDIA
|
||||
case 0x50:
|
||||
c.VendorString = "Applied Micro Circuits Corporation"
|
||||
c.VendorID = AMCC
|
||||
case 0x51:
|
||||
c.VendorString = "Qualcomm Inc"
|
||||
c.VendorID = Qualcomm
|
||||
case 0x56:
|
||||
c.VendorString = "Marvell International Ltd"
|
||||
c.VendorID = Marvell
|
||||
case 0x69:
|
||||
c.VendorString = "Intel Corporation"
|
||||
c.VendorID = Intel
|
||||
}
|
||||
|
||||
// Lower 4 bits: Architecture
|
||||
// Architecture Meaning
|
||||
// 0b0001 Armv4.
|
||||
// 0b0010 Armv4T.
|
||||
// 0b0011 Armv5 (obsolete).
|
||||
// 0b0100 Armv5T.
|
||||
// 0b0101 Armv5TE.
|
||||
// 0b0110 Armv5TEJ.
|
||||
// 0b0111 Armv6.
|
||||
// 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'.
|
||||
// Upper 4 bit: Variant
|
||||
// An IMPLEMENTATION DEFINED variant number.
|
||||
// Typically, this field is used to distinguish between different product variants, or major revisions of a product.
|
||||
c.Family = int(midr>>16) & 0xff
|
||||
|
||||
// PartNum, bits [15:4]
|
||||
// An IMPLEMENTATION DEFINED primary part number for the device.
|
||||
// On processors implemented by Arm, if the top four bits of the primary
|
||||
// part number are 0x0 or 0x7, the variant and architecture are encoded differently.
|
||||
// Revision, bits [3:0]
|
||||
// An IMPLEMENTATION DEFINED revision number for the device.
|
||||
c.Model = int(midr) & 0xffff
|
||||
|
||||
procFeatures := getProcFeatures()
|
||||
|
||||
// ID_AA64PFR0_EL1 - Processor Feature Register 0
|
||||
// x--------------------------------------------------x
|
||||
// | Name | bits | visible |
|
||||
// |--------------------------------------------------|
|
||||
// | DIT | [51-48] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | SVE | [35-32] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | GIC | [27-24] | n |
|
||||
// |--------------------------------------------------|
|
||||
// | AdvSIMD | [23-20] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | FP | [19-16] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | EL3 | [15-12] | n |
|
||||
// |--------------------------------------------------|
|
||||
// | EL2 | [11-8] | n |
|
||||
// |--------------------------------------------------|
|
||||
// | EL1 | [7-4] | n |
|
||||
// |--------------------------------------------------|
|
||||
// | EL0 | [3-0] | n |
|
||||
// x--------------------------------------------------x
|
||||
|
||||
var f flagSet
|
||||
// if procFeatures&(0xf<<48) != 0 {
|
||||
// fmt.Println("DIT")
|
||||
// }
|
||||
f.setIf(procFeatures&(0xf<<32) != 0, SVE)
|
||||
if procFeatures&(0xf<<20) != 15<<20 {
|
||||
f.set(ASIMD)
|
||||
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
|
||||
// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
|
||||
f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
|
||||
}
|
||||
f.setIf(procFeatures&(0xf<<16) != 0, FP)
|
||||
|
||||
instAttrReg0, instAttrReg1 := getInstAttributes()
|
||||
|
||||
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
|
||||
//
|
||||
// ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
|
||||
// x--------------------------------------------------x
|
||||
// | Name | bits | visible |
|
||||
// |--------------------------------------------------|
|
||||
// | TS | [55-52] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | FHM | [51-48] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | DP | [47-44] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | SM4 | [43-40] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | SM3 | [39-36] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | SHA3 | [35-32] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | RDM | [31-28] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | ATOMICS | [23-20] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | CRC32 | [19-16] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | SHA2 | [15-12] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | SHA1 | [11-8] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | AES | [7-4] | y |
|
||||
// x--------------------------------------------------x
|
||||
|
||||
// if instAttrReg0&(0xf<<52) != 0 {
|
||||
// fmt.Println("TS")
|
||||
// }
|
||||
// if instAttrReg0&(0xf<<48) != 0 {
|
||||
// fmt.Println("FHM")
|
||||
// }
|
||||
f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
|
||||
f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
|
||||
f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
|
||||
f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
|
||||
f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
|
||||
f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
|
||||
f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
|
||||
f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
|
||||
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
|
||||
// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
|
||||
f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
|
||||
f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
|
||||
f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
|
||||
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
|
||||
// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
|
||||
f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
|
||||
|
||||
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
|
||||
//
|
||||
// ID_AA64ISAR1_EL1 - Instruction set attribute register 1
|
||||
// x--------------------------------------------------x
|
||||
// | Name | bits | visible |
|
||||
// |--------------------------------------------------|
|
||||
// | GPI | [31-28] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | GPA | [27-24] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | LRCPC | [23-20] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | FCMA | [19-16] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | JSCVT | [15-12] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | API | [11-8] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | APA | [7-4] | y |
|
||||
// |--------------------------------------------------|
|
||||
// | DPB | [3-0] | y |
|
||||
// x--------------------------------------------------x
|
||||
|
||||
// if instAttrReg1&(0xf<<28) != 0 {
|
||||
// fmt.Println("GPI")
|
||||
// }
|
||||
f.setIf(instAttrReg1&(0xf<<28) != 24, GPA)
|
||||
f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
|
||||
f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
|
||||
f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
|
||||
// if instAttrReg1&(0xf<<8) != 0 {
|
||||
// fmt.Println("API")
|
||||
// }
|
||||
// if instAttrReg1&(0xf<<4) != 0 {
|
||||
// fmt.Println("APA")
|
||||
// }
|
||||
f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
|
||||
|
||||
// Store
|
||||
c.featureSet.or(f)
|
||||
}
|
15
vendor/github.com/klauspost/cpuid/v2/detect_ref.go
generated
vendored
15
vendor/github.com/klauspost/cpuid/v2/detect_ref.go
generated
vendored
@ -1,15 +0,0 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
|
||||
// +build !amd64,!386,!arm64 gccgo noasm appengine
|
||||
|
||||
package cpuid
|
||||
|
||||
func initCPU() {
|
||||
cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
|
||||
cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
|
||||
xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
|
||||
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
|
||||
}
|
||||
|
||||
func addInfo(info *CPUInfo, safe bool) {}
|
38
vendor/github.com/klauspost/cpuid/v2/detect_x86.go
generated
vendored
38
vendor/github.com/klauspost/cpuid/v2/detect_x86.go
generated
vendored
@ -1,38 +0,0 @@
|
||||
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
|
||||
// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
|
||||
|
||||
package cpuid
|
||||
|
||||
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
|
||||
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
|
||||
func asmXgetbv(index uint32) (eax, edx uint32)
|
||||
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
|
||||
func asmDarwinHasAVX512() bool
|
||||
|
||||
func initCPU() {
|
||||
cpuid = asmCpuid
|
||||
cpuidex = asmCpuidex
|
||||
xgetbv = asmXgetbv
|
||||
rdtscpAsm = asmRdtscpAsm
|
||||
darwinHasAVX512 = asmDarwinHasAVX512
|
||||
}
|
||||
|
||||
func addInfo(c *CPUInfo, safe bool) {
|
||||
c.maxFunc = maxFunctionID()
|
||||
c.maxExFunc = maxExtendedFunction()
|
||||
c.BrandName = brandName()
|
||||
c.CacheLine = cacheLine()
|
||||
c.Family, c.Model, c.Stepping = familyModel()
|
||||
c.featureSet = support()
|
||||
c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
|
||||
c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
|
||||
c.ThreadsPerCore = threadsPerCore()
|
||||
c.LogicalCores = logicalCores()
|
||||
c.PhysicalCores = physicalCores()
|
||||
c.VendorID, c.VendorString = vendorID()
|
||||
c.AVX10Level = c.supportAVX10()
|
||||
c.cacheSize()
|
||||
c.frequencies()
|
||||
}
|
284
vendor/github.com/klauspost/cpuid/v2/featureid_string.go
generated
vendored
284
vendor/github.com/klauspost/cpuid/v2/featureid_string.go
generated
vendored
@ -1,284 +0,0 @@
|
||||
// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
|
||||
|
||||
package cpuid
|
||||
|
||||
import "strconv"
|
||||
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[ADX-1]
|
||||
_ = x[AESNI-2]
|
||||
_ = x[AMD3DNOW-3]
|
||||
_ = x[AMD3DNOWEXT-4]
|
||||
_ = x[AMXBF16-5]
|
||||
_ = x[AMXFP16-6]
|
||||
_ = x[AMXINT8-7]
|
||||
_ = x[AMXTILE-8]
|
||||
_ = x[APX_F-9]
|
||||
_ = x[AVX-10]
|
||||
_ = x[AVX10-11]
|
||||
_ = x[AVX10_128-12]
|
||||
_ = x[AVX10_256-13]
|
||||
_ = x[AVX10_512-14]
|
||||
_ = x[AVX2-15]
|
||||
_ = x[AVX512BF16-16]
|
||||
_ = x[AVX512BITALG-17]
|
||||
_ = x[AVX512BW-18]
|
||||
_ = x[AVX512CD-19]
|
||||
_ = x[AVX512DQ-20]
|
||||
_ = x[AVX512ER-21]
|
||||
_ = x[AVX512F-22]
|
||||
_ = x[AVX512FP16-23]
|
||||
_ = x[AVX512IFMA-24]
|
||||
_ = x[AVX512PF-25]
|
||||
_ = x[AVX512VBMI-26]
|
||||
_ = x[AVX512VBMI2-27]
|
||||
_ = x[AVX512VL-28]
|
||||
_ = x[AVX512VNNI-29]
|
||||
_ = x[AVX512VP2INTERSECT-30]
|
||||
_ = x[AVX512VPOPCNTDQ-31]
|
||||
_ = x[AVXIFMA-32]
|
||||
_ = x[AVXNECONVERT-33]
|
||||
_ = x[AVXSLOW-34]
|
||||
_ = x[AVXVNNI-35]
|
||||
_ = x[AVXVNNIINT8-36]
|
||||
_ = x[BHI_CTRL-37]
|
||||
_ = x[BMI1-38]
|
||||
_ = x[BMI2-39]
|
||||
_ = x[CETIBT-40]
|
||||
_ = x[CETSS-41]
|
||||
_ = x[CLDEMOTE-42]
|
||||
_ = x[CLMUL-43]
|
||||
_ = x[CLZERO-44]
|
||||
_ = x[CMOV-45]
|
||||
_ = x[CMPCCXADD-46]
|
||||
_ = x[CMPSB_SCADBS_SHORT-47]
|
||||
_ = x[CMPXCHG8-48]
|
||||
_ = x[CPBOOST-49]
|
||||
_ = x[CPPC-50]
|
||||
_ = x[CX16-51]
|
||||
_ = x[EFER_LMSLE_UNS-52]
|
||||
_ = x[ENQCMD-53]
|
||||
_ = x[ERMS-54]
|
||||
_ = x[F16C-55]
|
||||
_ = x[FLUSH_L1D-56]
|
||||
_ = x[FMA3-57]
|
||||
_ = x[FMA4-58]
|
||||
_ = x[FP128-59]
|
||||
_ = x[FP256-60]
|
||||
_ = x[FSRM-61]
|
||||
_ = x[FXSR-62]
|
||||
_ = x[FXSROPT-63]
|
||||
_ = x[GFNI-64]
|
||||
_ = x[HLE-65]
|
||||
_ = x[HRESET-66]
|
||||
_ = x[HTT-67]
|
||||
_ = x[HWA-68]
|
||||
_ = x[HYBRID_CPU-69]
|
||||
_ = x[HYPERVISOR-70]
|
||||
_ = x[IA32_ARCH_CAP-71]
|
||||
_ = x[IA32_CORE_CAP-72]
|
||||
_ = x[IBPB-73]
|
||||
_ = x[IBPB_BRTYPE-74]
|
||||
_ = x[IBRS-75]
|
||||
_ = x[IBRS_PREFERRED-76]
|
||||
_ = x[IBRS_PROVIDES_SMP-77]
|
||||
_ = x[IBS-78]
|
||||
_ = x[IBSBRNTRGT-79]
|
||||
_ = x[IBSFETCHSAM-80]
|
||||
_ = x[IBSFFV-81]
|
||||
_ = x[IBSOPCNT-82]
|
||||
_ = x[IBSOPCNTEXT-83]
|
||||
_ = x[IBSOPSAM-84]
|
||||
_ = x[IBSRDWROPCNT-85]
|
||||
_ = x[IBSRIPINVALIDCHK-86]
|
||||
_ = x[IBS_FETCH_CTLX-87]
|
||||
_ = x[IBS_OPDATA4-88]
|
||||
_ = x[IBS_OPFUSE-89]
|
||||
_ = x[IBS_PREVENTHOST-90]
|
||||
_ = x[IBS_ZEN4-91]
|
||||
_ = x[IDPRED_CTRL-92]
|
||||
_ = x[INT_WBINVD-93]
|
||||
_ = x[INVLPGB-94]
|
||||
_ = x[KEYLOCKER-95]
|
||||
_ = x[KEYLOCKERW-96]
|
||||
_ = x[LAHF-97]
|
||||
_ = x[LAM-98]
|
||||
_ = x[LBRVIRT-99]
|
||||
_ = x[LZCNT-100]
|
||||
_ = x[MCAOVERFLOW-101]
|
||||
_ = x[MCDT_NO-102]
|
||||
_ = x[MCOMMIT-103]
|
||||
_ = x[MD_CLEAR-104]
|
||||
_ = x[MMX-105]
|
||||
_ = x[MMXEXT-106]
|
||||
_ = x[MOVBE-107]
|
||||
_ = x[MOVDIR64B-108]
|
||||
_ = x[MOVDIRI-109]
|
||||
_ = x[MOVSB_ZL-110]
|
||||
_ = x[MOVU-111]
|
||||
_ = x[MPX-112]
|
||||
_ = x[MSRIRC-113]
|
||||
_ = x[MSRLIST-114]
|
||||
_ = x[MSR_PAGEFLUSH-115]
|
||||
_ = x[NRIPS-116]
|
||||
_ = x[NX-117]
|
||||
_ = x[OSXSAVE-118]
|
||||
_ = x[PCONFIG-119]
|
||||
_ = x[POPCNT-120]
|
||||
_ = x[PPIN-121]
|
||||
_ = x[PREFETCHI-122]
|
||||
_ = x[PSFD-123]
|
||||
_ = x[RDPRU-124]
|
||||
_ = x[RDRAND-125]
|
||||
_ = x[RDSEED-126]
|
||||
_ = x[RDTSCP-127]
|
||||
_ = x[RRSBA_CTRL-128]
|
||||
_ = x[RTM-129]
|
||||
_ = x[RTM_ALWAYS_ABORT-130]
|
||||
_ = x[SBPB-131]
|
||||
_ = x[SERIALIZE-132]
|
||||
_ = x[SEV-133]
|
||||
_ = x[SEV_64BIT-134]
|
||||
_ = x[SEV_ALTERNATIVE-135]
|
||||
_ = x[SEV_DEBUGSWAP-136]
|
||||
_ = x[SEV_ES-137]
|
||||
_ = x[SEV_RESTRICTED-138]
|
||||
_ = x[SEV_SNP-139]
|
||||
_ = x[SGX-140]
|
||||
_ = x[SGXLC-141]
|
||||
_ = x[SHA-142]
|
||||
_ = x[SME-143]
|
||||
_ = x[SME_COHERENT-144]
|
||||
_ = x[SPEC_CTRL_SSBD-145]
|
||||
_ = x[SRBDS_CTRL-146]
|
||||
_ = x[SRSO_MSR_FIX-147]
|
||||
_ = x[SRSO_NO-148]
|
||||
_ = x[SRSO_USER_KERNEL_NO-149]
|
||||
_ = x[SSE-150]
|
||||
_ = x[SSE2-151]
|
||||
_ = x[SSE3-152]
|
||||
_ = x[SSE4-153]
|
||||
_ = x[SSE42-154]
|
||||
_ = x[SSE4A-155]
|
||||
_ = x[SSSE3-156]
|
||||
_ = x[STIBP-157]
|
||||
_ = x[STIBP_ALWAYSON-158]
|
||||
_ = x[STOSB_SHORT-159]
|
||||
_ = x[SUCCOR-160]
|
||||
_ = x[SVM-161]
|
||||
_ = x[SVMDA-162]
|
||||
_ = x[SVMFBASID-163]
|
||||
_ = x[SVML-164]
|
||||
_ = x[SVMNP-165]
|
||||
_ = x[SVMPF-166]
|
||||
_ = x[SVMPFT-167]
|
||||
_ = x[SYSCALL-168]
|
||||
_ = x[SYSEE-169]
|
||||
_ = x[TBM-170]
|
||||
_ = x[TDX_GUEST-171]
|
||||
_ = x[TLB_FLUSH_NESTED-172]
|
||||
_ = x[TME-173]
|
||||
_ = x[TOPEXT-174]
|
||||
_ = x[TSCRATEMSR-175]
|
||||
_ = x[TSXLDTRK-176]
|
||||
_ = x[VAES-177]
|
||||
_ = x[VMCBCLEAN-178]
|
||||
_ = x[VMPL-179]
|
||||
_ = x[VMSA_REGPROT-180]
|
||||
_ = x[VMX-181]
|
||||
_ = x[VPCLMULQDQ-182]
|
||||
_ = x[VTE-183]
|
||||
_ = x[WAITPKG-184]
|
||||
_ = x[WBNOINVD-185]
|
||||
_ = x[WRMSRNS-186]
|
||||
_ = x[X87-187]
|
||||
_ = x[XGETBV1-188]
|
||||
_ = x[XOP-189]
|
||||
_ = x[XSAVE-190]
|
||||
_ = x[XSAVEC-191]
|
||||
_ = x[XSAVEOPT-192]
|
||||
_ = x[XSAVES-193]
|
||||
_ = x[AESARM-194]
|
||||
_ = x[ARMCPUID-195]
|
||||
_ = x[ASIMD-196]
|
||||
_ = x[ASIMDDP-197]
|
||||
_ = x[ASIMDHP-198]
|
||||
_ = x[ASIMDRDM-199]
|
||||
_ = x[ATOMICS-200]
|
||||
_ = x[CRC32-201]
|
||||
_ = x[DCPOP-202]
|
||||
_ = x[EVTSTRM-203]
|
||||
_ = x[FCMA-204]
|
||||
_ = x[FP-205]
|
||||
_ = x[FPHP-206]
|
||||
_ = x[GPA-207]
|
||||
_ = x[JSCVT-208]
|
||||
_ = x[LRCPC-209]
|
||||
_ = x[PMULL-210]
|
||||
_ = x[SHA1-211]
|
||||
_ = x[SHA2-212]
|
||||
_ = x[SHA3-213]
|
||||
_ = x[SHA512-214]
|
||||
_ = x[SM3-215]
|
||||
_ = x[SM4-216]
|
||||
_ = x[SVE-217]
|
||||
_ = x[lastID-218]
|
||||
_ = x[firstID-0]
|
||||
}
|
||||
|
||||
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
|
||||
|
||||
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}
|
||||
|
||||
func (i FeatureID) String() string {
|
||||
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
|
||||
return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
|
||||
}
|
||||
func _() {
|
||||
// An "invalid array index" compiler error signifies that the constant values have changed.
|
||||
// Re-run the stringer command to generate them again.
|
||||
var x [1]struct{}
|
||||
_ = x[VendorUnknown-0]
|
||||
_ = x[Intel-1]
|
||||
_ = x[AMD-2]
|
||||
_ = x[VIA-3]
|
||||
_ = x[Transmeta-4]
|
||||
_ = x[NSC-5]
|
||||
_ = x[KVM-6]
|
||||
_ = x[MSVM-7]
|
||||
_ = x[VMware-8]
|
||||
_ = x[XenHVM-9]
|
||||
_ = x[Bhyve-10]
|
||||
_ = x[Hygon-11]
|
||||
_ = x[SiS-12]
|
||||
_ = x[RDC-13]
|
||||
_ = x[Ampere-14]
|
||||
_ = x[ARM-15]
|
||||
_ = x[Broadcom-16]
|
||||
_ = x[Cavium-17]
|
||||
_ = x[DEC-18]
|
||||
_ = x[Fujitsu-19]
|
||||
_ = x[Infineon-20]
|
||||
_ = x[Motorola-21]
|
||||
_ = x[NVIDIA-22]
|
||||
_ = x[AMCC-23]
|
||||
_ = x[Qualcomm-24]
|
||||
_ = x[Marvell-25]
|
||||
_ = x[lastVendor-26]
|
||||
}
|
||||
|
||||
const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
|
||||
|
||||
var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
|
||||
|
||||
func (i Vendor) String() string {
|
||||
if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
|
||||
return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
|
||||
}
|
||||
return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
|
||||
}
|
121
vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
generated
vendored
121
vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
generated
vendored
@ -1,121 +0,0 @@
|
||||
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
package cpuid
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
func detectOS(c *CPUInfo) bool {
|
||||
if runtime.GOOS != "ios" {
|
||||
tryToFillCPUInfoFomSysctl(c)
|
||||
}
|
||||
// There are no hw.optional sysctl values for the below features on Mac OS 11.0
|
||||
// to detect their supported state dynamically. Assume the CPU features that
|
||||
// Apple Silicon M1 supports to be available as a minimal set of features
|
||||
// to all Go programs running on darwin/arm64.
|
||||
// TODO: Add more if we know them.
|
||||
c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func sysctlGetBool(name string) bool {
|
||||
value, err := unix.SysctlUint32(name)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
return value != 0
|
||||
}
|
||||
|
||||
func sysctlGetString(name string) string {
|
||||
value, err := unix.Sysctl(name)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
return value
|
||||
}
|
||||
|
||||
func sysctlGetInt(unknown int, names ...string) int {
|
||||
for _, name := range names {
|
||||
value, err := unix.SysctlUint32(name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if value != 0 {
|
||||
return int(value)
|
||||
}
|
||||
}
|
||||
return unknown
|
||||
}
|
||||
|
||||
func sysctlGetInt64(unknown int, names ...string) int {
|
||||
for _, name := range names {
|
||||
value64, err := unix.SysctlUint64(name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
if int(value64) != unknown {
|
||||
return int(value64)
|
||||
}
|
||||
}
|
||||
return unknown
|
||||
}
|
||||
|
||||
func setFeature(c *CPUInfo, name string, feature FeatureID) {
|
||||
c.featureSet.setIf(sysctlGetBool(name), feature)
|
||||
}
|
||||
func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
|
||||
c.BrandName = sysctlGetString("machdep.cpu.brand_string")
|
||||
|
||||
if len(c.BrandName) != 0 {
|
||||
c.VendorString = strings.Fields(c.BrandName)[0]
|
||||
}
|
||||
|
||||
c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
|
||||
c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
|
||||
sysctlGetInt(1, "hw.physicalcpu")
|
||||
c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
|
||||
c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
|
||||
c.Model = sysctlGetInt(0, "machdep.cpu.model")
|
||||
c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
|
||||
c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
|
||||
c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
|
||||
c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
|
||||
c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
|
||||
|
||||
// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
|
||||
setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
|
||||
setFeature(c, "hw.optional.AdvSIMD", ASIMD)
|
||||
setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
|
||||
setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
|
||||
setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
|
||||
setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
|
||||
// setFeature(c, "", EVTSTRM)
|
||||
setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
|
||||
setFeature(c, "hw.optional.arm.FEAT_FP", FP)
|
||||
setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
|
||||
setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
|
||||
setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
|
||||
setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
|
||||
setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
|
||||
setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
|
||||
setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
|
||||
setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
|
||||
setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
|
||||
// setFeature(c, "", SM3)
|
||||
// setFeature(c, "", SM4)
|
||||
setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
|
||||
|
||||
// from empirical observation
|
||||
setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
|
||||
setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
|
||||
setFeature(c, "hw.optional.floatingpoint", FP)
|
||||
setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
|
||||
setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
|
||||
setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
|
||||
setFeature(c, "hw.optional.armv8_crc32", CRC32)
|
||||
}
|
130
vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
generated
vendored
130
vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
generated
vendored
@ -1,130 +0,0 @@
|
||||
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
// Copyright 2018 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file located
|
||||
// here https://github.com/golang/sys/blob/master/LICENSE
|
||||
|
||||
package cpuid
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"io/ioutil"
|
||||
"runtime"
|
||||
)
|
||||
|
||||
// HWCAP bits.
|
||||
const (
|
||||
hwcap_FP = 1 << 0
|
||||
hwcap_ASIMD = 1 << 1
|
||||
hwcap_EVTSTRM = 1 << 2
|
||||
hwcap_AES = 1 << 3
|
||||
hwcap_PMULL = 1 << 4
|
||||
hwcap_SHA1 = 1 << 5
|
||||
hwcap_SHA2 = 1 << 6
|
||||
hwcap_CRC32 = 1 << 7
|
||||
hwcap_ATOMICS = 1 << 8
|
||||
hwcap_FPHP = 1 << 9
|
||||
hwcap_ASIMDHP = 1 << 10
|
||||
hwcap_CPUID = 1 << 11
|
||||
hwcap_ASIMDRDM = 1 << 12
|
||||
hwcap_JSCVT = 1 << 13
|
||||
hwcap_FCMA = 1 << 14
|
||||
hwcap_LRCPC = 1 << 15
|
||||
hwcap_DCPOP = 1 << 16
|
||||
hwcap_SHA3 = 1 << 17
|
||||
hwcap_SM3 = 1 << 18
|
||||
hwcap_SM4 = 1 << 19
|
||||
hwcap_ASIMDDP = 1 << 20
|
||||
hwcap_SHA512 = 1 << 21
|
||||
hwcap_SVE = 1 << 22
|
||||
hwcap_ASIMDFHM = 1 << 23
|
||||
)
|
||||
|
||||
func detectOS(c *CPUInfo) bool {
|
||||
// For now assuming no hyperthreading is reasonable.
|
||||
c.LogicalCores = runtime.NumCPU()
|
||||
c.PhysicalCores = c.LogicalCores
|
||||
c.ThreadsPerCore = 1
|
||||
if hwcap == 0 {
|
||||
// We did not get values from the runtime.
|
||||
// Try reading /proc/self/auxv
|
||||
|
||||
// From https://github.com/golang/sys
|
||||
const (
|
||||
_AT_HWCAP = 16
|
||||
_AT_HWCAP2 = 26
|
||||
|
||||
uintSize = int(32 << (^uint(0) >> 63))
|
||||
)
|
||||
|
||||
buf, err := ioutil.ReadFile("/proc/self/auxv")
|
||||
if err != nil {
|
||||
// e.g. on android /proc/self/auxv is not accessible, so silently
|
||||
// ignore the error and leave Initialized = false. On some
|
||||
// architectures (e.g. arm64) doinit() implements a fallback
|
||||
// readout and will set Initialized = true again.
|
||||
return false
|
||||
}
|
||||
bo := binary.LittleEndian
|
||||
for len(buf) >= 2*(uintSize/8) {
|
||||
var tag, val uint
|
||||
switch uintSize {
|
||||
case 32:
|
||||
tag = uint(bo.Uint32(buf[0:]))
|
||||
val = uint(bo.Uint32(buf[4:]))
|
||||
buf = buf[8:]
|
||||
case 64:
|
||||
tag = uint(bo.Uint64(buf[0:]))
|
||||
val = uint(bo.Uint64(buf[8:]))
|
||||
buf = buf[16:]
|
||||
}
|
||||
switch tag {
|
||||
case _AT_HWCAP:
|
||||
hwcap = val
|
||||
case _AT_HWCAP2:
|
||||
// Not used
|
||||
}
|
||||
}
|
||||
if hwcap == 0 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// HWCap was populated by the runtime from the auxiliary vector.
|
||||
// Use HWCap information since reading aarch64 system registers
|
||||
// is not supported in user space on older linux kernels.
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4)
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE)
|
||||
|
||||
// The Samsung S9+ kernel reports support for atomics, but not all cores
|
||||
// actually support them, resulting in SIGILL. See issue #28431.
|
||||
// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
|
||||
c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS)
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func isSet(hwc uint, value uint) bool {
|
||||
return hwc&value != 0
|
||||
}
|
16
vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
generated
vendored
16
vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
generated
vendored
@ -1,16 +0,0 @@
|
||||
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//go:build arm64 && !linux && !darwin
|
||||
// +build arm64,!linux,!darwin
|
||||
|
||||
package cpuid
|
||||
|
||||
import "runtime"
|
||||
|
||||
func detectOS(c *CPUInfo) bool {
|
||||
c.PhysicalCores = runtime.NumCPU()
|
||||
// For now assuming 1 thread per core...
|
||||
c.ThreadsPerCore = 1
|
||||
c.LogicalCores = c.PhysicalCores
|
||||
return false
|
||||
}
|
8
vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
generated
vendored
8
vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
generated
vendored
@ -1,8 +0,0 @@
|
||||
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//go:build nounsafe
|
||||
// +build nounsafe
|
||||
|
||||
package cpuid
|
||||
|
||||
var hwcap uint
|
11
vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
generated
vendored
11
vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
generated
vendored
@ -1,11 +0,0 @@
|
||||
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
|
||||
|
||||
//go:build !nounsafe
|
||||
// +build !nounsafe
|
||||
|
||||
package cpuid
|
||||
|
||||
import _ "unsafe" // needed for go:linkname
|
||||
|
||||
//go:linkname hwcap internal/cpu.HWCap
|
||||
var hwcap uint
|
15
vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
generated
vendored
15
vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
generated
vendored
@ -1,15 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
set -e
|
||||
|
||||
go tool dist list | while IFS=/ read os arch; do
|
||||
echo "Checking $os/$arch..."
|
||||
echo " normal"
|
||||
GOARCH=$arch GOOS=$os go build -o /dev/null .
|
||||
echo " noasm"
|
||||
GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null .
|
||||
echo " appengine"
|
||||
GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null .
|
||||
echo " noasm,appengine"
|
||||
GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null .
|
||||
done
|
Reference in New Issue
Block a user