4
0
mirror of https://github.com/cwinfo/matterbridge.git synced 2025-07-13 07:56:28 +00:00

Update mattermost library (#2152)

* Update mattermost library

* Fix linting
This commit is contained in:
Wim
2024-05-24 23:08:09 +02:00
committed by GitHub
parent 65d78e38af
commit d16645c952
1003 changed files with 89451 additions and 114025 deletions

View File

@ -3,7 +3,6 @@
before:
hooks:
- ./gen.sh
- go install mvdan.cc/garble@v0.10.1
builds:
-
@ -32,7 +31,6 @@ builds:
- mips64le
goarm:
- 7
gobinary: garble
-
id: "s2d"
binary: s2d
@ -59,7 +57,6 @@ builds:
- mips64le
goarm:
- 7
gobinary: garble
-
id: "s2sx"
binary: s2sx
@ -87,7 +84,6 @@ builds:
- mips64le
goarm:
- 7
gobinary: garble
archives:
-

View File

@ -16,6 +16,46 @@ This package provides various compression algorithms.
# changelog
* Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6)
* zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923
* s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925
* Jan 26th, 2024 - [v1.17.5](https://github.com/klauspost/compress/releases/tag/v1.17.5)
* flate: Fix reset with dictionary on custom window encodes https://github.com/klauspost/compress/pull/912
* zstd: Add Frame header encoding and stripping https://github.com/klauspost/compress/pull/908
* zstd: Limit better/best default window to 8MB https://github.com/klauspost/compress/pull/913
* zstd: Speed improvements by @greatroar in https://github.com/klauspost/compress/pull/896 https://github.com/klauspost/compress/pull/910
* s2: Fix callbacks for skippable blocks and disallow 0xfe (Padding) by @Jille in https://github.com/klauspost/compress/pull/916 https://github.com/klauspost/compress/pull/917
https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/compress/pull/918
* Dec 1st, 2023 - [v1.17.4](https://github.com/klauspost/compress/releases/tag/v1.17.4)
* huff0: Speed up symbol counting by @greatroar in https://github.com/klauspost/compress/pull/887
* huff0: Remove byteReader by @greatroar in https://github.com/klauspost/compress/pull/886
* gzhttp: Allow overriding decompression on transport https://github.com/klauspost/compress/pull/892
* gzhttp: Clamp compression level https://github.com/klauspost/compress/pull/890
* gzip: Error out if reserved bits are set https://github.com/klauspost/compress/pull/891
* Nov 15th, 2023 - [v1.17.3](https://github.com/klauspost/compress/releases/tag/v1.17.3)
* fse: Fix max header size https://github.com/klauspost/compress/pull/881
* zstd: Improve better/best compression https://github.com/klauspost/compress/pull/877
* gzhttp: Fix missing content type on Close https://github.com/klauspost/compress/pull/883
* Oct 22nd, 2023 - [v1.17.2](https://github.com/klauspost/compress/releases/tag/v1.17.2)
* zstd: Fix rare *CORRUPTION* output in "best" mode. See https://github.com/klauspost/compress/pull/876
* Oct 14th, 2023 - [v1.17.1](https://github.com/klauspost/compress/releases/tag/v1.17.1)
* s2: Fix S2 "best" dictionary wrong encoding by @klauspost in https://github.com/klauspost/compress/pull/871
* flate: Reduce allocations in decompressor and minor code improvements by @fakefloordiv in https://github.com/klauspost/compress/pull/869
* s2: Fix EstimateBlockSize on 6&7 length input by @klauspost in https://github.com/klauspost/compress/pull/867
* Sept 19th, 2023 - [v1.17.0](https://github.com/klauspost/compress/releases/tag/v1.17.0)
* Add experimental dictionary builder https://github.com/klauspost/compress/pull/853
* Add xerial snappy read/writer https://github.com/klauspost/compress/pull/838
* flate: Add limited window compression https://github.com/klauspost/compress/pull/843
* s2: Do 2 overlapping match checks https://github.com/klauspost/compress/pull/839
* flate: Add amd64 assembly matchlen https://github.com/klauspost/compress/pull/837
* gzip: Copy bufio.Reader on Reset by @thatguystone in https://github.com/klauspost/compress/pull/860
* July 1st, 2023 - [v1.16.7](https://github.com/klauspost/compress/releases/tag/v1.16.7)
* zstd: Fix default level first dictionary encode https://github.com/klauspost/compress/pull/829
* s2: add GetBufferCapacity() method by @GiedriusS in https://github.com/klauspost/compress/pull/832
@ -538,7 +578,7 @@ For direct deflate use, NewStatelessWriter and StatelessDeflate are available. S
A `bufio.Writer` can of course be used to control write sizes. For example, to use a 4KB buffer:
```
```go
// replace 'ioutil.Discard' with your output.
gzw, err := gzip.NewWriterLevel(ioutil.Discard, gzip.StatelessCompression)
if err != nil {
@ -646,6 +686,7 @@ Here are other packages of good quality and pure Go (no cgo wrappers or autoconv
* [github.com/ronanh/intcomp](https://github.com/ronanh/intcomp) - Integer compression.
* [github.com/spenczar/fpc](https://github.com/spenczar/fpc) - Float compression.
* [github.com/minio/zipindex](https://github.com/minio/zipindex) - External ZIP directory index.
* [github.com/ybirader/pzip](https://github.com/ybirader/pzip) - Fast concurrent zip archiver and extractor.
# license

View File

@ -212,7 +212,7 @@ func (s *Scratch) writeCount() error {
previous0 bool
charnum uint16
maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3
maxHeaderSize = ((int(s.symbolLen)*int(tableLog) + 4 + 2) >> 3) + 3
// Write Table Size
bitStream = uint32(tableLog - minTablelog)

View File

@ -1,44 +0,0 @@
// Copyright 2018 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Based on work Copyright (c) 2013, Yann Collet, released under BSD License.
package huff0
// byteReader provides a byte reader that reads
// little endian values from a byte stream.
// The input stream is manually advanced.
// The reader performs no bounds checks.
type byteReader struct {
b []byte
off int
}
// init will initialize the reader and set the input.
func (b *byteReader) init(in []byte) {
b.b = in
b.off = 0
}
// Int32 returns a little endian int32 starting at current offset.
func (b byteReader) Int32() int32 {
v3 := int32(b.b[b.off+3])
v2 := int32(b.b[b.off+2])
v1 := int32(b.b[b.off+1])
v0 := int32(b.b[b.off])
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
}
// Uint32 returns a little endian uint32 starting at current offset.
func (b byteReader) Uint32() uint32 {
v3 := uint32(b.b[b.off+3])
v2 := uint32(b.b[b.off+2])
v1 := uint32(b.b[b.off+1])
v0 := uint32(b.b[b.off])
return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0
}
// remain will return the number of bytes remaining.
func (b byteReader) remain() int {
return len(b.b) - b.off
}

View File

@ -350,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
// Does not update s.clearCount.
func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
reuse = true
_ = s.count // Assert that s != nil to speed up the following loop.
for _, v := range in {
s.count[v]++
}
@ -415,7 +416,7 @@ func (s *Scratch) validateTable(c cTable) bool {
// minTableLog provides the minimum logSize to safely represent a distribution.
func (s *Scratch) minTableLog() uint8 {
minBitsSrc := highBit32(uint32(s.br.remain())) + 1
minBitsSrc := highBit32(uint32(s.srcLen)) + 1
minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2
if minBitsSrc < minBitsSymbols {
return uint8(minBitsSrc)
@ -427,7 +428,7 @@ func (s *Scratch) minTableLog() uint8 {
func (s *Scratch) optimalTableLog() {
tableLog := s.TableLog
minBits := s.minTableLog()
maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1
maxBitsSrc := uint8(highBit32(uint32(s.srcLen-1))) - 1
if maxBitsSrc < tableLog {
// Accuracy can be reduced
tableLog = maxBitsSrc

View File

@ -88,7 +88,7 @@ type Scratch struct {
// Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded.
MaxDecodedSize int
br byteReader
srcLen int
// MaxSymbolValue will override the maximum symbol value of the next block.
MaxSymbolValue uint8
@ -170,7 +170,7 @@ func (s *Scratch) prepare(in []byte) (*Scratch, error) {
if s.fse == nil {
s.fse = &fse.Scratch{}
}
s.br.init(in)
s.srcLen = len(in)
return s, nil
}

View File

@ -1,15 +0,0 @@
testdata/bench
# These explicitly listed benchmark data files are for an obsolete version of
# snappy_test.go.
testdata/alice29.txt
testdata/asyoulik.txt
testdata/fireworks.jpeg
testdata/geo.protodata
testdata/html
testdata/html_x_4
testdata/kppkn.gtb
testdata/lcet10.txt
testdata/paper-100k.pdf
testdata/plrabn12.txt
testdata/urls.10K

View File

@ -1,28 +0,0 @@
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
Copyright (c) 2019 Klaus Post. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

File diff suppressed because it is too large Load Diff

View File

@ -1,437 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"errors"
"fmt"
"strconv"
)
var (
// ErrCorrupt reports that the input is invalid.
ErrCorrupt = errors.New("s2: corrupt input")
// ErrCRC reports that the input failed CRC validation (streams only)
ErrCRC = errors.New("s2: corrupt input, crc mismatch")
// ErrTooLarge reports that the uncompressed length is too large.
ErrTooLarge = errors.New("s2: decoded block is too large")
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("s2: unsupported input")
)
// DecodedLen returns the length of the decoded block.
func DecodedLen(src []byte) (int, error) {
v, _, err := decodedLen(src)
return v, err
}
// decodedLen returns the length of the decoded block and the number of bytes
// that the length header occupied.
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
v, n := binary.Uvarint(src)
if n <= 0 || v > 0xffffffff {
return 0, 0, ErrCorrupt
}
const wordSize = 32 << (^uint(0) >> 32 & 1)
if wordSize == 32 && v > 0x7fffffff {
return 0, 0, ErrTooLarge
}
return int(v), n, nil
}
const (
decodeErrCodeCorrupt = 1
)
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
func Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if dLen <= cap(dst) {
dst = dst[:dLen]
} else {
dst = make([]byte, dLen)
}
if s2Decode(dst, src[s:]) != 0 {
return nil, ErrCorrupt
}
return dst, nil
}
// s2DecodeDict writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
// equals that length.
//
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
func s2DecodeDict(dst, src []byte, dict *Dict) int {
if dict == nil {
return s2Decode(dst, src)
}
const debug = false
const debugErrs = debug
if debug {
fmt.Println("Starting decode, dst len:", len(dst))
}
var d, s, length int
offset := len(dict.dict) - dict.repeat
// As long as we can read at least 5 bytes...
for s < len(src)-5 {
// Removing bounds checks is SLOWER, when if doing
// in := src[s:s+5]
// Checked on Go 1.18
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
x = uint32(src[s-1])
case x == 61:
in := src[s : s+3]
x = uint32(in[1]) | uint32(in[2])<<8
s += 3
case x == 62:
in := src[s : s+4]
// Load as 32 bit and shift down.
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x >>= 8
s += 4
case x == 63:
in := src[s : s+5]
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5
}
length = int(x) + 1
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
length = int(src[s-2]) >> 2 & 0x7
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
length = int(src[s]) + 4
s += 1
case 6:
in := src[s : s+2]
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2
case 7:
in := src[s : s+3]
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
in := src[s : s+3]
offset = int(uint32(in[1]) | uint32(in[2])<<8)
length = 1 + int(in[0])>>2
s += 3
case tagCopy4:
in := src[s : s+5]
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
length = 1 + int(in[0])>>2
s += 5
}
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
startOff := len(dict.dict) - offset + d
if startOff < 0 || startOff+length > len(dict.dict) {
if debugErrs {
fmt.Printf("offset (%d) + length (%d) bigger than dict (%d)\n", offset, length, len(dict.dict))
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("dict copy, length:", length, "offset:", offset, "d-after:", d+length, "dict start offset:", startOff)
}
copy(dst[d:d+length], dict.dict[startOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
// Remaining with extra checks...
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debugErrs {
fmt.Println("corrupt literal: length:", length, "d-left:", len(dst)-d, "src-left:", len(src)-s)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
s += 1
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-1])) + 4
case 6:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
if debugErrs {
fmt.Println("src went oob")
}
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || length > len(dst)-d {
if debugErrs {
fmt.Println("match error; offset:", offset, "length:", length, "dst-left:", len(dst)-d)
}
return decodeErrCodeCorrupt
}
// copy from dict
if d < offset {
if d > MaxDictSrcOffset {
if debugErrs {
fmt.Println("dict after", MaxDictSrcOffset, "d:", d, "offset:", offset, "length:", length)
}
return decodeErrCodeCorrupt
}
rOff := len(dict.dict) - (offset - d)
if debug {
fmt.Println("starting dict entry from dict offset", len(dict.dict)-rOff)
}
if rOff+length > len(dict.dict) {
if debugErrs {
fmt.Println("err: END offset", rOff+length, "bigger than dict", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
if rOff < 0 {
if debugErrs {
fmt.Println("err: START offset", rOff, "less than 0", len(dict.dict), "dict offset:", rOff, "length:", length)
}
return decodeErrCodeCorrupt
}
copy(dst[d:d+length], dict.dict[rOff:])
d += length
continue
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
if d != len(dst) {
if debugErrs {
fmt.Println("wanted length", len(dst), "got", d)
}
return decodeErrCodeCorrupt
}
return 0
}

View File

@ -1,568 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
#define R_TMP0 AX
#define R_TMP1 BX
#define R_LEN CX
#define R_OFF DX
#define R_SRC SI
#define R_DST DI
#define R_DBASE R8
#define R_DLEN R9
#define R_DEND R10
#define R_SBASE R11
#define R_SLEN R12
#define R_SEND R13
#define R_TMP2 R14
#define R_TMP3 R15
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
// - R_TMP0 scratch
// - R_TMP1 scratch
// - R_LEN length or x (shared)
// - R_OFF offset
// - R_SRC &src[s]
// - R_DST &dst[d]
// + R_DBASE dst_base
// + R_DLEN dst_len
// + R_DEND dst_base + dst_len
// + R_SBASE src_base
// + R_SLEN src_len
// + R_SEND src_base + src_len
// - R_TMP2 used by doCopy
// - R_TMP3 used by doCopy
//
// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
TEXT ·s2Decode(SB), NOSPLIT, $48-56
// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
MOVQ dst_base+0(FP), R_DBASE
MOVQ dst_len+8(FP), R_DLEN
MOVQ R_DBASE, R_DST
MOVQ R_DBASE, R_DEND
ADDQ R_DLEN, R_DEND
MOVQ src_base+24(FP), R_SBASE
MOVQ src_len+32(FP), R_SLEN
MOVQ R_SBASE, R_SRC
MOVQ R_SBASE, R_SEND
ADDQ R_SLEN, R_SEND
XORQ R_OFF, R_OFF
loop:
// for s < len(src)
CMPQ R_SRC, R_SEND
JEQ end
// R_LEN = uint32(src[s])
//
// switch src[s] & 0x03
MOVBLZX (R_SRC), R_LEN
MOVL R_LEN, R_TMP1
ANDL $3, R_TMP1
CMPL R_TMP1, $1
JAE tagCopy
// ----------------------------------------
// The code below handles literal tags.
// case tagLiteral:
// x := uint32(src[s] >> 2)
// switch
SHRL $2, R_LEN
CMPL R_LEN, $60
JAE tagLit60Plus
// case x < 60:
// s++
INCQ R_SRC
doLit:
// This is the end of the inner "switch", when we have a literal tag.
//
// We assume that R_LEN == x and x fits in a uint32, where x is the variable
// used in the pure Go decode_other.go code.
// length = int(x) + 1
//
// Unlike the pure Go code, we don't need to check if length <= 0 because
// R_LEN can hold 64 bits, so the increment cannot overflow.
INCQ R_LEN
// Prepare to check if copying length bytes will run past the end of dst or
// src.
//
// R_TMP0 = len(dst) - d
// R_TMP1 = len(src) - s
MOVQ R_DEND, R_TMP0
SUBQ R_DST, R_TMP0
MOVQ R_SEND, R_TMP1
SUBQ R_SRC, R_TMP1
// !!! Try a faster technique for short (16 or fewer bytes) copies.
//
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
// goto callMemmove // Fall back on calling runtime·memmove.
// }
//
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
// against 21 instead of 16, because it cannot assume that all of its input
// is contiguous in memory and so it needs to leave enough source bytes to
// read the next tag without refilling buffers, but Go's Decode assumes
// contiguousness (the src argument is a []byte).
CMPQ R_LEN, $16
JGT callMemmove
CMPQ R_TMP0, $16
JLT callMemmove
CMPQ R_TMP1, $16
JLT callMemmove
// !!! Implement the copy from src to dst as a 16-byte load and store.
// (Decode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only length bytes, but that's
// OK. If the input is a valid Snappy encoding then subsequent iterations
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
// non-nil error), so the overrun will be ignored.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(R_SRC), X0
MOVOU X0, 0(R_DST)
// d += length
// s += length
ADDQ R_LEN, R_DST
ADDQ R_LEN, R_SRC
JMP loop
callMemmove:
// if length > len(dst)-d || length > len(src)-s { etc }
CMPQ R_LEN, R_TMP0
JGT errCorrupt
CMPQ R_LEN, R_TMP1
JGT errCorrupt
// copy(dst[d:], src[s:s+length])
//
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
// three registers to the stack, to save local variables across the CALL.
MOVQ R_DST, 0(SP)
MOVQ R_SRC, 8(SP)
MOVQ R_LEN, 16(SP)
MOVQ R_DST, 24(SP)
MOVQ R_SRC, 32(SP)
MOVQ R_LEN, 40(SP)
MOVQ R_OFF, 48(SP)
CALL runtime·memmove(SB)
// Restore local variables: unspill registers from the stack and
// re-calculate R_DBASE-R_SEND.
MOVQ 24(SP), R_DST
MOVQ 32(SP), R_SRC
MOVQ 40(SP), R_LEN
MOVQ 48(SP), R_OFF
MOVQ dst_base+0(FP), R_DBASE
MOVQ dst_len+8(FP), R_DLEN
MOVQ R_DBASE, R_DEND
ADDQ R_DLEN, R_DEND
MOVQ src_base+24(FP), R_SBASE
MOVQ src_len+32(FP), R_SLEN
MOVQ R_SBASE, R_SEND
ADDQ R_SLEN, R_SEND
// d += length
// s += length
ADDQ R_LEN, R_DST
ADDQ R_LEN, R_SRC
JMP loop
tagLit60Plus:
// !!! This fragment does the
//
// s += x - 58; if uint(s) > uint(len(src)) { etc }
//
// checks. In the asm version, we code it once instead of once per switch case.
ADDQ R_LEN, R_SRC
SUBQ $58, R_SRC
CMPQ R_SRC, R_SEND
JA errCorrupt
// case x == 60:
CMPL R_LEN, $61
JEQ tagLit61
JA tagLit62Plus
// x = uint32(src[s-1])
MOVBLZX -1(R_SRC), R_LEN
JMP doLit
tagLit61:
// case x == 61:
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
MOVWLZX -2(R_SRC), R_LEN
JMP doLit
tagLit62Plus:
CMPL R_LEN, $62
JA tagLit63
// case x == 62:
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
// We read one byte, safe to read one back, since we are just reading tag.
// x = binary.LittleEndian.Uint32(src[s-1:]) >> 8
MOVL -4(R_SRC), R_LEN
SHRL $8, R_LEN
JMP doLit
tagLit63:
// case x == 63:
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
MOVL -4(R_SRC), R_LEN
JMP doLit
// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADDQ $5, R_SRC
// if uint(s) > uint(len(src)) { etc }
CMPQ R_SRC, R_SEND
JA errCorrupt
// length = 1 + int(src[s-5])>>2
SHRQ $2, R_LEN
INCQ R_LEN
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVLQZX -4(R_SRC), R_OFF
JMP doCopy
tagCopy2:
// case tagCopy2:
// s += 3
ADDQ $3, R_SRC
// if uint(s) > uint(len(src)) { etc }
CMPQ R_SRC, R_SEND
JA errCorrupt
// length = 1 + int(src[s-3])>>2
SHRQ $2, R_LEN
INCQ R_LEN
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVWQZX -2(R_SRC), R_OFF
JMP doCopy
tagCopy:
// We have a copy tag. We assume that:
// - R_TMP1 == src[s] & 0x03
// - R_LEN == src[s]
CMPQ R_TMP1, $2
JEQ tagCopy2
JA tagCopy4
// case tagCopy1:
// s += 2
ADDQ $2, R_SRC
// if uint(s) > uint(len(src)) { etc }
CMPQ R_SRC, R_SEND
JA errCorrupt
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
// length = 4 + int(src[s-2])>>2&0x7
MOVBQZX -1(R_SRC), R_TMP1
MOVQ R_LEN, R_TMP0
SHRQ $2, R_LEN
ANDQ $0xe0, R_TMP0
ANDQ $7, R_LEN
SHLQ $3, R_TMP0
ADDQ $4, R_LEN
ORQ R_TMP1, R_TMP0
// check if repeat code, ZF set by ORQ.
JZ repeatCode
// This is a regular copy, transfer our temporary value to R_OFF (length)
MOVQ R_TMP0, R_OFF
JMP doCopy
// This is a repeat code.
repeatCode:
// If length < 9, reuse last offset, with the length already calculated.
CMPQ R_LEN, $9
JL doCopyRepeat
// Read additional bytes for length.
JE repeatLen1
// Rare, so the extra branch shouldn't hurt too much.
CMPQ R_LEN, $10
JE repeatLen2
JMP repeatLen3
// Read repeat lengths.
repeatLen1:
// s ++
ADDQ $1, R_SRC
// if uint(s) > uint(len(src)) { etc }
CMPQ R_SRC, R_SEND
JA errCorrupt
// length = src[s-1] + 8
MOVBQZX -1(R_SRC), R_LEN
ADDL $8, R_LEN
JMP doCopyRepeat
repeatLen2:
// s +=2
ADDQ $2, R_SRC
// if uint(s) > uint(len(src)) { etc }
CMPQ R_SRC, R_SEND
JA errCorrupt
// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + (1 << 8)
MOVWQZX -2(R_SRC), R_LEN
ADDL $260, R_LEN
JMP doCopyRepeat
repeatLen3:
// s +=3
ADDQ $3, R_SRC
// if uint(s) > uint(len(src)) { etc }
CMPQ R_SRC, R_SEND
JA errCorrupt
// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + (1 << 16)
// Read one byte further back (just part of the tag, shifted out)
MOVL -4(R_SRC), R_LEN
SHRL $8, R_LEN
ADDL $65540, R_LEN
JMP doCopyRepeat
doCopy:
// This is the end of the outer "switch", when we have a copy tag.
//
// We assume that:
// - R_LEN == length && R_LEN > 0
// - R_OFF == offset
// if d < offset { etc }
MOVQ R_DST, R_TMP1
SUBQ R_DBASE, R_TMP1
CMPQ R_TMP1, R_OFF
JLT errCorrupt
// Repeat values can skip the test above, since any offset > 0 will be in dst.
doCopyRepeat:
// if offset <= 0 { etc }
CMPQ R_OFF, $0
JLE errCorrupt
// if length > len(dst)-d { etc }
MOVQ R_DEND, R_TMP1
SUBQ R_DST, R_TMP1
CMPQ R_LEN, R_TMP1
JGT errCorrupt
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
//
// Set:
// - R_TMP2 = len(dst)-d
// - R_TMP3 = &dst[d-offset]
MOVQ R_DEND, R_TMP2
SUBQ R_DST, R_TMP2
MOVQ R_DST, R_TMP3
SUBQ R_OFF, R_TMP3
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
//
// First, try using two 8-byte load/stores, similar to the doLit technique
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
// and not one 16-byte load/store, and the first store has to be before the
// second load, due to the overlap if offset is in the range [8, 16).
//
// if length > 16 || offset < 8 || len(dst)-d < 16 {
// goto slowForwardCopy
// }
// copy 16 bytes
// d += length
CMPQ R_LEN, $16
JGT slowForwardCopy
CMPQ R_OFF, $8
JLT slowForwardCopy
CMPQ R_TMP2, $16
JLT slowForwardCopy
MOVQ 0(R_TMP3), R_TMP0
MOVQ R_TMP0, 0(R_DST)
MOVQ 8(R_TMP3), R_TMP1
MOVQ R_TMP1, 8(R_DST)
ADDQ R_LEN, R_DST
JMP loop
slowForwardCopy:
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
// can still try 8-byte load stores, provided we can overrun up to 10 extra
// bytes. As above, the overrun will be fixed up by subsequent iterations
// of the outermost loop.
//
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
// commentary says:
//
// ----
//
// The main part of this loop is a simple copy of eight bytes at a time
// until we've copied (at least) the requested amount of bytes. However,
// if d and d-offset are less than eight bytes apart (indicating a
// repeating pattern of length < 8), we first need to expand the pattern in
// order to get the correct results. For instance, if the buffer looks like
// this, with the eight-byte <d-offset> and <d> patterns marked as
// intervals:
//
// abxxxxxxxxxxxx
// [------] d-offset
// [------] d
//
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
// once, after which we can move <d> two bytes without moving <d-offset>:
//
// ababxxxxxxxxxx
// [------] d-offset
// [------] d
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// offset == 1 and length == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.
//
// ----
//
// That "10 byte overrun" worst case is confirmed by Go's
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
// and finishSlowForwardCopy algorithm.
//
// if length > len(dst)-d-10 {
// goto verySlowForwardCopy
// }
SUBQ $10, R_TMP2
CMPQ R_LEN, R_TMP2
JGT verySlowForwardCopy
// We want to keep the offset, so we use R_TMP2 from here.
MOVQ R_OFF, R_TMP2
makeOffsetAtLeast8:
// !!! As above, expand the pattern so that offset >= 8 and we can use
// 8-byte load/stores.
//
// for offset < 8 {
// copy 8 bytes from dst[d-offset:] to dst[d:]
// length -= offset
// d += offset
// offset += offset
// // The two previous lines together means that d-offset, and therefore
// // R_TMP3, is unchanged.
// }
CMPQ R_TMP2, $8
JGE fixUpSlowForwardCopy
MOVQ (R_TMP3), R_TMP1
MOVQ R_TMP1, (R_DST)
SUBQ R_TMP2, R_LEN
ADDQ R_TMP2, R_DST
ADDQ R_TMP2, R_TMP2
JMP makeOffsetAtLeast8
fixUpSlowForwardCopy:
// !!! Add length (which might be negative now) to d (implied by R_DST being
// &dst[d]) so that d ends up at the right place when we jump back to the
// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
// length is positive, copying the remaining length bytes will write to the
// right place.
MOVQ R_DST, R_TMP0
ADDQ R_LEN, R_DST
finishSlowForwardCopy:
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
// length means that we overrun, but as above, that will be fixed up by
// subsequent iterations of the outermost loop.
CMPQ R_LEN, $0
JLE loop
MOVQ (R_TMP3), R_TMP1
MOVQ R_TMP1, (R_TMP0)
ADDQ $8, R_TMP3
ADDQ $8, R_TMP0
SUBQ $8, R_LEN
JMP finishSlowForwardCopy
verySlowForwardCopy:
// verySlowForwardCopy is a simple implementation of forward copy. In C
// parlance, this is a do/while loop instead of a while loop, since we know
// that length > 0. In Go syntax:
//
// for {
// dst[d] = dst[d - offset]
// d++
// length--
// if length == 0 {
// break
// }
// }
MOVB (R_TMP3), R_TMP1
MOVB R_TMP1, (R_DST)
INCQ R_TMP3
INCQ R_DST
DECQ R_LEN
JNZ verySlowForwardCopy
JMP loop
// The code above handles copy tags.
// ----------------------------------------
end:
// This is the end of the "for s < len(src)".
//
// if d != len(dst) { etc }
CMPQ R_DST, R_DEND
JNE errCorrupt
// return 0
MOVQ $0, ret+48(FP)
RET
errCorrupt:
// return decodeErrCodeCorrupt
MOVQ $1, ret+48(FP)
RET

View File

@ -1,574 +0,0 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
#define R_TMP0 R2
#define R_TMP1 R3
#define R_LEN R4
#define R_OFF R5
#define R_SRC R6
#define R_DST R7
#define R_DBASE R8
#define R_DLEN R9
#define R_DEND R10
#define R_SBASE R11
#define R_SLEN R12
#define R_SEND R13
#define R_TMP2 R14
#define R_TMP3 R15
// TEST_SRC will check if R_SRC is <= SRC_END
#define TEST_SRC() \
CMP R_SEND, R_SRC \
BGT errCorrupt
// MOVD R_SRC, R_TMP1
// SUB R_SBASE, R_TMP1, R_TMP1
// CMP R_SLEN, R_TMP1
// BGT errCorrupt
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
// - R_TMP0 scratch
// - R_TMP1 scratch
// - R_LEN length or x
// - R_OFF offset
// - R_SRC &src[s]
// - R_DST &dst[d]
// + R_DBASE dst_base
// + R_DLEN dst_len
// + R_DEND dst_base + dst_len
// + R_SBASE src_base
// + R_SLEN src_len
// + R_SEND src_base + src_len
// - R_TMP2 used by doCopy
// - R_TMP3 used by doCopy
//
// The registers R_DBASE-R_SEND (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly R_DST - R_DBASE, and len(dst)-d is R_DEND - R_DST.
// The s variable is implicitly R_SRC - R_SBASE, and len(src)-s is R_SEND - R_SRC.
TEXT ·s2Decode(SB), NOSPLIT, $56-64
// Initialize R_SRC, R_DST and R_DBASE-R_SEND.
MOVD dst_base+0(FP), R_DBASE
MOVD dst_len+8(FP), R_DLEN
MOVD R_DBASE, R_DST
MOVD R_DBASE, R_DEND
ADD R_DLEN, R_DEND, R_DEND
MOVD src_base+24(FP), R_SBASE
MOVD src_len+32(FP), R_SLEN
MOVD R_SBASE, R_SRC
MOVD R_SBASE, R_SEND
ADD R_SLEN, R_SEND, R_SEND
MOVD $0, R_OFF
loop:
// for s < len(src)
CMP R_SEND, R_SRC
BEQ end
// R_LEN = uint32(src[s])
//
// switch src[s] & 0x03
MOVBU (R_SRC), R_LEN
MOVW R_LEN, R_TMP1
ANDW $3, R_TMP1
MOVW $1, R1
CMPW R1, R_TMP1
BGE tagCopy
// ----------------------------------------
// The code below handles literal tags.
// case tagLiteral:
// x := uint32(src[s] >> 2)
// switch
MOVW $60, R1
LSRW $2, R_LEN, R_LEN
CMPW R_LEN, R1
BLS tagLit60Plus
// case x < 60:
// s++
ADD $1, R_SRC, R_SRC
doLit:
// This is the end of the inner "switch", when we have a literal tag.
//
// We assume that R_LEN == x and x fits in a uint32, where x is the variable
// used in the pure Go decode_other.go code.
// length = int(x) + 1
//
// Unlike the pure Go code, we don't need to check if length <= 0 because
// R_LEN can hold 64 bits, so the increment cannot overflow.
ADD $1, R_LEN, R_LEN
// Prepare to check if copying length bytes will run past the end of dst or
// src.
//
// R_TMP0 = len(dst) - d
// R_TMP1 = len(src) - s
MOVD R_DEND, R_TMP0
SUB R_DST, R_TMP0, R_TMP0
MOVD R_SEND, R_TMP1
SUB R_SRC, R_TMP1, R_TMP1
// !!! Try a faster technique for short (16 or fewer bytes) copies.
//
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
// goto callMemmove // Fall back on calling runtime·memmove.
// }
//
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
// against 21 instead of 16, because it cannot assume that all of its input
// is contiguous in memory and so it needs to leave enough source bytes to
// read the next tag without refilling buffers, but Go's Decode assumes
// contiguousness (the src argument is a []byte).
CMP $16, R_LEN
BGT callMemmove
CMP $16, R_TMP0
BLT callMemmove
CMP $16, R_TMP1
BLT callMemmove
// !!! Implement the copy from src to dst as a 16-byte load and store.
// (Decode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only length bytes, but that's
// OK. If the input is a valid Snappy encoding then subsequent iterations
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
// non-nil error), so the overrun will be ignored.
//
// Note that on arm64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
LDP 0(R_SRC), (R_TMP2, R_TMP3)
STP (R_TMP2, R_TMP3), 0(R_DST)
// d += length
// s += length
ADD R_LEN, R_DST, R_DST
ADD R_LEN, R_SRC, R_SRC
B loop
callMemmove:
// if length > len(dst)-d || length > len(src)-s { etc }
CMP R_TMP0, R_LEN
BGT errCorrupt
CMP R_TMP1, R_LEN
BGT errCorrupt
// copy(dst[d:], src[s:s+length])
//
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
// R_DST, R_SRC and R_LEN as arguments. Coincidentally, we also need to spill those
// three registers to the stack, to save local variables across the CALL.
MOVD R_DST, 8(RSP)
MOVD R_SRC, 16(RSP)
MOVD R_LEN, 24(RSP)
MOVD R_DST, 32(RSP)
MOVD R_SRC, 40(RSP)
MOVD R_LEN, 48(RSP)
MOVD R_OFF, 56(RSP)
CALL runtime·memmove(SB)
// Restore local variables: unspill registers from the stack and
// re-calculate R_DBASE-R_SEND.
MOVD 32(RSP), R_DST
MOVD 40(RSP), R_SRC
MOVD 48(RSP), R_LEN
MOVD 56(RSP), R_OFF
MOVD dst_base+0(FP), R_DBASE
MOVD dst_len+8(FP), R_DLEN
MOVD R_DBASE, R_DEND
ADD R_DLEN, R_DEND, R_DEND
MOVD src_base+24(FP), R_SBASE
MOVD src_len+32(FP), R_SLEN
MOVD R_SBASE, R_SEND
ADD R_SLEN, R_SEND, R_SEND
// d += length
// s += length
ADD R_LEN, R_DST, R_DST
ADD R_LEN, R_SRC, R_SRC
B loop
tagLit60Plus:
// !!! This fragment does the
//
// s += x - 58; if uint(s) > uint(len(src)) { etc }
//
// checks. In the asm version, we code it once instead of once per switch case.
ADD R_LEN, R_SRC, R_SRC
SUB $58, R_SRC, R_SRC
TEST_SRC()
// case x == 60:
MOVW $61, R1
CMPW R1, R_LEN
BEQ tagLit61
BGT tagLit62Plus
// x = uint32(src[s-1])
MOVBU -1(R_SRC), R_LEN
B doLit
tagLit61:
// case x == 61:
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
MOVHU -2(R_SRC), R_LEN
B doLit
tagLit62Plus:
CMPW $62, R_LEN
BHI tagLit63
// case x == 62:
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
MOVHU -3(R_SRC), R_LEN
MOVBU -1(R_SRC), R_TMP1
ORR R_TMP1<<16, R_LEN
B doLit
tagLit63:
// case x == 63:
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
MOVWU -4(R_SRC), R_LEN
B doLit
// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADD $5, R_SRC, R_SRC
// if uint(s) > uint(len(src)) { etc }
MOVD R_SRC, R_TMP1
SUB R_SBASE, R_TMP1, R_TMP1
CMP R_SLEN, R_TMP1
BGT errCorrupt
// length = 1 + int(src[s-5])>>2
MOVD $1, R1
ADD R_LEN>>2, R1, R_LEN
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVWU -4(R_SRC), R_OFF
B doCopy
tagCopy2:
// case tagCopy2:
// s += 3
ADD $3, R_SRC, R_SRC
// if uint(s) > uint(len(src)) { etc }
TEST_SRC()
// length = 1 + int(src[s-3])>>2
MOVD $1, R1
ADD R_LEN>>2, R1, R_LEN
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVHU -2(R_SRC), R_OFF
B doCopy
tagCopy:
// We have a copy tag. We assume that:
// - R_TMP1 == src[s] & 0x03
// - R_LEN == src[s]
CMP $2, R_TMP1
BEQ tagCopy2
BGT tagCopy4
// case tagCopy1:
// s += 2
ADD $2, R_SRC, R_SRC
// if uint(s) > uint(len(src)) { etc }
TEST_SRC()
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
// Calculate offset in R_TMP0 in case it is a repeat.
MOVD R_LEN, R_TMP0
AND $0xe0, R_TMP0
MOVBU -1(R_SRC), R_TMP1
ORR R_TMP0<<3, R_TMP1, R_TMP0
// length = 4 + int(src[s-2])>>2&0x7
MOVD $7, R1
AND R_LEN>>2, R1, R_LEN
ADD $4, R_LEN, R_LEN
// check if repeat code with offset 0.
CMP $0, R_TMP0
BEQ repeatCode
// This is a regular copy, transfer our temporary value to R_OFF (offset)
MOVD R_TMP0, R_OFF
B doCopy
// This is a repeat code.
repeatCode:
// If length < 9, reuse last offset, with the length already calculated.
CMP $9, R_LEN
BLT doCopyRepeat
BEQ repeatLen1
CMP $10, R_LEN
BEQ repeatLen2
repeatLen3:
// s +=3
ADD $3, R_SRC, R_SRC
// if uint(s) > uint(len(src)) { etc }
TEST_SRC()
// length = uint32(src[s-3]) | (uint32(src[s-2])<<8) | (uint32(src[s-1])<<16) + 65540
MOVBU -1(R_SRC), R_TMP0
MOVHU -3(R_SRC), R_LEN
ORR R_TMP0<<16, R_LEN, R_LEN
ADD $65540, R_LEN, R_LEN
B doCopyRepeat
repeatLen2:
// s +=2
ADD $2, R_SRC, R_SRC
// if uint(s) > uint(len(src)) { etc }
TEST_SRC()
// length = uint32(src[s-2]) | (uint32(src[s-1])<<8) + 260
MOVHU -2(R_SRC), R_LEN
ADD $260, R_LEN, R_LEN
B doCopyRepeat
repeatLen1:
// s +=1
ADD $1, R_SRC, R_SRC
// if uint(s) > uint(len(src)) { etc }
TEST_SRC()
// length = src[s-1] + 8
MOVBU -1(R_SRC), R_LEN
ADD $8, R_LEN, R_LEN
B doCopyRepeat
doCopy:
// This is the end of the outer "switch", when we have a copy tag.
//
// We assume that:
// - R_LEN == length && R_LEN > 0
// - R_OFF == offset
// if d < offset { etc }
MOVD R_DST, R_TMP1
SUB R_DBASE, R_TMP1, R_TMP1
CMP R_OFF, R_TMP1
BLT errCorrupt
// Repeat values can skip the test above, since any offset > 0 will be in dst.
doCopyRepeat:
// if offset <= 0 { etc }
CMP $0, R_OFF
BLE errCorrupt
// if length > len(dst)-d { etc }
MOVD R_DEND, R_TMP1
SUB R_DST, R_TMP1, R_TMP1
CMP R_TMP1, R_LEN
BGT errCorrupt
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
//
// Set:
// - R_TMP2 = len(dst)-d
// - R_TMP3 = &dst[d-offset]
MOVD R_DEND, R_TMP2
SUB R_DST, R_TMP2, R_TMP2
MOVD R_DST, R_TMP3
SUB R_OFF, R_TMP3, R_TMP3
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
//
// First, try using two 8-byte load/stores, similar to the doLit technique
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
// and not one 16-byte load/store, and the first store has to be before the
// second load, due to the overlap if offset is in the range [8, 16).
//
// if length > 16 || offset < 8 || len(dst)-d < 16 {
// goto slowForwardCopy
// }
// copy 16 bytes
// d += length
CMP $16, R_LEN
BGT slowForwardCopy
CMP $8, R_OFF
BLT slowForwardCopy
CMP $16, R_TMP2
BLT slowForwardCopy
MOVD 0(R_TMP3), R_TMP0
MOVD R_TMP0, 0(R_DST)
MOVD 8(R_TMP3), R_TMP1
MOVD R_TMP1, 8(R_DST)
ADD R_LEN, R_DST, R_DST
B loop
slowForwardCopy:
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
// can still try 8-byte load stores, provided we can overrun up to 10 extra
// bytes. As above, the overrun will be fixed up by subsequent iterations
// of the outermost loop.
//
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
// commentary says:
//
// ----
//
// The main part of this loop is a simple copy of eight bytes at a time
// until we've copied (at least) the requested amount of bytes. However,
// if d and d-offset are less than eight bytes apart (indicating a
// repeating pattern of length < 8), we first need to expand the pattern in
// order to get the correct results. For instance, if the buffer looks like
// this, with the eight-byte <d-offset> and <d> patterns marked as
// intervals:
//
// abxxxxxxxxxxxx
// [------] d-offset
// [------] d
//
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
// once, after which we can move <d> two bytes without moving <d-offset>:
//
// ababxxxxxxxxxx
// [------] d-offset
// [------] d
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// offset == 1 and length == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.
//
// ----
//
// That "10 byte overrun" worst case is confirmed by Go's
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
// and finishSlowForwardCopy algorithm.
//
// if length > len(dst)-d-10 {
// goto verySlowForwardCopy
// }
SUB $10, R_TMP2, R_TMP2
CMP R_TMP2, R_LEN
BGT verySlowForwardCopy
// We want to keep the offset, so we use R_TMP2 from here.
MOVD R_OFF, R_TMP2
makeOffsetAtLeast8:
// !!! As above, expand the pattern so that offset >= 8 and we can use
// 8-byte load/stores.
//
// for offset < 8 {
// copy 8 bytes from dst[d-offset:] to dst[d:]
// length -= offset
// d += offset
// offset += offset
// // The two previous lines together means that d-offset, and therefore
// // R_TMP3, is unchanged.
// }
CMP $8, R_TMP2
BGE fixUpSlowForwardCopy
MOVD (R_TMP3), R_TMP1
MOVD R_TMP1, (R_DST)
SUB R_TMP2, R_LEN, R_LEN
ADD R_TMP2, R_DST, R_DST
ADD R_TMP2, R_TMP2, R_TMP2
B makeOffsetAtLeast8
fixUpSlowForwardCopy:
// !!! Add length (which might be negative now) to d (implied by R_DST being
// &dst[d]) so that d ends up at the right place when we jump back to the
// top of the loop. Before we do that, though, we save R_DST to R_TMP0 so that, if
// length is positive, copying the remaining length bytes will write to the
// right place.
MOVD R_DST, R_TMP0
ADD R_LEN, R_DST, R_DST
finishSlowForwardCopy:
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
// length means that we overrun, but as above, that will be fixed up by
// subsequent iterations of the outermost loop.
MOVD $0, R1
CMP R1, R_LEN
BLE loop
MOVD (R_TMP3), R_TMP1
MOVD R_TMP1, (R_TMP0)
ADD $8, R_TMP3, R_TMP3
ADD $8, R_TMP0, R_TMP0
SUB $8, R_LEN, R_LEN
B finishSlowForwardCopy
verySlowForwardCopy:
// verySlowForwardCopy is a simple implementation of forward copy. In C
// parlance, this is a do/while loop instead of a while loop, since we know
// that length > 0. In Go syntax:
//
// for {
// dst[d] = dst[d - offset]
// d++
// length--
// if length == 0 {
// break
// }
// }
MOVB (R_TMP3), R_TMP1
MOVB R_TMP1, (R_DST)
ADD $1, R_TMP3, R_TMP3
ADD $1, R_DST, R_DST
SUB $1, R_LEN, R_LEN
CBNZ R_LEN, verySlowForwardCopy
B loop
// The code above handles copy tags.
// ----------------------------------------
end:
// This is the end of the "for s < len(src)".
//
// if d != len(dst) { etc }
CMP R_DEND, R_DST
BNE errCorrupt
// return 0
MOVD $0, ret+48(FP)
RET
errCorrupt:
// return decodeErrCodeCorrupt
MOVD $1, R_TMP0
MOVD R_TMP0, ret+48(FP)
RET

View File

@ -1,17 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (amd64 || arm64) && !appengine && gc && !noasm
// +build amd64 arm64
// +build !appengine
// +build gc
// +build !noasm
package s2
// decode has the same semantics as in decode_other.go.
//
//go:noescape
func s2Decode(dst, src []byte) int

View File

@ -1,292 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build (!amd64 && !arm64) || appengine || !gc || noasm
// +build !amd64,!arm64 appengine !gc noasm
package s2
import (
"fmt"
"strconv"
)
// decode writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
// equals that length.
//
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
func s2Decode(dst, src []byte) int {
const debug = false
if debug {
fmt.Println("Starting decode, dst len:", len(dst))
}
var d, s, length int
offset := 0
// As long as we can read at least 5 bytes...
for s < len(src)-5 {
// Removing bounds checks is SLOWER, when if doing
// in := src[s:s+5]
// Checked on Go 1.18
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
x = uint32(src[s-1])
case x == 61:
in := src[s : s+3]
x = uint32(in[1]) | uint32(in[2])<<8
s += 3
case x == 62:
in := src[s : s+4]
// Load as 32 bit and shift down.
x = uint32(in[0]) | uint32(in[1])<<8 | uint32(in[2])<<16 | uint32(in[3])<<24
x >>= 8
s += 4
case x == 63:
in := src[s : s+5]
x = uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24
s += 5
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debug {
fmt.Println("corrupt: lit size", length)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
length = int(src[s-2]) >> 2 & 0x7
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
length = int(src[s]) + 4
s += 1
case 6:
in := src[s : s+2]
length = int(uint32(in[0])|(uint32(in[1])<<8)) + (1 << 8)
s += 2
case 7:
in := src[s : s+3]
length = int((uint32(in[2])<<16)|(uint32(in[1])<<8)|uint32(in[0])) + (1 << 16)
s += 3
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
in := src[s : s+3]
offset = int(uint32(in[1]) | uint32(in[2])<<8)
length = 1 + int(in[0])>>2
s += 3
case tagCopy4:
in := src[s : s+5]
offset = int(uint32(in[1]) | uint32(in[2])<<8 | uint32(in[3])<<16 | uint32(in[4])<<24)
length = 1 + int(in[0])>>2
s += 5
}
if offset <= 0 || d < offset || length > len(dst)-d {
if debug {
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
// Remaining with extra checks...
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length > len(dst)-d || length > len(src)-s || (strconv.IntSize == 32 && length <= 0) {
if debug {
fmt.Println("corrupt: lit size", length)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("literals, length:", length, "d-after:", d+length)
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = int(src[s-2]) >> 2 & 0x7
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
if toffset == 0 {
if debug {
fmt.Print("(repeat) ")
}
// keep last offset
switch length {
case 5:
s += 1
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-1])) + 4
case 6:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
case 7:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
default: // 0-> 4
}
} else {
offset = toffset
}
length += 4
case tagCopy2:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || d < offset || length > len(dst)-d {
if debug {
fmt.Println("corrupt: match, length", length, "offset:", offset, "dst avail:", len(dst)-d, "dst pos:", d)
}
return decodeErrCodeCorrupt
}
if debug {
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
if d != len(dst) {
return decodeErrCodeCorrupt
}
return 0
}

View File

@ -1,350 +0,0 @@
// Copyright (c) 2022+ Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"bytes"
"encoding/binary"
"sync"
)
const (
// MinDictSize is the minimum dictionary size when repeat has been read.
MinDictSize = 16
// MaxDictSize is the maximum dictionary size when repeat has been read.
MaxDictSize = 65536
// MaxDictSrcOffset is the maximum offset where a dictionary entry can start.
MaxDictSrcOffset = 65535
)
// Dict contains a dictionary that can be used for encoding and decoding s2
type Dict struct {
dict []byte
repeat int // Repeat as index of dict
fast, better, best sync.Once
fastTable *[1 << 14]uint16
betterTableShort *[1 << 14]uint16
betterTableLong *[1 << 17]uint16
bestTableShort *[1 << 16]uint32
bestTableLong *[1 << 19]uint32
}
// NewDict will read a dictionary.
// It will return nil if the dictionary is invalid.
func NewDict(dict []byte) *Dict {
if len(dict) == 0 {
return nil
}
var d Dict
// Repeat is the first value of the dict
r, n := binary.Uvarint(dict)
if n <= 0 {
return nil
}
dict = dict[n:]
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
if len(dict) < MinDictSize || len(dict) > MaxDictSize {
return nil
}
d.repeat = int(r)
if d.repeat > len(dict) {
return nil
}
return &d
}
// Bytes will return a serialized version of the dictionary.
// The output can be sent to NewDict.
func (d *Dict) Bytes() []byte {
dst := make([]byte, binary.MaxVarintLen16+len(d.dict))
return append(dst[:binary.PutUvarint(dst, uint64(d.repeat))], d.dict...)
}
// MakeDict will create a dictionary.
// 'data' must be at least MinDictSize.
// If data is longer than MaxDictSize only the last MaxDictSize bytes will be used.
// If searchStart is set the start repeat value will be set to the last
// match of this content.
// If no matches are found, it will attempt to find shorter matches.
// This content should match the typical start of a block.
// If at least 4 bytes cannot be matched, repeat is set to start of block.
func MakeDict(data []byte, searchStart []byte) *Dict {
if len(data) == 0 {
return nil
}
if len(data) > MaxDictSize {
data = data[len(data)-MaxDictSize:]
}
var d Dict
dict := data
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
if len(dict) < MinDictSize {
return nil
}
// Find the longest match possible, last entry if multiple.
for s := len(searchStart); s > 4; s-- {
if idx := bytes.LastIndex(data, searchStart[:s]); idx >= 0 && idx <= len(data)-8 {
d.repeat = idx
break
}
}
return &d
}
// MakeDictManual will create a dictionary.
// 'data' must be at least MinDictSize and less than or equal to MaxDictSize.
// A manual first repeat index into data must be provided.
// It must be less than len(data)-8.
func MakeDictManual(data []byte, firstIdx uint16) *Dict {
if len(data) < MinDictSize || int(firstIdx) >= len(data)-8 || len(data) > MaxDictSize {
return nil
}
var d Dict
dict := data
d.dict = dict
if cap(d.dict) < len(d.dict)+16 {
d.dict = append(make([]byte, 0, len(d.dict)+16), d.dict...)
}
d.repeat = int(firstIdx)
return &d
}
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockDictGo(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) EncodeBetter(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockBetterDict(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as good as reasonably possible but with a
// big speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func (d *Dict) EncodeBest(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
dstP := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:dstP]
}
if len(src) < minNonLiteralBlockSize {
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
n := encodeBlockBest(dst[dstP:], src, d)
if n > 0 {
dstP += n
return dst[:dstP]
}
// Not compressible
dstP += emitLiteral(dst[dstP:], src)
return dst[:dstP]
}
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
func (d *Dict) Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if dLen <= cap(dst) {
dst = dst[:dLen]
} else {
dst = make([]byte, dLen)
}
if s2DecodeDict(dst, src[s:], d) != 0 {
return nil, ErrCorrupt
}
return dst, nil
}
func (d *Dict) initFast() {
d.fast.Do(func() {
const (
tableBits = 14
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint16
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8-2; i += 3 {
x0 := load64(d.dict, i)
h0 := hash6(x0, tableBits)
h1 := hash6(x0>>8, tableBits)
h2 := hash6(x0>>16, tableBits)
table[h0] = uint16(i)
table[h1] = uint16(i + 1)
table[h2] = uint16(i + 2)
}
d.fastTable = &table
})
}
func (d *Dict) initBetter() {
d.better.Do(func() {
const (
// Long hash matches.
lTableBits = 17
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 14
maxSTableSize = 1 << sTableBits
)
var lTable [maxLTableSize]uint16
var sTable [maxSTableSize]uint16
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8; i++ {
cv := load64(d.dict, i)
lTable[hash7(cv, lTableBits)] = uint16(i)
sTable[hash4(cv, sTableBits)] = uint16(i)
}
d.betterTableShort = &sTable
d.betterTableLong = &lTable
})
}
func (d *Dict) initBest() {
d.best.Do(func() {
const (
// Long hash matches.
lTableBits = 19
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 16
maxSTableSize = 1 << sTableBits
)
var lTable [maxLTableSize]uint32
var sTable [maxSTableSize]uint32
// We stop so any entry of length 8 can always be read.
for i := 0; i < len(d.dict)-8; i++ {
cv := load64(d.dict, i)
hashL := hash8(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL := lTable[hashL]
candidateS := sTable[hashS]
lTable[hashL] = uint32(i) | candidateL<<16
sTable[hashS] = uint32(i) | candidateS<<16
}
d.bestTableShort = &sTable
d.bestTableLong = &lTable
})
}

View File

@ -1,393 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"math"
"math/bits"
)
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:d]
}
if len(src) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlock(dst[d:], src)
if n > 0 {
d += n
return dst[:d]
}
// Not compressible
d += emitLiteral(dst[d:], src)
return dst[:d]
}
// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
// Using actual compression will most often produce better compression than the estimate.
func EstimateBlockSize(src []byte) (d int) {
if len(src) < 6 || int64(len(src)) > 0xffffffff {
return -1
}
if len(src) <= 1024 {
d = calcBlockSizeSmall(src)
} else {
d = calcBlockSize(src)
}
if d == 0 {
return -1
}
// Size of the varint encoded block size.
d += (bits.Len64(uint64(len(src))) + 7) / 7
if d >= len(src) {
return -1
}
return d
}
// EncodeBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBetter compresses better than Encode but typically with a
// 10-40% speed decrease on both compression and decompression.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func EncodeBetter(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:d]
}
if len(src) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlockBetter(dst[d:], src)
if n > 0 {
d += n
return dst[:d]
}
// Not compressible
d += emitLiteral(dst[d:], src)
return dst[:d]
}
// EncodeBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// EncodeBest compresses as good as reasonably possible but with a
// big speed decrease.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func EncodeBest(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:d]
}
if len(src) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlockBest(dst[d:], src, nil)
if n > 0 {
d += n
return dst[:d]
}
// Not compressible
d += emitLiteral(dst[d:], src)
return dst[:d]
}
// EncodeSnappy returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func EncodeSnappy(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:d]
}
if len(src) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlockSnappy(dst[d:], src)
if n > 0 {
d += n
return dst[:d]
}
// Not compressible
d += emitLiteral(dst[d:], src)
return dst[:d]
}
// EncodeSnappyBetter returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func EncodeSnappyBetter(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:d]
}
if len(src) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlockBetterSnappy(dst[d:], src)
if n > 0 {
d += n
return dst[:d]
}
// Not compressible
d += emitLiteral(dst[d:], src)
return dst[:d]
}
// EncodeSnappyBest returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The output is Snappy compatible and will likely decompress faster.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
//
// The blocks will require the same amount of memory to decode as encoding,
// and does not make for concurrent decoding.
// Also note that blocks do not contain CRC information, so corruption may be undetected.
//
// If you need to encode larger amounts of data, consider using
// the streaming interface which gives all of these features.
func EncodeSnappyBest(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if cap(dst) < n {
dst = make([]byte, n)
} else {
dst = dst[:n]
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
if len(src) == 0 {
return dst[:d]
}
if len(src) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], src)
return dst[:d]
}
n := encodeBlockBestSnappy(dst[d:], src)
if n > 0 {
d += n
return dst[:d]
}
// Not compressible
d += emitLiteral(dst[d:], src)
return dst[:d]
}
// ConcatBlocks will concatenate the supplied blocks and append them to the supplied destination.
// If the destination is nil or too small, a new will be allocated.
// The blocks are not validated, so garbage in = garbage out.
// dst may not overlap block data.
// Any data in dst is preserved as is, so it will not be considered a block.
func ConcatBlocks(dst []byte, blocks ...[]byte) ([]byte, error) {
totalSize := uint64(0)
compSize := 0
for _, b := range blocks {
l, hdr, err := decodedLen(b)
if err != nil {
return nil, err
}
totalSize += uint64(l)
compSize += len(b) - hdr
}
if totalSize == 0 {
dst = append(dst, 0)
return dst, nil
}
if totalSize > math.MaxUint32 {
return nil, ErrTooLarge
}
var tmp [binary.MaxVarintLen32]byte
hdrSize := binary.PutUvarint(tmp[:], totalSize)
wantSize := hdrSize + compSize
if cap(dst)-len(dst) < wantSize {
dst = append(make([]byte, 0, wantSize+len(dst)), dst...)
}
dst = append(dst, tmp[:hdrSize]...)
for _, b := range blocks {
_, hdr, err := decodedLen(b)
if err != nil {
return nil, err
}
dst = append(dst, b[hdr:]...)
}
return dst, nil
}
// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 8
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// will be accepted by the encoder.
const minNonLiteralBlockSize = 32
const intReduction = 2 - (1 << (^uint(0) >> 63)) // 1 (32 bits) or 0 (64 bits)
// MaxBlockSize is the maximum value where MaxEncodedLen will return a valid block size.
// Blocks this big are highly discouraged, though.
// Half the size on 32 bit systems.
const MaxBlockSize = (1<<(32-intReduction) - 1) - binary.MaxVarintLen32 - 5
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
// 32 bit platforms will have lower thresholds for rejecting big content.
func MaxEncodedLen(srcLen int) int {
n := uint64(srcLen)
if intReduction == 1 {
// 32 bits
if n > math.MaxInt32 {
// Also includes negative.
return -1
}
} else if n > 0xffffffff {
// 64 bits
// Also includes negative.
return -1
}
// Size of the varint encoded block size.
n = n + uint64((bits.Len64(n)+7)/7)
// Add maximum size of encoding block as literals.
n += uint64(literalExtraSize(int64(srcLen)))
if intReduction == 1 {
// 32 bits
if n > math.MaxInt32 {
return -1
}
} else if n > 0xffffffff {
// 64 bits
// Also includes negative.
return -1
}
return int(n)
}

File diff suppressed because it is too large Load Diff

View File

@ -1,148 +0,0 @@
//go:build !appengine && !noasm && gc
// +build !appengine,!noasm,gc
package s2
const hasAmd64Asm = true
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
limit12B = 16 << 10
// Use 10 bit table when less than...
limit10B = 4 << 10
// Use 8 bit table when less than...
limit8B = 512
)
if len(src) >= 4<<20 {
return encodeBlockAsm(dst, src)
}
if len(src) >= limit12B {
return encodeBlockAsm4MB(dst, src)
}
if len(src) >= limit10B {
return encodeBlockAsm12B(dst, src)
}
if len(src) >= limit8B {
return encodeBlockAsm10B(dst, src)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
return encodeBlockAsm8B(dst, src)
}
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetter(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
limit12B = 16 << 10
// Use 10 bit table when less than...
limit10B = 4 << 10
// Use 8 bit table when less than...
limit8B = 512
)
if len(src) > 4<<20 {
return encodeBetterBlockAsm(dst, src)
}
if len(src) >= limit12B {
return encodeBetterBlockAsm4MB(dst, src)
}
if len(src) >= limit10B {
return encodeBetterBlockAsm12B(dst, src)
}
if len(src) >= limit8B {
return encodeBetterBlockAsm10B(dst, src)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
return encodeBetterBlockAsm8B(dst, src)
}
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockSnappy(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
limit12B = 16 << 10
// Use 10 bit table when less than...
limit10B = 4 << 10
// Use 8 bit table when less than...
limit8B = 512
)
if len(src) >= 64<<10 {
return encodeSnappyBlockAsm(dst, src)
}
if len(src) >= limit12B {
return encodeSnappyBlockAsm64K(dst, src)
}
if len(src) >= limit10B {
return encodeSnappyBlockAsm12B(dst, src)
}
if len(src) >= limit8B {
return encodeSnappyBlockAsm10B(dst, src)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
return encodeSnappyBlockAsm8B(dst, src)
}
// encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
const (
// Use 12 bit table when less than...
limit12B = 16 << 10
// Use 10 bit table when less than...
limit10B = 4 << 10
// Use 8 bit table when less than...
limit8B = 512
)
if len(src) >= 64<<10 {
return encodeSnappyBetterBlockAsm(dst, src)
}
if len(src) >= limit12B {
return encodeSnappyBetterBlockAsm64K(dst, src)
}
if len(src) >= limit10B {
return encodeSnappyBetterBlockAsm12B(dst, src)
}
if len(src) >= limit8B {
return encodeSnappyBetterBlockAsm10B(dst, src)
}
if len(src) < minNonLiteralBlockSize {
return 0
}
return encodeSnappyBetterBlockAsm8B(dst, src)
}

View File

@ -1,793 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"fmt"
"math"
"math/bits"
)
// encodeBlockBest encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBest(dst, src []byte, dict *Dict) (d int) {
// Initialize the hash tables.
const (
// Long hash matches.
lTableBits = 19
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 16
maxSTableSize = 1 << sTableBits
inputMargin = 8 + 2
debug = false
)
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
if len(src) < minNonLiteralBlockSize {
return 0
}
sLimitDict := len(src) - inputMargin
if sLimitDict > MaxDictSrcOffset-inputMargin {
sLimitDict = MaxDictSrcOffset - inputMargin
}
var lTable [maxLTableSize]uint64
var sTable [maxSTableSize]uint64
// Bail if we can't compress to at least this.
dstLimit := len(src) - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
repeat := 1
if dict != nil {
dict.initBest()
s = 0
repeat = len(dict.dict) - dict.repeat
}
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
const lowbitMask = 0xffffffff
getCur := func(x uint64) int {
return int(x & lowbitMask)
}
getPrev := func(x uint64) int {
return int(x >> 32)
}
const maxSkip = 64
for {
type match struct {
offset int
s int
length int
score int
rep, dict bool
}
var best match
for {
// Next src position to check
nextS := (s-nextEmit)>>8 + 1
if nextS > maxSkip {
nextS = s + maxSkip
} else {
nextS += s
}
if nextS > sLimit {
goto emitRemainder
}
if dict != nil && s >= MaxDictSrcOffset {
dict = nil
if repeat > s {
repeat = math.MinInt32
}
}
hashL := hash8(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL := lTable[hashL]
candidateS := sTable[hashS]
score := func(m match) int {
// Matches that are longer forward are penalized since we must emit it as a literal.
score := m.length - m.s
if nextEmit == m.s {
// If we do not have to emit literals, we save 1 byte
score++
}
offset := m.s - m.offset
if m.rep {
return score - emitRepeatSize(offset, m.length)
}
return score - emitCopySize(offset, m.length)
}
matchAt := func(offset, s int, first uint32, rep bool) match {
if best.length != 0 && best.s-best.offset == s-offset {
// Don't retest if we have the same offset.
return match{offset: offset, s: s}
}
if load32(src, offset) != first {
return match{offset: offset, s: s}
}
m := match{offset: offset, s: s, length: 4 + offset, rep: rep}
s += 4
for s < len(src) {
if len(src)-s < 8 {
if src[s] == src[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
m.length -= offset
m.score = score(m)
if m.score <= -m.s {
// Eliminate if no savings, we might find a better one.
m.length = 0
}
return m
}
matchDict := func(candidate, s int, first uint32, rep bool) match {
// Calculate offset as if in continuous array with s
offset := -len(dict.dict) + candidate
if best.length != 0 && best.s-best.offset == s-offset && !rep {
// Don't retest if we have the same offset.
return match{offset: offset, s: s}
}
if load32(dict.dict, candidate) != first {
return match{offset: offset, s: s}
}
m := match{offset: offset, s: s, length: 4 + candidate, rep: rep, dict: true}
s += 4
if !rep {
for s < sLimitDict && m.length < len(dict.dict) {
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
if src[s] == dict.dict[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
} else {
for s < len(src) && m.length < len(dict.dict) {
if len(src)-s < 8 || len(dict.dict)-m.length < 8 {
if src[s] == dict.dict[m.length] {
m.length++
s++
continue
}
break
}
if diff := load64(src, s) ^ load64(dict.dict, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
}
m.length -= candidate
m.score = score(m)
if m.score <= -m.s {
// Eliminate if no savings, we might find a better one.
m.length = 0
}
return m
}
bestOf := func(a, b match) match {
if b.length == 0 {
return a
}
if a.length == 0 {
return b
}
as := a.score + b.s
bs := b.score + a.s
if as >= bs {
return a
}
return b
}
if s > 0 {
best = bestOf(matchAt(getCur(candidateL), s, uint32(cv), false), matchAt(getPrev(candidateL), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv), false))
}
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateL>>16), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS>>16), s, uint32(cv), false))
}
{
if (dict == nil || repeat <= s) && repeat > 0 {
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8), true))
} else if s-repeat < -4 && dict != nil {
candidate := len(dict.dict) - (repeat - s)
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
candidate++
best = bestOf(best, matchDict(candidate, s+1, uint32(cv>>8), true))
}
if best.length > 0 {
hashS := hash4(cv>>8, sTableBits)
// s+1
nextShort := sTable[hashS]
s := s + 1
cv := load64(src, s)
hashL := hash8(cv, lTableBits)
nextLong := lTable[hashL]
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
// Dict at + 1
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
}
// s+2
if true {
hashS := hash4(cv>>8, sTableBits)
nextShort = sTable[hashS]
s++
cv = load64(src, s)
hashL := hash8(cv, lTableBits)
nextLong = lTable[hashL]
if (dict == nil || repeat <= s) && repeat > 0 {
// Repeat at + 2
best = bestOf(best, matchAt(s-repeat, s, uint32(cv), true))
} else if repeat-s > 4 && dict != nil {
candidate := len(dict.dict) - (repeat - s)
best = bestOf(best, matchDict(candidate, s, uint32(cv), true))
}
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv), false))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv), false))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false))
// Dict at +2
// Very small gain
if dict != nil {
candidateL := dict.bestTableLong[hashL]
candidateS := dict.bestTableShort[hashS]
best = bestOf(best, matchDict(int(candidateL&0xffff), s, uint32(cv), false))
best = bestOf(best, matchDict(int(candidateS&0xffff), s, uint32(cv), false))
}
}
// Search for a match at best match end, see if that is better.
// Allow some bytes at the beginning to mismatch.
// Sweet spot is around 1-2 bytes, but depends on input.
// The skipped bytes are tested in Extend backwards,
// and still picked up as part of the match if they do.
const skipBeginning = 2
const skipEnd = 1
if sAt := best.s + best.length - skipEnd; sAt < sLimit {
sBack := best.s + skipBeginning - skipEnd
backL := best.length - skipBeginning
// Load initial values
cv = load64(src, sBack)
// Grab candidates...
next := lTable[hash8(load64(src, sAt), lTableBits)]
if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
// Disabled: Extremely small gain
if false {
next = sTable[hash4(load64(src, sAt), sTableBits)]
if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false))
}
}
}
}
}
// Update table
lTable[hashL] = uint64(s) | candidateL<<32
sTable[hashS] = uint64(s) | candidateS<<32
if best.length > 0 {
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards, not needed for repeats...
s = best.s
if !best.rep && !best.dict {
for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
best.offset--
best.length++
s--
}
}
if false && best.offset >= s {
panic(fmt.Errorf("t %d >= s %d", best.offset, s))
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
base := s
offset := s - best.offset
s += best.length
if offset > 65535 && s-base <= 5 && !best.rep {
// Bail if the match is equal or worse to the encoding.
s = best.s + 1
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if debug && nextEmit != base {
fmt.Println("EMIT", base-nextEmit, "literals. base-after:", base)
}
d += emitLiteral(dst[d:], src[nextEmit:base])
if best.rep {
if nextEmit > 0 || best.dict {
if debug {
fmt.Println("REPEAT, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
d += emitRepeat(dst[d:], offset, best.length)
} else {
// First match without dict cannot be a repeat.
if debug {
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
d += emitCopy(dst[d:], offset, best.length)
}
} else {
if debug {
fmt.Println("COPY, length", best.length, "offset:", offset, "s-after:", s, "dict:", best.dict, "best:", best)
}
d += emitCopy(dst[d:], offset, best.length)
}
repeat = offset
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Fill tables...
for i := best.s + 1; i < s; i++ {
cv0 := load64(src, i)
long0 := hash8(cv0, lTableBits)
short0 := hash4(cv0, sTableBits)
lTable[long0] = uint64(i) | lTable[long0]<<32
sTable[short0] = uint64(i) | sTable[short0]<<32
}
cv = load64(src, s)
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
if debug && nextEmit != s {
fmt.Println("emitted ", len(src)-nextEmit, "literals")
}
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}
// encodeBlockBestSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBestSnappy(dst, src []byte) (d int) {
// Initialize the hash tables.
const (
// Long hash matches.
lTableBits = 19
maxLTableSize = 1 << lTableBits
// Short hash matches.
sTableBits = 16
maxSTableSize = 1 << sTableBits
inputMargin = 8 + 2
)
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
if len(src) < minNonLiteralBlockSize {
return 0
}
var lTable [maxLTableSize]uint64
var sTable [maxSTableSize]uint64
// Bail if we can't compress to at least this.
dstLimit := len(src) - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
const lowbitMask = 0xffffffff
getCur := func(x uint64) int {
return int(x & lowbitMask)
}
getPrev := func(x uint64) int {
return int(x >> 32)
}
const maxSkip = 64
for {
type match struct {
offset int
s int
length int
score int
}
var best match
for {
// Next src position to check
nextS := (s-nextEmit)>>8 + 1
if nextS > maxSkip {
nextS = s + maxSkip
} else {
nextS += s
}
if nextS > sLimit {
goto emitRemainder
}
hashL := hash8(cv, lTableBits)
hashS := hash4(cv, sTableBits)
candidateL := lTable[hashL]
candidateS := sTable[hashS]
score := func(m match) int {
// Matches that are longer forward are penalized since we must emit it as a literal.
score := m.length - m.s
if nextEmit == m.s {
// If we do not have to emit literals, we save 1 byte
score++
}
offset := m.s - m.offset
return score - emitCopyNoRepeatSize(offset, m.length)
}
matchAt := func(offset, s int, first uint32) match {
if best.length != 0 && best.s-best.offset == s-offset {
// Don't retest if we have the same offset.
return match{offset: offset, s: s}
}
if load32(src, offset) != first {
return match{offset: offset, s: s}
}
m := match{offset: offset, s: s, length: 4 + offset}
s += 4
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, m.length); diff != 0 {
m.length += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
m.length += 8
}
m.length -= offset
m.score = score(m)
if m.score <= -m.s {
// Eliminate if no savings, we might find a better one.
m.length = 0
}
return m
}
bestOf := func(a, b match) match {
if b.length == 0 {
return a
}
if a.length == 0 {
return b
}
as := a.score + b.s
bs := b.score + a.s
if as >= bs {
return a
}
return b
}
best = bestOf(matchAt(getCur(candidateL), s, uint32(cv)), matchAt(getPrev(candidateL), s, uint32(cv)))
best = bestOf(best, matchAt(getCur(candidateS), s, uint32(cv)))
best = bestOf(best, matchAt(getPrev(candidateS), s, uint32(cv)))
{
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
if best.length > 0 {
// s+1
nextShort := sTable[hash4(cv>>8, sTableBits)]
s := s + 1
cv := load64(src, s)
nextLong := lTable[hash8(cv, lTableBits)]
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
// Repeat at + 2
best = bestOf(best, matchAt(s-repeat+1, s+1, uint32(cv>>8)))
// s+2
if true {
nextShort = sTable[hash4(cv>>8, sTableBits)]
s++
cv = load64(src, s)
nextLong = lTable[hash8(cv, lTableBits)]
best = bestOf(best, matchAt(getCur(nextShort), s, uint32(cv)))
best = bestOf(best, matchAt(getPrev(nextShort), s, uint32(cv)))
best = bestOf(best, matchAt(getCur(nextLong), s, uint32(cv)))
best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv)))
}
// Search for a match at best match end, see if that is better.
if sAt := best.s + best.length; sAt < sLimit {
sBack := best.s
backL := best.length
// Load initial values
cv = load64(src, sBack)
// Search for mismatch
next := lTable[hash8(load64(src, sAt), lTableBits)]
//next := sTable[hash4(load64(src, sAt), sTableBits)]
if checkAt := getCur(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
}
if checkAt := getPrev(next) - backL; checkAt > 0 {
best = bestOf(best, matchAt(checkAt, sBack, uint32(cv)))
}
}
}
}
// Update table
lTable[hashL] = uint64(s) | candidateL<<32
sTable[hashS] = uint64(s) | candidateS<<32
if best.length > 0 {
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards, not needed for repeats...
s = best.s
if true {
for best.offset > 0 && s > nextEmit && src[best.offset-1] == src[s-1] {
best.offset--
best.length++
s--
}
}
if false && best.offset >= s {
panic(fmt.Errorf("t %d >= s %d", best.offset, s))
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
base := s
offset := s - best.offset
s += best.length
if offset > 65535 && s-base <= 5 {
// Bail if the match is equal or worse to the encoding.
s = best.s + 1
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
d += emitLiteral(dst[d:], src[nextEmit:base])
d += emitCopyNoRepeat(dst[d:], offset, best.length)
repeat = offset
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Fill tables...
for i := best.s + 1; i < s; i++ {
cv0 := load64(src, i)
long0 := hash8(cv0, lTableBits)
short0 := hash4(cv0, sTableBits)
lTable[long0] = uint64(i) | lTable[long0]<<32
sTable[short0] = uint64(i) | sTable[short0]<<32
}
cv = load64(src, s)
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}
// emitCopySize returns the size to encode the offset+length
//
// It assumes that:
//
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
func emitCopySize(offset, length int) int {
if offset >= 65536 {
i := 0
if length > 64 {
length -= 64
if length >= 4 {
// Emit remaining as repeats
return 5 + emitRepeatSize(offset, length)
}
i = 5
}
if length == 0 {
return i
}
return i + 5
}
// Offset no more than 2 bytes.
if length > 64 {
if offset < 2048 {
// Emit 8 bytes, then rest as repeats...
return 2 + emitRepeatSize(offset, length-8)
}
// Emit remaining as repeats, at least 4 bytes remain.
return 3 + emitRepeatSize(offset, length-60)
}
if length >= 12 || offset >= 2048 {
return 3
}
// Emit the remaining copy, encoded as 2 bytes.
return 2
}
// emitCopyNoRepeatSize returns the size to encode the offset+length
//
// It assumes that:
//
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
func emitCopyNoRepeatSize(offset, length int) int {
if offset >= 65536 {
return 5 + 5*(length/64)
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit remaining as repeats, at least 4 bytes remain.
return 3 + 3*(length/60)
}
if length >= 12 || offset >= 2048 {
return 3
}
// Emit the remaining copy, encoded as 2 bytes.
return 2
}
// emitRepeatSize returns the number of bytes required to encode a repeat.
// Length must be at least 4 and < 1<<24
func emitRepeatSize(offset, length int) int {
// Repeat offset, make length cheaper
if length <= 4+4 || (length < 8+4 && offset < 2048) {
return 2
}
if length < (1<<8)+4+4 {
return 3
}
if length < (1<<16)+(1<<8)+4 {
return 4
}
const maxRepeat = (1 << 24) - 1
length -= (1 << 16) - 4
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
}
if left > 0 {
return 5 + emitRepeatSize(offset, left)
}
return 5
}

File diff suppressed because it is too large Load Diff

View File

@ -1,727 +0,0 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
package s2
import (
"bytes"
"math/bits"
)
const hasAmd64Asm = false
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlock(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize {
return 0
}
return encodeBlockGo(dst, src)
}
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlockBetter(dst, src []byte) (d int) {
return encodeBlockBetterGo(dst, src)
}
// encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlockBetterSnappy(dst, src []byte) (d int) {
return encodeBlockBetterSnappyGo(dst, src)
}
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
// len(dst) >= MaxEncodedLen(len(src))
func encodeBlockSnappy(dst, src []byte) (d int) {
if len(src) < minNonLiteralBlockSize {
return 0
}
return encodeBlockSnappyGo(dst, src)
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteral(dst, lit []byte) int {
if len(lit) == 0 {
return 0
}
const num = 63<<2 | tagLiteral
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[1] = uint8(n)
dst[0] = 60<<2 | tagLiteral
i = 2
case n < 1<<16:
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 61<<2 | tagLiteral
i = 3
case n < 1<<24:
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 62<<2 | tagLiteral
i = 4
default:
dst[4] = uint8(n >> 24)
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 63<<2 | tagLiteral
i = 5
}
return i + copy(dst[i:], lit)
}
// emitRepeat writes a repeat chunk and returns the number of bytes written.
// Length must be at least 4 and < 1<<24
func emitRepeat(dst []byte, offset, length int) int {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
return 2
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
return 2
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
return 3
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
return 4
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
return 5 + emitRepeat(dst[5:], offset, left)
}
return 5
}
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
func emitCopy(dst []byte, offset, length int) int {
if offset >= 65536 {
i := 0
if length > 64 {
// Emit a length 64 copy, encoded as 5 bytes.
dst[4] = uint8(offset >> 24)
dst[3] = uint8(offset >> 16)
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 63<<2 | tagCopy4
length -= 64
if length >= 4 {
// Emit remaining as repeats
return 5 + emitRepeat(dst[5:], offset, length)
}
i = 5
}
if length == 0 {
return i
}
// Emit a copy, offset encoded as 4 bytes.
dst[i+0] = uint8(length-1)<<2 | tagCopy4
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
dst[i+3] = uint8(offset >> 16)
dst[i+4] = uint8(offset >> 24)
return i + 5
}
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
return off + emitRepeat(dst[off:], offset, length)
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
return 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
return 2
}
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
func emitCopyNoRepeat(dst []byte, offset, length int) int {
if offset >= 65536 {
i := 0
if length > 64 {
// Emit a length 64 copy, encoded as 5 bytes.
dst[4] = uint8(offset >> 24)
dst[3] = uint8(offset >> 16)
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 63<<2 | tagCopy4
length -= 64
if length >= 4 {
// Emit remaining as repeats
return 5 + emitCopyNoRepeat(dst[5:], offset, length)
}
i = 5
}
if length == 0 {
return i
}
// Emit a copy, offset encoded as 4 bytes.
dst[i+0] = uint8(length-1)<<2 | tagCopy4
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
dst[i+3] = uint8(offset >> 16)
dst[i+4] = uint8(offset >> 24)
return i + 5
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
// Emit remaining as repeats, at least 4 bytes remain.
return 3 + emitCopyNoRepeat(dst[3:], offset, length)
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
return 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
return 2
}
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b)
func matchLen(a []byte, b []byte) int {
b = b[:len(a)]
var checked int
if len(a) > 4 {
// Try 4 bytes first
if diff := load32(a, 0) ^ load32(b, 0); diff != 0 {
return bits.TrailingZeros32(diff) >> 3
}
// Switch to 8 byte matching.
checked = 4
a = a[4:]
b = b[4:]
for len(a) >= 8 {
b = b[:len(a)]
if diff := load64(a, 0) ^ load64(b, 0); diff != 0 {
return checked + (bits.TrailingZeros64(diff) >> 3)
}
checked += 8
a = a[8:]
b = b[8:]
}
}
b = b[:len(a)]
for i := range a {
if a[i] != b[i] {
return int(i) + checked
}
}
return len(a) + checked
}
func calcBlockSize(src []byte) (d int) {
// Initialize the hash table.
const (
tableBits = 13
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteralSize(src[nextEmit:base])
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteralSize(src[nextEmit:s])
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
if false {
// Validate match.
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteralSize(src[nextEmit:])
}
return d
}
func calcBlockSizeSmall(src []byte) (d int) {
// Initialize the hash table.
const (
tableBits = 9
maxTableSize = 1 << tableBits
)
var table [maxTableSize]uint32
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// Bail if we can't compress to at least this.
dstLimit := len(src) - len(src)>>5 - 5
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
cv := load64(src, s)
// We search for a repeat at -1, but don't output repeats when nextEmit == 0
repeat := 1
for {
candidate := 0
for {
// Next src position to check
nextS := s + (s-nextEmit)>>6 + 4
if nextS > sLimit {
goto emitRemainder
}
hash0 := hash6(cv, tableBits)
hash1 := hash6(cv>>8, tableBits)
candidate = int(table[hash0])
candidate2 := int(table[hash1])
table[hash0] = uint32(s)
table[hash1] = uint32(s + 1)
hash2 := hash6(cv>>16, tableBits)
// Check repeat at offset checkRep.
const checkRep = 1
if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
base := s + checkRep
// Extend back
for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
i--
base--
}
d += emitLiteralSize(src[nextEmit:base])
// Extend forward
candidate := s - repeat + 4 + checkRep
s += 4 + checkRep
for s <= sLimit {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
cv = load64(src, s)
continue
}
if uint32(cv) == load32(src, candidate) {
break
}
candidate = int(table[hash2])
if uint32(cv>>8) == load32(src, candidate2) {
table[hash2] = uint32(s + 2)
candidate = candidate2
s++
break
}
table[hash2] = uint32(s + 2)
if uint32(cv>>16) == load32(src, candidate) {
s += 2
break
}
cv = load64(src, nextS)
s = nextS
}
// Extend backwards
for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
candidate--
s--
}
// Bail if we exceed the maximum size.
if d+(s-nextEmit) > dstLimit {
return 0
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteralSize(src[nextEmit:s])
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
repeat = base - candidate
// Extend the 4-byte match as long as possible.
s += 4
candidate += 4
for s <= len(src)-8 {
if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
s += bits.TrailingZeros64(diff) >> 3
break
}
s += 8
candidate += 8
}
d += emitCopyNoRepeatSize(repeat, s-base)
if false {
// Validate match.
a := src[base:s]
b := src[base-repeat : base-repeat+(s-base)]
if !bytes.Equal(a, b) {
panic("mismatch")
}
}
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
if d > dstLimit {
// Do we have space for more, if not bail.
return 0
}
// Check for an immediate match, otherwise start search at s+1
x := load64(src, s-2)
m2Hash := hash6(x, tableBits)
currHash := hash6(x>>16, tableBits)
candidate = int(table[currHash])
table[m2Hash] = uint32(s - 2)
table[currHash] = uint32(s)
if uint32(x>>16) != load32(src, candidate) {
cv = load64(src, s+1)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
// Bail if we exceed the maximum size.
if d+len(src)-nextEmit > dstLimit {
return 0
}
d += emitLiteralSize(src[nextEmit:])
}
return d
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteralSize(lit []byte) int {
if len(lit) == 0 {
return 0
}
switch {
case len(lit) <= 60:
return len(lit) + 1
case len(lit) <= 1<<8:
return len(lit) + 2
case len(lit) <= 1<<16:
return len(lit) + 3
case len(lit) <= 1<<24:
return len(lit) + 4
default:
return len(lit) + 5
}
}
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockAsm should be unreachable")
}
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4BlockSnappyAsm should be unreachable")
}
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockAsm should be unreachable")
}
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int) {
panic("cvtLZ4sBlockSnappyAsm should be unreachable")
}

View File

@ -1,228 +0,0 @@
// Code generated by command: go run gen.go -out ../encodeblock_amd64.s -stubs ../encodeblock_amd64.go -pkg=s2. DO NOT EDIT.
//go:build !appengine && !noasm && gc && !noasm
package s2
func _dummy_()
// encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBlockAsm(dst []byte, src []byte) int
// encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4194304 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBlockAsm4MB(dst []byte, src []byte) int
// encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBlockAsm12B(dst []byte, src []byte) int
// encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBlockAsm10B(dst []byte, src []byte) int
// encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBlockAsm8B(dst []byte, src []byte) int
// encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBetterBlockAsm(dst []byte, src []byte) int
// encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4194304 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
// encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBetterBlockAsm12B(dst []byte, src []byte) int
// encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBetterBlockAsm10B(dst []byte, src []byte) int
// encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeBetterBlockAsm8B(dst []byte, src []byte) int
// encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBlockAsm(dst []byte, src []byte) int
// encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 65535 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
// encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
// encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
// encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
// encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
// encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 65535 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
// encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 16383 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
// encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4095 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
// encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 511 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
// calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 4294967295 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func calcBlockSize(src []byte) int
// calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst.
// Maximum input 1024 bytes.
// It assumes that the varint-encoded length of the decompressed bytes has already been written.
//
//go:noescape
func calcBlockSizeSmall(src []byte) int
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes with margin of 0 bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
//
//go:noescape
func emitLiteral(dst []byte, lit []byte) int
// emitRepeat writes a repeat chunk and returns the number of bytes written.
// Length must be at least 4 and < 1<<32
//
//go:noescape
func emitRepeat(dst []byte, offset int, length int) int
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
//
//go:noescape
func emitCopy(dst []byte, offset int, length int) int
// emitCopyNoRepeat writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint32
// 4 <= length && length <= 1 << 24
//
//go:noescape
func emitCopyNoRepeat(dst []byte, offset int, length int) int
// matchLen returns how many bytes match in a and b
//
// It assumes that:
//
// len(a) <= len(b)
//
//go:noescape
func matchLen(a []byte, b []byte) int
// cvtLZ4Block converts an LZ4 block to S2
//
//go:noescape
func cvtLZ4BlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4sBlock converts an LZ4s block to S2
//
//go:noescape
func cvtLZ4sBlockAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4Block converts an LZ4 block to Snappy
//
//go:noescape
func cvtLZ4BlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)
// cvtLZ4sBlock converts an LZ4s block to Snappy
//
//go:noescape
func cvtLZ4sBlockSnappyAsm(dst []byte, src []byte) (uncompressed int, dstUsed int)

File diff suppressed because it is too large Load Diff

View File

@ -1,596 +0,0 @@
// Copyright (c) 2022+ Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"sort"
)
const (
S2IndexHeader = "s2idx\x00"
S2IndexTrailer = "\x00xdi2s"
maxIndexEntries = 1 << 16
)
// Index represents an S2/Snappy index.
type Index struct {
TotalUncompressed int64 // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 // Total Compressed size if known. Will be -1 if unknown.
info []struct {
compressedOffset int64
uncompressedOffset int64
}
estBlockUncomp int64
}
func (i *Index) reset(maxBlock int) {
i.estBlockUncomp = int64(maxBlock)
i.TotalCompressed = -1
i.TotalUncompressed = -1
if len(i.info) > 0 {
i.info = i.info[:0]
}
}
// allocInfos will allocate an empty slice of infos.
func (i *Index) allocInfos(n int) {
if n > maxIndexEntries {
panic("n > maxIndexEntries")
}
i.info = make([]struct {
compressedOffset int64
uncompressedOffset int64
}, 0, n)
}
// add an uncompressed and compressed pair.
// Entries must be sent in order.
func (i *Index) add(compressedOffset, uncompressedOffset int64) error {
if i == nil {
return nil
}
lastIdx := len(i.info) - 1
if lastIdx >= 0 {
latest := i.info[lastIdx]
if latest.uncompressedOffset == uncompressedOffset {
// Uncompressed didn't change, don't add entry,
// but update start index.
latest.compressedOffset = compressedOffset
i.info[lastIdx] = latest
return nil
}
if latest.uncompressedOffset > uncompressedOffset {
return fmt.Errorf("internal error: Earlier uncompressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
}
if latest.compressedOffset > compressedOffset {
return fmt.Errorf("internal error: Earlier compressed received (%d > %d)", latest.uncompressedOffset, uncompressedOffset)
}
}
i.info = append(i.info, struct {
compressedOffset int64
uncompressedOffset int64
}{compressedOffset: compressedOffset, uncompressedOffset: uncompressedOffset})
return nil
}
// Find the offset at or before the wanted (uncompressed) offset.
// If offset is 0 or positive it is the offset from the beginning of the file.
// If the uncompressed size is known, the offset must be within the file.
// If an offset outside the file is requested io.ErrUnexpectedEOF is returned.
// If the offset is negative, it is interpreted as the distance from the end of the file,
// where -1 represents the last byte.
// If offset from the end of the file is requested, but size is unknown,
// ErrUnsupported will be returned.
func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err error) {
if i.TotalUncompressed < 0 {
return 0, 0, ErrCorrupt
}
if offset < 0 {
offset = i.TotalUncompressed + offset
if offset < 0 {
return 0, 0, io.ErrUnexpectedEOF
}
}
if offset > i.TotalUncompressed {
return 0, 0, io.ErrUnexpectedEOF
}
if len(i.info) > 200 {
n := sort.Search(len(i.info), func(n int) bool {
return i.info[n].uncompressedOffset > offset
})
if n == 0 {
n = 1
}
return i.info[n-1].compressedOffset, i.info[n-1].uncompressedOffset, nil
}
for _, info := range i.info {
if info.uncompressedOffset > offset {
break
}
compressedOff = info.compressedOffset
uncompressedOff = info.uncompressedOffset
}
return compressedOff, uncompressedOff, nil
}
// reduce to stay below maxIndexEntries
func (i *Index) reduce() {
if len(i.info) < maxIndexEntries && i.estBlockUncomp >= 1<<20 {
return
}
// Algorithm, keep 1, remove removeN entries...
removeN := (len(i.info) + 1) / maxIndexEntries
src := i.info
j := 0
// Each block should be at least 1MB, but don't reduce below 1000 entries.
for i.estBlockUncomp*(int64(removeN)+1) < 1<<20 && len(i.info)/(removeN+1) > 1000 {
removeN++
}
for idx := 0; idx < len(src); idx++ {
i.info[j] = src[idx]
j++
idx += removeN
}
i.info = i.info[:j]
// Update maxblock estimate.
i.estBlockUncomp += i.estBlockUncomp * int64(removeN)
}
func (i *Index) appendTo(b []byte, uncompTotal, compTotal int64) []byte {
i.reduce()
var tmp [binary.MaxVarintLen64]byte
initSize := len(b)
// We make the start a skippable header+size.
b = append(b, ChunkTypeIndex, 0, 0, 0)
b = append(b, []byte(S2IndexHeader)...)
// Total Uncompressed size
n := binary.PutVarint(tmp[:], uncompTotal)
b = append(b, tmp[:n]...)
// Total Compressed size
n = binary.PutVarint(tmp[:], compTotal)
b = append(b, tmp[:n]...)
// Put EstBlockUncomp size
n = binary.PutVarint(tmp[:], i.estBlockUncomp)
b = append(b, tmp[:n]...)
// Put length
n = binary.PutVarint(tmp[:], int64(len(i.info)))
b = append(b, tmp[:n]...)
// Check if we should add uncompressed offsets
var hasUncompressed byte
for idx, info := range i.info {
if idx == 0 {
if info.uncompressedOffset != 0 {
hasUncompressed = 1
break
}
continue
}
if info.uncompressedOffset != i.info[idx-1].uncompressedOffset+i.estBlockUncomp {
hasUncompressed = 1
break
}
}
b = append(b, hasUncompressed)
// Add each entry
if hasUncompressed == 1 {
for idx, info := range i.info {
uOff := info.uncompressedOffset
if idx > 0 {
prev := i.info[idx-1]
uOff -= prev.uncompressedOffset + (i.estBlockUncomp)
}
n = binary.PutVarint(tmp[:], uOff)
b = append(b, tmp[:n]...)
}
}
// Initial compressed size estimate.
cPredict := i.estBlockUncomp / 2
for idx, info := range i.info {
cOff := info.compressedOffset
if idx > 0 {
prev := i.info[idx-1]
cOff -= prev.compressedOffset + cPredict
// Update compressed size prediction, with half the error.
cPredict += cOff / 2
}
n = binary.PutVarint(tmp[:], cOff)
b = append(b, tmp[:n]...)
}
// Add Total Size.
// Stored as fixed size for easier reading.
binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)-initSize+4+len(S2IndexTrailer)))
b = append(b, tmp[:4]...)
// Trailer
b = append(b, []byte(S2IndexTrailer)...)
// Update size
chunkLen := len(b) - initSize - skippableFrameHeader
b[initSize+1] = uint8(chunkLen >> 0)
b[initSize+2] = uint8(chunkLen >> 8)
b[initSize+3] = uint8(chunkLen >> 16)
//fmt.Printf("chunklen: 0x%x Uncomp:%d, Comp:%d\n", chunkLen, uncompTotal, compTotal)
return b
}
// Load a binary index.
// A zero value Index can be used or a previous one can be reused.
func (i *Index) Load(b []byte) ([]byte, error) {
if len(b) <= 4+len(S2IndexHeader)+len(S2IndexTrailer) {
return b, io.ErrUnexpectedEOF
}
if b[0] != ChunkTypeIndex {
return b, ErrCorrupt
}
chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
b = b[4:]
// Validate we have enough...
if len(b) < chunkLen {
return b, io.ErrUnexpectedEOF
}
if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
return b, ErrUnsupported
}
b = b[len(S2IndexHeader):]
// Total Uncompressed
if v, n := binary.Varint(b); n <= 0 || v < 0 {
return b, ErrCorrupt
} else {
i.TotalUncompressed = v
b = b[n:]
}
// Total Compressed
if v, n := binary.Varint(b); n <= 0 {
return b, ErrCorrupt
} else {
i.TotalCompressed = v
b = b[n:]
}
// Read EstBlockUncomp
if v, n := binary.Varint(b); n <= 0 {
return b, ErrCorrupt
} else {
if v < 0 {
return b, ErrCorrupt
}
i.estBlockUncomp = v
b = b[n:]
}
var entries int
if v, n := binary.Varint(b); n <= 0 {
return b, ErrCorrupt
} else {
if v < 0 || v > maxIndexEntries {
return b, ErrCorrupt
}
entries = int(v)
b = b[n:]
}
if cap(i.info) < entries {
i.allocInfos(entries)
}
i.info = i.info[:entries]
if len(b) < 1 {
return b, io.ErrUnexpectedEOF
}
hasUncompressed := b[0]
b = b[1:]
if hasUncompressed&1 != hasUncompressed {
return b, ErrCorrupt
}
// Add each uncompressed entry
for idx := range i.info {
var uOff int64
if hasUncompressed != 0 {
// Load delta
if v, n := binary.Varint(b); n <= 0 {
return b, ErrCorrupt
} else {
uOff = v
b = b[n:]
}
}
if idx > 0 {
prev := i.info[idx-1].uncompressedOffset
uOff += prev + (i.estBlockUncomp)
if uOff <= prev {
return b, ErrCorrupt
}
}
if uOff < 0 {
return b, ErrCorrupt
}
i.info[idx].uncompressedOffset = uOff
}
// Initial compressed size estimate.
cPredict := i.estBlockUncomp / 2
// Add each compressed entry
for idx := range i.info {
var cOff int64
if v, n := binary.Varint(b); n <= 0 {
return b, ErrCorrupt
} else {
cOff = v
b = b[n:]
}
if idx > 0 {
// Update compressed size prediction, with half the error.
cPredictNew := cPredict + cOff/2
prev := i.info[idx-1].compressedOffset
cOff += prev + cPredict
if cOff <= prev {
return b, ErrCorrupt
}
cPredict = cPredictNew
}
if cOff < 0 {
return b, ErrCorrupt
}
i.info[idx].compressedOffset = cOff
}
if len(b) < 4+len(S2IndexTrailer) {
return b, io.ErrUnexpectedEOF
}
// Skip size...
b = b[4:]
// Check trailer...
if !bytes.Equal(b[:len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
return b, ErrCorrupt
}
return b[len(S2IndexTrailer):], nil
}
// LoadStream will load an index from the end of the supplied stream.
// ErrUnsupported will be returned if the signature cannot be found.
// ErrCorrupt will be returned if unexpected values are found.
// io.ErrUnexpectedEOF is returned if there are too few bytes.
// IO errors are returned as-is.
func (i *Index) LoadStream(rs io.ReadSeeker) error {
// Go to end.
_, err := rs.Seek(-10, io.SeekEnd)
if err != nil {
return err
}
var tmp [10]byte
_, err = io.ReadFull(rs, tmp[:])
if err != nil {
return err
}
// Check trailer...
if !bytes.Equal(tmp[4:4+len(S2IndexTrailer)], []byte(S2IndexTrailer)) {
return ErrUnsupported
}
sz := binary.LittleEndian.Uint32(tmp[:4])
if sz > maxChunkSize+skippableFrameHeader {
return ErrCorrupt
}
_, err = rs.Seek(-int64(sz), io.SeekEnd)
if err != nil {
return err
}
// Read index.
buf := make([]byte, sz)
_, err = io.ReadFull(rs, buf)
if err != nil {
return err
}
_, err = i.Load(buf)
return err
}
// IndexStream will return an index for a stream.
// The stream structure will be checked, but
// data within blocks is not verified.
// The returned index can either be appended to the end of the stream
// or stored separately.
func IndexStream(r io.Reader) ([]byte, error) {
var i Index
var buf [maxChunkSize]byte
var readHeader bool
for {
_, err := io.ReadFull(r, buf[:4])
if err != nil {
if err == io.EOF {
return i.appendTo(nil, i.TotalUncompressed, i.TotalCompressed), nil
}
return nil, err
}
// Start of this chunk.
startChunk := i.TotalCompressed
i.TotalCompressed += 4
chunkType := buf[0]
if !readHeader {
if chunkType != chunkTypeStreamIdentifier {
return nil, ErrCorrupt
}
readHeader = true
}
chunkLen := int(buf[1]) | int(buf[2])<<8 | int(buf[3])<<16
if chunkLen < checksumSize {
return nil, ErrCorrupt
}
i.TotalCompressed += int64(chunkLen)
_, err = io.ReadFull(r, buf[:chunkLen])
if err != nil {
return nil, io.ErrUnexpectedEOF
}
// The chunk types are specified at
// https://github.com/google/snappy/blob/master/framing_format.txt
switch chunkType {
case chunkTypeCompressedData:
// Section 4.2. Compressed data (chunk type 0x00).
// Skip checksum.
dLen, err := DecodedLen(buf[checksumSize:])
if err != nil {
return nil, err
}
if dLen > maxBlockSize {
return nil, ErrCorrupt
}
if i.estBlockUncomp == 0 {
// Use first block for estimate...
i.estBlockUncomp = int64(dLen)
}
err = i.add(startChunk, i.TotalUncompressed)
if err != nil {
return nil, err
}
i.TotalUncompressed += int64(dLen)
continue
case chunkTypeUncompressedData:
n2 := chunkLen - checksumSize
if n2 > maxBlockSize {
return nil, ErrCorrupt
}
if i.estBlockUncomp == 0 {
// Use first block for estimate...
i.estBlockUncomp = int64(n2)
}
err = i.add(startChunk, i.TotalUncompressed)
if err != nil {
return nil, err
}
i.TotalUncompressed += int64(n2)
continue
case chunkTypeStreamIdentifier:
// Section 4.1. Stream identifier (chunk type 0xff).
if chunkLen != len(magicBody) {
return nil, ErrCorrupt
}
if string(buf[:len(magicBody)]) != magicBody {
if string(buf[:len(magicBody)]) != magicBodySnappy {
return nil, ErrCorrupt
}
}
continue
}
if chunkType <= 0x7f {
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
return nil, ErrUnsupported
}
if chunkLen > maxChunkSize {
return nil, ErrUnsupported
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
}
}
// JSON returns the index as JSON text.
func (i *Index) JSON() []byte {
type offset struct {
CompressedOffset int64 `json:"compressed"`
UncompressedOffset int64 `json:"uncompressed"`
}
x := struct {
TotalUncompressed int64 `json:"total_uncompressed"` // Total Uncompressed size if known. Will be -1 if unknown.
TotalCompressed int64 `json:"total_compressed"` // Total Compressed size if known. Will be -1 if unknown.
Offsets []offset `json:"offsets"`
EstBlockUncomp int64 `json:"est_block_uncompressed"`
}{
TotalUncompressed: i.TotalUncompressed,
TotalCompressed: i.TotalCompressed,
EstBlockUncomp: i.estBlockUncomp,
}
for _, v := range i.info {
x.Offsets = append(x.Offsets, offset{CompressedOffset: v.compressedOffset, UncompressedOffset: v.uncompressedOffset})
}
b, _ := json.MarshalIndent(x, "", " ")
return b
}
// RemoveIndexHeaders will trim all headers and trailers from a given index.
// This is expected to save 20 bytes.
// These can be restored using RestoreIndexHeaders.
// This removes a layer of security, but is the most compact representation.
// Returns nil if headers contains errors.
// The returned slice references the provided slice.
func RemoveIndexHeaders(b []byte) []byte {
const save = 4 + len(S2IndexHeader) + len(S2IndexTrailer) + 4
if len(b) <= save {
return nil
}
if b[0] != ChunkTypeIndex {
return nil
}
chunkLen := int(b[1]) | int(b[2])<<8 | int(b[3])<<16
b = b[4:]
// Validate we have enough...
if len(b) < chunkLen {
return nil
}
b = b[:chunkLen]
if !bytes.Equal(b[:len(S2IndexHeader)], []byte(S2IndexHeader)) {
return nil
}
b = b[len(S2IndexHeader):]
if !bytes.HasSuffix(b, []byte(S2IndexTrailer)) {
return nil
}
b = bytes.TrimSuffix(b, []byte(S2IndexTrailer))
if len(b) < 4 {
return nil
}
return b[:len(b)-4]
}
// RestoreIndexHeaders will index restore headers removed by RemoveIndexHeaders.
// No error checking is performed on the input.
// If a 0 length slice is sent, it is returned without modification.
func RestoreIndexHeaders(in []byte) []byte {
if len(in) == 0 {
return in
}
b := make([]byte, 0, 4+len(S2IndexHeader)+len(in)+len(S2IndexTrailer)+4)
b = append(b, ChunkTypeIndex, 0, 0, 0)
b = append(b, []byte(S2IndexHeader)...)
b = append(b, in...)
var tmp [4]byte
binary.LittleEndian.PutUint32(tmp[:], uint32(len(b)+4+len(S2IndexTrailer)))
b = append(b, tmp[:4]...)
// Trailer
b = append(b, []byte(S2IndexTrailer)...)
chunkLen := len(b) - skippableFrameHeader
b[1] = uint8(chunkLen >> 0)
b[2] = uint8(chunkLen >> 8)
b[3] = uint8(chunkLen >> 16)
return b
}

View File

@ -1,585 +0,0 @@
// Copyright (c) 2022 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"errors"
"fmt"
)
// LZ4Converter provides conversion from LZ4 blocks as defined here:
// https://github.com/lz4/lz4/blob/dev/doc/lz4_Block_format.md
type LZ4Converter struct {
}
// ErrDstTooSmall is returned when provided destination is too small.
var ErrDstTooSmall = errors.New("s2: destination too small")
// ConvertBlock will convert an LZ4 block and append it as an S2
// block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4Converter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const inline = true
const lz4MinMatch = 4
s, d := 0, len(dst)
dst = dst[:cap(dst)]
if !debug && hasAmd64Asm {
res, sz := cvtLZ4BlockAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var lastOffset uint16
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return dst[:d], 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return dst[:d], 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if s == len(src) && ml == lz4MinMatch {
break
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if offset == lastOffset {
if debug {
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitRepeat16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
d += 2
break
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
d += 2
break
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
d += 3
break
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
d += 4
break
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
d += 5 + emitRepeat16(dst[5:], offset, left)
break
}
d += 5
break
}
}
} else {
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitCopy16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
d += off + emitRepeat16(dst[off:], offset, length)
break
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
}
lastOffset = offset
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// ConvertBlockSnappy will convert an LZ4 block and append it
// as a Snappy block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4Converter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const lz4MinMatch = 4
s, d := 0, len(dst)
dst = dst[:cap(dst)]
// Use assembly when possible
if !debug && hasAmd64Asm {
res, sz := cvtLZ4BlockSnappyAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return nil, 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if s == len(src) && ml == lz4MinMatch {
break
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
length := ml
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
for length > 0 {
if d >= dLimit {
return nil, 0, ErrDstTooSmall
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = 63<<2 | tagCopy2
length -= 64
d += 3
continue
}
if length >= 12 || offset >= 2048 || length < 4 {
// Emit the remaining copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[d+1] = uint8(offset)
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// emitRepeat writes a repeat chunk and returns the number of bytes written.
// Length must be at least 4 and < 1<<24
func emitRepeat16(dst []byte, offset uint16, length int) int {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
return 2
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
return 2
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
return 3
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
return 4
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
return 5 + emitRepeat16(dst[5:], offset, left)
}
return 5
}
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= math.MaxUint16
// 4 <= length && length <= math.MaxUint32
func emitCopy16(dst []byte, offset uint16, length int) int {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
return off + emitRepeat16(dst[off:], offset, length)
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
return 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
return 2
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
//
// dst is long enough to hold the encoded bytes
// 0 <= len(lit) && len(lit) <= math.MaxUint32
func emitLiteralGo(dst, lit []byte) int {
if len(lit) == 0 {
return 0
}
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[1] = uint8(n)
dst[0] = 60<<2 | tagLiteral
i = 2
case n < 1<<16:
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 61<<2 | tagLiteral
i = 3
case n < 1<<24:
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 62<<2 | tagLiteral
i = 4
default:
dst[4] = uint8(n >> 24)
dst[3] = uint8(n >> 16)
dst[2] = uint8(n >> 8)
dst[1] = uint8(n)
dst[0] = 63<<2 | tagLiteral
i = 5
}
return i + copy(dst[i:], lit)
}

View File

@ -1,467 +0,0 @@
// Copyright (c) 2022 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package s2
import (
"encoding/binary"
"fmt"
)
// LZ4sConverter provides conversion from LZ4s.
// (Intel modified LZ4 Blocks)
// https://cdrdv2-public.intel.com/743912/743912-qat-programmers-guide-v2.0.pdf
// LZ4s is a variant of LZ4 block format. LZ4s should be considered as an intermediate compressed block format.
// The LZ4s format is selected when the application sets the compType to CPA_DC_LZ4S in CpaDcSessionSetupData.
// The LZ4s block returned by the Intel® QAT hardware can be used by an external
// software post-processing to generate other compressed data formats.
// The following table lists the differences between LZ4 and LZ4s block format. LZ4s block format uses
// the same high-level formatting as LZ4 block format with the following encoding changes:
// For Min Match of 4 bytes, Copy length value 1-15 means length 4-18 with 18 bytes adding an extra byte.
// ONLY "Min match of 4 bytes" is supported.
type LZ4sConverter struct {
}
// ConvertBlock will convert an LZ4s block and append it as an S2
// block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlock(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const inline = true
const lz4MinMatch = 3
s, d := 0, len(dst)
dst = dst[:cap(dst)]
if !debug && hasAmd64Asm {
res, sz := cvtLZ4sBlockAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var lastOffset uint16
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return dst[:d], 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return dst[:d], 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if ml == lz4MinMatch {
if s == len(src) {
break
}
// 0 bytes.
continue
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if offset == lastOffset {
if debug {
fmt.Printf("emit repeat, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitRepeat16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Repeat offset, make length cheaper
length -= 4
if length <= 4 {
dst[0] = uint8(length)<<2 | tagCopy1
dst[1] = 0
d += 2
break
}
if length < 8 && offset < 2048 {
// Encode WITH offset
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length)<<2 | tagCopy1
d += 2
break
}
if length < (1<<8)+4 {
length -= 4
dst[2] = uint8(length)
dst[1] = 0
dst[0] = 5<<2 | tagCopy1
d += 3
break
}
if length < (1<<16)+(1<<8) {
length -= 1 << 8
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 6<<2 | tagCopy1
d += 4
break
}
const maxRepeat = (1 << 24) - 1
length -= 1 << 16
left := 0
if length > maxRepeat {
left = length - maxRepeat + 4
length = maxRepeat - 4
}
dst[4] = uint8(length >> 16)
dst[3] = uint8(length >> 8)
dst[2] = uint8(length >> 0)
dst[1] = 0
dst[0] = 7<<2 | tagCopy1
if left > 0 {
d += 5 + emitRepeat16(dst[5:], offset, left)
break
}
d += 5
break
}
}
} else {
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
if !inline {
d += emitCopy16(dst[d:], offset, ml)
} else {
length := ml
dst := dst[d:]
for len(dst) > 5 {
// Offset no more than 2 bytes.
if length > 64 {
off := 3
if offset < 2048 {
// emit 8 bytes as tagCopy1, rest as repeats.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
length -= 8
off = 2
} else {
// Emit a length 60 copy, encoded as 3 bytes.
// Emit remaining as repeat value (minimum 4 bytes).
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = 59<<2 | tagCopy2
length -= 60
}
// Emit remaining as repeats, at least 4 bytes remain.
d += off + emitRepeat16(dst[off:], offset, length)
break
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[2] = uint8(offset >> 8)
dst[1] = uint8(offset)
dst[0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[1] = uint8(offset)
dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
}
lastOffset = offset
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}
// ConvertBlockSnappy will convert an LZ4s block and append it
// as a Snappy block without block length to dst.
// The uncompressed size is returned as well.
// dst must have capacity to contain the entire compressed block.
func (l *LZ4sConverter) ConvertBlockSnappy(dst, src []byte) ([]byte, int, error) {
if len(src) == 0 {
return dst, 0, nil
}
const debug = false
const lz4MinMatch = 3
s, d := 0, len(dst)
dst = dst[:cap(dst)]
// Use assembly when possible
if !debug && hasAmd64Asm {
res, sz := cvtLZ4sBlockSnappyAsm(dst[d:], src)
if res < 0 {
const (
errCorrupt = -1
errDstTooSmall = -2
)
switch res {
case errCorrupt:
return nil, 0, ErrCorrupt
case errDstTooSmall:
return nil, 0, ErrDstTooSmall
default:
return nil, 0, fmt.Errorf("unexpected result: %d", res)
}
}
if d+sz > len(dst) {
return nil, 0, ErrDstTooSmall
}
return dst[:d+sz], res, nil
}
dLimit := len(dst) - 10
var uncompressed int
if debug {
fmt.Printf("convert block start: len(src): %d, len(dst):%d \n", len(src), len(dst))
}
for {
if s >= len(src) {
return nil, 0, ErrCorrupt
}
// Read literal info
token := src[s]
ll := int(token >> 4)
ml := int(lz4MinMatch + (token & 0xf))
// If upper nibble is 15, literal length is extended
if token >= 0xf0 {
for {
s++
if s >= len(src) {
if debug {
fmt.Printf("error reading ll: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
ll += int(val)
if val != 255 {
break
}
}
}
// Skip past token
if s+ll >= len(src) {
if debug {
fmt.Printf("error literals: s+ll (%d+%d) >= len(src) (%d)\n", s, ll, len(src))
}
return nil, 0, ErrCorrupt
}
s++
if ll > 0 {
if d+ll > dLimit {
return nil, 0, ErrDstTooSmall
}
if debug {
fmt.Printf("emit %d literals\n", ll)
}
d += emitLiteralGo(dst[d:], src[s:s+ll])
s += ll
uncompressed += ll
}
// Check if we are done...
if ml == lz4MinMatch {
if s == len(src) {
break
}
// 0 bytes.
continue
}
// 2 byte offset
if s >= len(src)-2 {
if debug {
fmt.Printf("s (%d) >= len(src)-2 (%d)", s, len(src)-2)
}
return nil, 0, ErrCorrupt
}
offset := binary.LittleEndian.Uint16(src[s:])
s += 2
if offset == 0 {
if debug {
fmt.Printf("error: offset 0, ml: %d, len(src)-s: %d\n", ml, len(src)-s)
}
return nil, 0, ErrCorrupt
}
if int(offset) > uncompressed {
if debug {
fmt.Printf("error: offset (%d)> uncompressed (%d)\n", offset, uncompressed)
}
return nil, 0, ErrCorrupt
}
if ml == lz4MinMatch+15 {
for {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
val := src[s]
s++
ml += int(val)
if val != 255 {
if s >= len(src) {
if debug {
fmt.Printf("error reading ml: s (%d) >= len(src) (%d)\n", s, len(src))
}
return nil, 0, ErrCorrupt
}
break
}
}
}
if debug {
fmt.Printf("emit copy, length: %d, offset: %d\n", ml, offset)
}
length := ml
// d += emitCopyNoRepeat(dst[d:], int(offset), ml)
for length > 0 {
if d >= dLimit {
return nil, 0, ErrDstTooSmall
}
// Offset no more than 2 bytes.
if length > 64 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = 63<<2 | tagCopy2
length -= 64
d += 3
continue
}
if length >= 12 || offset >= 2048 || length < 4 {
// Emit the remaining copy, encoded as 3 bytes.
dst[d+2] = uint8(offset >> 8)
dst[d+1] = uint8(offset)
dst[d+0] = uint8(length-1)<<2 | tagCopy2
d += 3
break
}
// Emit the remaining copy, encoded as 2 bytes.
dst[d+1] = uint8(offset)
dst[d+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
d += 2
break
}
uncompressed += ml
if d > dLimit {
return nil, 0, ErrDstTooSmall
}
}
return dst[:d], uncompressed, nil
}

File diff suppressed because it is too large Load Diff

View File

@ -1,143 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright (c) 2019 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package s2 implements the S2 compression format.
//
// S2 is an extension of Snappy. Similar to Snappy S2 is aimed for high throughput,
// which is why it features concurrent compression for bigger payloads.
//
// Decoding is compatible with Snappy compressed content,
// but content compressed with S2 cannot be decompressed by Snappy.
//
// For more information on Snappy/S2 differences see README in: https://github.com/klauspost/compress/tree/master/s2
//
// There are actually two S2 formats: block and stream. They are related,
// but different: trying to decompress block-compressed data as a S2 stream
// will fail, and vice versa. The block format is the Decode and Encode
// functions and the stream format is the Reader and Writer types.
//
// A "better" compression option is available. This will trade some compression
// speed
//
// The block format, the more common case, is used when the complete size (the
// number of bytes) of the original data is known upfront, at the time
// compression starts. The stream format, also known as the framing format, is
// for when that isn't always true.
//
// Blocks to not offer much data protection, so it is up to you to
// add data validation of decompressed blocks.
//
// Streams perform CRC validation of the decompressed data.
// Stream compression will also be performed on multiple CPU cores concurrently
// significantly improving throughput.
package s2
import (
"bytes"
"hash/crc32"
)
/*
Each encoded block begins with the varint-encoded length of the decoded data,
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
first byte of each chunk is broken into its 2 least and 6 most significant bits
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
Zero means a literal tag. All other values mean a copy tag.
For literal tags:
- If m < 60, the next 1 + m bytes are literal bytes.
- Otherwise, let n be the little-endian unsigned integer denoted by the next
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
For copy tags, length bytes are copied from offset bytes ago, in the style of
Lempel-Ziv compression algorithms. In particular:
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
of the offset. The next byte is bits 0-7 of the offset.
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, the offset ranges in [0, 1<<32) and the length in
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
integer denoted by the next 4 bytes.
*/
const (
tagLiteral = 0x00
tagCopy1 = 0x01
tagCopy2 = 0x02
tagCopy4 = 0x03
)
const (
checksumSize = 4
chunkHeaderSize = 4
magicChunk = "\xff\x06\x00\x00" + magicBody
magicChunkSnappy = "\xff\x06\x00\x00" + magicBodySnappy
magicBodySnappy = "sNaPpY"
magicBody = "S2sTwO"
// maxBlockSize is the maximum size of the input to encodeBlock.
//
// For the framing format (Writer type instead of Encode function),
// this is the maximum uncompressed size of a block.
maxBlockSize = 4 << 20
// minBlockSize is the minimum size of block setting when creating a writer.
minBlockSize = 4 << 10
skippableFrameHeader = 4
maxChunkSize = 1<<24 - 1 // 16777215
// Default block size
defaultBlockSize = 1 << 20
// maxSnappyBlockSize is the maximum snappy block size.
maxSnappyBlockSize = 1 << 16
obufHeaderLen = checksumSize + chunkHeaderSize
)
const (
chunkTypeCompressedData = 0x00
chunkTypeUncompressedData = 0x01
ChunkTypeIndex = 0x99
chunkTypePadding = 0xfe
chunkTypeStreamIdentifier = 0xff
)
var crcTable = crc32.MakeTable(crc32.Castagnoli)
// crc implements the checksum specified in section 3 of
// https://github.com/google/snappy/blob/master/framing_format.txt
func crc(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
return c>>15 | c<<17 + 0xa282ead8
}
// literalExtraSize returns the extra size of encoding n literals.
// n should be >= 0 and <= math.MaxUint32.
func literalExtraSize(n int64) int64 {
if n == 0 {
return 0
}
switch {
case n < 60:
return 1
case n < 1<<8:
return 2
case n < 1<<16:
return 3
case n < 1<<24:
return 4
default:
return 5
}
}
type byter interface {
Bytes() []byte
}
var _ byter = &bytes.Buffer{}

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
module github.com/klauspost/compress
go 1.16
go 1.19

View File

@ -259,7 +259,7 @@ nyc-taxi-data-10M.csv gzkp 1 3325605752 922273214 13929 227.68
## Decompressor
Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
Status: STABLE - there may still be subtle bugs, but a wide variety of content has been tested.
This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
kindly supplied by [fuzzit.dev](https://fuzzit.dev/).

View File

@ -95,42 +95,54 @@ type Header struct {
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
_, err := h.DecodeAndStrip(in)
return err
}
// DecodeAndStrip will decode the header from the beginning of the stream
// and on success return the remaining bytes.
// This will decode the frame header and the first block header if enough bytes are provided.
// It is recommended to provide at least HeaderMaxSize bytes.
// If the frame header cannot be read an error will be returned.
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) DecodeAndStrip(in []byte) (remain []byte, err error) {
*h = Header{}
if len(in) < 4 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
if string(b) != frameMagic {
if string(b[1:4]) != skippableFrameMagic || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
return nil, ErrMagicMismatch
}
if len(in) < 4 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
h.HeaderSize += 4
h.Skippable = true
h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in)
return nil
return in[4:], nil
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
fhd, in := in[0], in[1:]
h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
return nil, errors.New("reserved bit set on frame header")
}
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
@ -148,7 +160,7 @@ func (h *Header) Decode(in []byte) error {
size = 4
}
if len(in) < int(size) {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
h.HeaderSize += int(size)
@ -178,7 +190,7 @@ func (h *Header) Decode(in []byte) error {
if fcsSize > 0 {
h.HasFCS = true
if len(in) < fcsSize {
return io.ErrUnexpectedEOF
return nil, io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
h.HeaderSize += int(fcsSize)
@ -199,7 +211,7 @@ func (h *Header) Decode(in []byte) error {
// Frame Header done, we will not fail from now on.
if len(in) < 3 {
return nil
return in, nil
}
tmp := in[:3]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
@ -209,7 +221,7 @@ func (h *Header) Decode(in []byte) error {
cSize := int(bh >> 3)
switch blockType {
case blockTypeReserved:
return nil
return in, nil
case blockTypeRLE:
h.FirstBlock.Compressed = true
h.FirstBlock.DecompressedSize = cSize
@ -225,5 +237,25 @@ func (h *Header) Decode(in []byte) error {
}
h.FirstBlock.OK = true
return nil
return in, nil
}
// AppendTo will append the encoded header to the dst slice.
// There is no error checking performed on the header values.
func (h *Header) AppendTo(dst []byte) ([]byte, error) {
if h.Skippable {
magic := [4]byte{0x50, 0x2a, 0x4d, 0x18}
magic[0] |= byte(h.SkippableID & 0xf)
dst = append(dst, magic[:]...)
f := h.SkippableSize
return append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)), nil
}
f := frameHeader{
ContentSize: h.FrameContentSize,
WindowSize: uint32(h.WindowSize),
SingleSegment: h.SingleSegment,
Checksum: h.HasCheckSum,
DictID: h.DictionaryID,
}
return f.appendTo(dst), nil
}

View File

@ -43,7 +43,7 @@ func (m *match) estBits(bitsPerByte int32) {
if m.rep < 0 {
ofc = ofCode(uint32(m.s-m.offset) + 3)
} else {
ofc = ofCode(uint32(m.rep))
ofc = ofCode(uint32(m.rep) & 3)
}
// Cost, excluding
ofTT, mlTT := fsePredefEnc[tableOffsets].ct.symbolTT[ofc], fsePredefEnc[tableMatchLengths].ct.symbolTT[mlc]
@ -197,17 +197,10 @@ encodeLoop:
// Set m to a match at offset if it looks like that will improve compression.
improve := func(m *match, offset int32, s int32, first uint32, rep int32) {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
delta := s - offset
if delta >= e.maxMatchOff || delta <= 0 || load3232(src, offset) != first {
return
}
if debugAsserts {
if offset <= 0 {
panic(offset)
}
if !bytes.Equal(src[s:s+4], src[offset:offset+4]) {
panic(fmt.Sprintf("first match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
// Try to quick reject if we already have a long match.
if m.length > 16 {
left := len(src) - int(m.s+m.length)
@ -226,8 +219,10 @@ encodeLoop:
}
}
l := 4 + e.matchlen(s+4, offset+4, src)
if rep < 0 {
if m.rep <= 0 {
// Extend candidate match backwards as far as possible.
// Do not extend repeats as we can assume they are optimal
// and offsets change if s == nextEmit.
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
@ -238,7 +233,14 @@ encodeLoop:
l++
}
}
if debugAsserts {
if offset >= s {
panic(fmt.Sprintf("offset: %d - s:%d - rep: %d - cur :%d - max: %d", offset, s, rep, e.cur, e.maxMatchOff))
}
if !bytes.Equal(src[s:s+l], src[offset:offset+l]) {
panic(fmt.Sprintf("second match mismatch: %v != %v, first: %08x", src[s:s+4], src[offset:offset+4], first))
}
}
cand := match{offset: offset, s: s, length: l, rep: rep}
cand.estBits(bitsPerByte)
if m.est >= highScore || cand.est-m.est+(cand.s-m.s)*bitsPerByte>>10 < 0 {
@ -281,6 +283,7 @@ encodeLoop:
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
index0 := s + 1
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
@ -334,41 +337,45 @@ encodeLoop:
}
if debugAsserts {
if best.offset >= best.s {
panic(fmt.Sprintf("best.offset > s: %d >= %d", best.offset, best.s))
}
if best.s < nextEmit {
panic(fmt.Sprintf("s %d < nextEmit %d", best.s, nextEmit))
}
if best.offset < s-e.maxMatchOff {
panic(fmt.Sprintf("best.offset < s-e.maxMatchOff: %d < %d", best.offset, s-e.maxMatchOff))
}
if !bytes.Equal(src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]) {
panic(fmt.Sprintf("match mismatch: %v != %v", src[best.s:best.s+best.length], src[best.offset:best.offset+best.length]))
}
}
// We have a match, we can store the forward value
s = best.s
if best.rep > 0 {
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
if debugAsserts && s <= nextEmit {
panic("s <= nextEmit")
}
addLiterals(&seq, best.s)
// Repeat. If bit 4 is set, this is a non-lit repeat.
seq.offset = uint32(best.rep & 3)
if debugSequences {
println("repeat sequence", seq, "next s:", s)
println("repeat sequence", seq, "next s:", best.s, "off:", best.s-best.offset)
}
blk.sequences = append(blk.sequences, seq)
// Index old s + 1 -> s - 1
index0 := s + 1
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
end := s
if s > sLimit+4 {
end = sLimit + 4
}
off := index0 + e.cur
for index0 < s {
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
@ -377,6 +384,7 @@ encodeLoop:
off++
index0++
}
switch best.rep {
case 2, 4 | 1:
offset1, offset2 = offset2, offset1
@ -385,13 +393,17 @@ encodeLoop:
case 4 | 3:
offset1, offset2, offset3 = offset1-1, offset1, offset2
}
if s >= sLimit {
if debugEncoder {
println("repeat ended", s, best.length)
}
break encodeLoop
}
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
index0 := s + 1
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
@ -418,19 +430,25 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
// Index old s + 1 -> s - 1 or sLimit
end := s
if s > sLimit-4 {
end = sLimit - 4
}
// Index old s + 1 -> s - 1
for index0 < s {
off := index0 + e.cur
for index0 < end {
cv0 := load6432(src, index0)
h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
off++
}
if s >= sLimit {
break encodeLoop
}
}

View File

@ -145,7 +145,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@ -162,6 +162,7 @@ encodeLoop:
off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -258,7 +259,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@ -498,15 +498,15 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
index0 += 2
off += 2
}
cv = load6432(src, s)
@ -672,7 +672,7 @@ encodeLoop:
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
var matched, index0 int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
@ -691,6 +691,7 @@ encodeLoop:
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
index0 = s + 1
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
@ -726,7 +727,6 @@ encodeLoop:
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff
nextEmit = s
@ -790,7 +790,6 @@ encodeLoop:
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
@ -1024,18 +1023,18 @@ encodeLoop:
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
off += 2
}
cv = load6432(src, s)

View File

@ -94,7 +94,7 @@ func WithEncoderConcurrency(n int) EOption {
// The value must be a power of two between MinWindowSize and MaxWindowSize.
// A larger value will enable better compression but allocate more memory and,
// for above-default values, take considerably longer.
// The default value is determined by the compression level.
// The default value is determined by the compression level and max 8MB.
func WithWindowSize(n int) EOption {
return func(o *encoderOptions) error {
switch {
@ -232,9 +232,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
case SpeedDefault:
o.windowSize = 8 << 20
case SpeedBetterCompression:
o.windowSize = 16 << 20
o.windowSize = 8 << 20
case SpeedBestCompression:
o.windowSize = 32 << 20
o.windowSize = 8 << 20
}
}
if !o.customALEntropy {

View File

@ -76,7 +76,7 @@ func (f frameHeader) appendTo(dst []byte) []byte {
if f.SingleSegment {
dst = append(dst, uint8(f.ContentSize))
}
// Unless SingleSegment is set, framessizes < 256 are nto stored.
// Unless SingleSegment is set, framessizes < 256 are not stored.
case 1:
f.ContentSize -= 256
dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8))

View File

@ -20,10 +20,9 @@ func (s *fseDecoder) buildDtable() error {
if v == -1 {
s.dt[highThreshold].setAddBits(uint8(i))
highThreshold--
symbolNext[i] = 1
} else {
symbolNext[i] = uint16(v)
v = 1
}
symbolNext[i] = uint16(v)
}
}
@ -35,10 +34,12 @@ func (s *fseDecoder) buildDtable() error {
for ss, v := range s.norm[:s.symbolLen] {
for i := 0; i < int(v); i++ {
s.dt[position].setAddBits(uint8(ss))
position = (position + step) & tableMask
for position > highThreshold {
for {
// lowprob area
position = (position + step) & tableMask
if position <= highThreshold {
break
}
}
}
}

View File

@ -157,8 +157,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R14
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@ -177,8 +176,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R14
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@ -197,8 +195,7 @@ sequenceDecs_decode_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R14
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@ -459,8 +456,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R14
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@ -479,8 +475,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R14
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@ -499,8 +494,7 @@ sequenceDecs_decode_56_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R14
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R14*1), CX
MOVQ DX, R15
MOVQ CX, BX
@ -772,11 +766,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
BZHIQ R14, R15, R15
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@ -784,11 +777,10 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@ -796,10 +788,9 @@ sequenceDecs_decode_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@ -1032,11 +1023,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
BZHIQ R14, R15, R15
// Update Offset State
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, R8, R8
ADDQ CX, R8
BZHIQ R8, R15, CX
SHRXQ R8, R15, R15
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@ -1044,11 +1034,10 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
MOVQ $0x00001010, R14
BEXTRQ R14, DI, DI
ADDQ CX, DI
BZHIQ DI, R15, CX
SHRXQ DI, R15, R15
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@ -1056,10 +1045,9 @@ sequenceDecs_decode_56_bmi2_fill_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R15, CX
MOVQ $0x00001010, R14
BEXTRQ R14, SI, SI
ADDQ CX, SI
BZHIQ SI, R15, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@ -1967,8 +1955,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R13
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@ -1987,8 +1974,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R13
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@ -2007,8 +1993,7 @@ sequenceDecs_decodeSync_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R13
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@ -2514,11 +2499,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
BZHIQ R13, R14, R14
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@ -2526,11 +2510,10 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@ -2538,10 +2521,9 @@ sequenceDecs_decodeSync_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX
@ -3055,8 +3037,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Literal Length State
MOVBQZX DI, R13
SHRQ $0x10, DI
MOVWQZX DI, DI
SHRL $0x10, DI
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@ -3075,8 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Match Length State
MOVBQZX R8, R13
SHRQ $0x10, R8
MOVWQZX R8, R8
SHRL $0x10, R8
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@ -3095,8 +3075,7 @@ sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
// Update Offset State
MOVBQZX R9, R13
SHRQ $0x10, R9
MOVWQZX R9, R9
SHRL $0x10, R9
LEAQ (BX)(R13*1), CX
MOVQ DX, R14
MOVQ CX, BX
@ -3704,11 +3683,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
BZHIQ R13, R14, R14
// Update Offset State
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, R8, R8
ADDQ CX, R8
BZHIQ R8, R14, CX
SHRXQ R8, R14, R14
SHRL $0x10, R8
ADDQ CX, R8
// Load ctx.ofTable
MOVQ ctx+16(FP), CX
@ -3716,11 +3694,10 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(R8*8), R8
// Update Match Length State
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
MOVQ $0x00001010, R13
BEXTRQ R13, DI, DI
ADDQ CX, DI
BZHIQ DI, R14, CX
SHRXQ DI, R14, R14
SHRL $0x10, DI
ADDQ CX, DI
// Load ctx.mlTable
MOVQ ctx+16(FP), CX
@ -3728,10 +3705,9 @@ sequenceDecs_decodeSync_safe_bmi2_fill_2_end:
MOVQ (CX)(DI*8), DI
// Update Literal Length State
BZHIQ SI, R14, CX
MOVQ $0x00001010, R13
BEXTRQ R13, SI, SI
ADDQ CX, SI
BZHIQ SI, R14, CX
SHRL $0x10, SI
ADDQ CX, SI
// Load ctx.llTable
MOVQ ctx+16(FP), CX

View File

@ -1,24 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

View File

@ -1,74 +0,0 @@
# This is an example goreleaser.yaml file with some sane defaults.
# Make sure to check the documentation at http://goreleaser.com
builds:
-
id: "cpuid"
binary: cpuid
main: ./cmd/cpuid/main.go
env:
- CGO_ENABLED=0
flags:
- -ldflags=-s -w
goos:
- aix
- linux
- freebsd
- netbsd
- windows
- darwin
goarch:
- 386
- amd64
- arm64
goarm:
- 7
archives:
-
id: cpuid
name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
replacements:
aix: AIX
darwin: OSX
linux: Linux
windows: Windows
386: i386
amd64: x86_64
freebsd: FreeBSD
netbsd: NetBSD
format_overrides:
- goos: windows
format: zip
files:
- LICENSE
checksum:
name_template: 'checksums.txt'
snapshot:
name_template: "{{ .Tag }}-next"
changelog:
sort: asc
filters:
exclude:
- '^doc:'
- '^docs:'
- '^test:'
- '^tests:'
- '^Update\sREADME.md'
nfpms:
-
file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
vendor: Klaus Post
homepage: https://github.com/klauspost/cpuid
maintainer: Klaus Post <klauspost@gmail.com>
description: CPUID Tool
license: BSD 3-Clause
formats:
- deb
- rpm
replacements:
darwin: Darwin
linux: Linux
freebsd: FreeBSD
amd64: x86_64

View File

@ -1,35 +0,0 @@
Developer Certificate of Origin
Version 1.1
Copyright (C) 2015- Klaus Post & Contributors.
Email: klauspost@gmail.com
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.

View File

@ -1,22 +0,0 @@
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,497 +0,0 @@
# cpuid
Package cpuid provides information about the CPU running the current program.
CPU features are detected on startup, and kept for fast access through the life of the application.
Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
You can access the CPU information by accessing the shared CPU variable of the cpuid library.
Package home: https://github.com/klauspost/cpuid
[![PkgGoDev](https://pkg.go.dev/badge/github.com/klauspost/cpuid)](https://pkg.go.dev/github.com/klauspost/cpuid/v2)
[![Go](https://github.com/klauspost/cpuid/actions/workflows/go.yml/badge.svg)](https://github.com/klauspost/cpuid/actions/workflows/go.yml)
## installing
`go get -u github.com/klauspost/cpuid/v2` using modules.
Drop `v2` for others.
Installing binary:
`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
Or download binaries from release page: https://github.com/klauspost/cpuid/releases
### Homebrew
For macOS/Linux users, you can install via [brew](https://brew.sh/)
```sh
$ brew install cpuid
```
## example
```Go
package main
import (
"fmt"
"strings"
. "github.com/klauspost/cpuid/v2"
)
func main() {
// Print basic CPU information:
fmt.Println("Name:", CPU.BrandName)
fmt.Println("PhysicalCores:", CPU.PhysicalCores)
fmt.Println("ThreadsPerCore:", CPU.ThreadsPerCore)
fmt.Println("LogicalCores:", CPU.LogicalCores)
fmt.Println("Family", CPU.Family, "Model:", CPU.Model, "Vendor ID:", CPU.VendorID)
fmt.Println("Features:", strings.Join(CPU.FeatureSet(), ","))
fmt.Println("Cacheline bytes:", CPU.CacheLine)
fmt.Println("L1 Data Cache:", CPU.Cache.L1D, "bytes")
fmt.Println("L1 Instruction Cache:", CPU.Cache.L1I, "bytes")
fmt.Println("L2 Cache:", CPU.Cache.L2, "bytes")
fmt.Println("L3 Cache:", CPU.Cache.L3, "bytes")
fmt.Println("Frequency", CPU.Hz, "hz")
// Test if we have these specific features:
if CPU.Supports(SSE, SSE2) {
fmt.Println("We have Streaming SIMD 2 Extensions")
}
}
```
Sample output:
```
>go run main.go
Name: AMD Ryzen 9 3950X 16-Core Processor
PhysicalCores: 16
ThreadsPerCore: 2
LogicalCores: 32
Family 23 Model: 113 Vendor ID: AMD
Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CMOV,CX16,F16C,FMA3,HTT,HYPERVISOR,LZCNT,MMX,MMXEXT,NX,POPCNT,RDRAND,RDSEED,RDTSCP,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3
Cacheline bytes: 64
L1 Data Cache: 32768 bytes
L1 Instruction Cache: 32768 bytes
L2 Cache: 524288 bytes
L3 Cache: 16777216 bytes
Frequency 0 hz
We have Streaming SIMD 2 Extensions
```
# usage
The `cpuid.CPU` provides access to CPU features. Use `cpuid.CPU.Supports()` to check for CPU features.
A faster `cpuid.CPU.Has()` is provided which will usually be inlined by the gc compiler.
To test a larger number of features, they can be combined using `f := CombineFeatures(CMOV, CMPXCHG8, X87, FXSR, MMX, SYSCALL, SSE, SSE2)`, etc.
This can be using with `cpuid.CPU.HasAll(f)` to quickly test if all features are supported.
Note that for some cpu/os combinations some features will not be detected.
`amd64` has rather good support and should work reliably on all platforms.
Note that hypervisors may not pass through all CPU features through to the guest OS,
so even if your host supports a feature it may not be visible on guests.
## arm64 feature detection
Not all operating systems provide ARM features directly
and there is no safe way to do so for the rest.
Currently `arm64/linux` and `arm64/freebsd` should be quite reliable.
`arm64/darwin` adds features expected from the M1 processor, but a lot remains undetected.
A `DetectARM()` can be used if you are able to control your deployment,
it will detect CPU features, but may crash if the OS doesn't intercept the calls.
A `-cpu.arm` flag for detecting unsafe ARM features can be added. See below.
Note that currently only features are detected on ARM,
no additional information is currently available.
## flags
It is possible to add flags that affects cpu detection.
For this the `Flags()` command is provided.
This must be called *before* `flag.Parse()` AND after the flags have been parsed `Detect()` must be called.
This means that any detection used in `init()` functions will not contain these flags.
Example:
```Go
package main
import (
"flag"
"fmt"
"strings"
"github.com/klauspost/cpuid/v2"
)
func main() {
cpuid.Flags()
flag.Parse()
cpuid.Detect()
// Test if we have these specific features:
if cpuid.CPU.Supports(cpuid.SSE, cpuid.SSE2) {
fmt.Println("We have Streaming SIMD 2 Extensions")
}
}
```
## commandline
Download as binary from: https://github.com/klauspost/cpuid/releases
Install from source:
`go install github.com/klauspost/cpuid/v2/cmd/cpuid@latest`
### Example
```
λ cpuid
Name: AMD Ryzen 9 3950X 16-Core Processor
Vendor String: AuthenticAMD
Vendor ID: AMD
PhysicalCores: 16
Threads Per Core: 2
Logical Cores: 32
CPU Family 23 Model: 113
Features: ADX,AESNI,AVX,AVX2,BMI1,BMI2,CLMUL,CLZERO,CMOV,CMPXCHG8,CPBOOST,CX16,F16C,FMA3,FXSR,FXSROPT,HTT,HYPERVISOR,LAHF,LZCNT,MCAOVERFLOW,MMX,MMXEXT,MOVBE,NX,OSXSAVE,POPCNT,RDRAND,RDSEED,RDTSCP,SCE,SHA,SSE,SSE2,SSE3,SSE4,SSE42,SSE4A,SSSE3,SUCCOR,X87,XSAVE
Microarchitecture level: 3
Cacheline bytes: 64
L1 Instruction Cache: 32768 bytes
L1 Data Cache: 32768 bytes
L2 Cache: 524288 bytes
L3 Cache: 16777216 bytes
```
### JSON Output:
```
λ cpuid --json
{
"BrandName": "AMD Ryzen 9 3950X 16-Core Processor",
"VendorID": 2,
"VendorString": "AuthenticAMD",
"PhysicalCores": 16,
"ThreadsPerCore": 2,
"LogicalCores": 32,
"Family": 23,
"Model": 113,
"CacheLine": 64,
"Hz": 0,
"BoostFreq": 0,
"Cache": {
"L1I": 32768,
"L1D": 32768,
"L2": 524288,
"L3": 16777216
},
"SGX": {
"Available": false,
"LaunchControl": false,
"SGX1Supported": false,
"SGX2Supported": false,
"MaxEnclaveSizeNot64": 0,
"MaxEnclaveSize64": 0,
"EPCSections": null
},
"Features": [
"ADX",
"AESNI",
"AVX",
"AVX2",
"BMI1",
"BMI2",
"CLMUL",
"CLZERO",
"CMOV",
"CMPXCHG8",
"CPBOOST",
"CX16",
"F16C",
"FMA3",
"FXSR",
"FXSROPT",
"HTT",
"HYPERVISOR",
"LAHF",
"LZCNT",
"MCAOVERFLOW",
"MMX",
"MMXEXT",
"MOVBE",
"NX",
"OSXSAVE",
"POPCNT",
"RDRAND",
"RDSEED",
"RDTSCP",
"SCE",
"SHA",
"SSE",
"SSE2",
"SSE3",
"SSE4",
"SSE42",
"SSE4A",
"SSSE3",
"SUCCOR",
"X87",
"XSAVE"
],
"X64Level": 3
}
```
### Check CPU microarch level
```
λ cpuid --check-level=3
2022/03/18 17:04:40 AMD Ryzen 9 3950X 16-Core Processor
2022/03/18 17:04:40 Microarchitecture level 3 is supported. Max level is 3.
Exit Code 0
λ cpuid --check-level=4
2022/03/18 17:06:18 AMD Ryzen 9 3950X 16-Core Processor
2022/03/18 17:06:18 Microarchitecture level 4 not supported. Max level is 3.
Exit Code 1
```
## Available flags
### x86 & amd64
| Feature Flag | Description |
|--------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| ADX | Intel ADX (Multi-Precision Add-Carry Instruction Extensions) |
| AESNI | Advanced Encryption Standard New Instructions |
| AMD3DNOW | AMD 3DNOW |
| AMD3DNOWEXT | AMD 3DNowExt |
| AMXBF16 | Tile computational operations on BFLOAT16 numbers |
| AMXINT8 | Tile computational operations on 8-bit integers |
| AMXFP16 | Tile computational operations on FP16 numbers |
| AMXTILE | Tile architecture |
| APX_F | Intel APX |
| AVX | AVX functions |
| AVX10 | If set the Intel AVX10 Converged Vector ISA is supported |
| AVX10_128 | If set indicates that AVX10 128-bit vector support is present |
| AVX10_256 | If set indicates that AVX10 256-bit vector support is present |
| AVX10_512 | If set indicates that AVX10 512-bit vector support is present |
| AVX2 | AVX2 functions |
| AVX512BF16 | AVX-512 BFLOAT16 Instructions |
| AVX512BITALG | AVX-512 Bit Algorithms |
| AVX512BW | AVX-512 Byte and Word Instructions |
| AVX512CD | AVX-512 Conflict Detection Instructions |
| AVX512DQ | AVX-512 Doubleword and Quadword Instructions |
| AVX512ER | AVX-512 Exponential and Reciprocal Instructions |
| AVX512F | AVX-512 Foundation |
| AVX512FP16 | AVX-512 FP16 Instructions |
| AVX512IFMA | AVX-512 Integer Fused Multiply-Add Instructions |
| AVX512PF | AVX-512 Prefetch Instructions |
| AVX512VBMI | AVX-512 Vector Bit Manipulation Instructions |
| AVX512VBMI2 | AVX-512 Vector Bit Manipulation Instructions, Version 2 |
| AVX512VL | AVX-512 Vector Length Extensions |
| AVX512VNNI | AVX-512 Vector Neural Network Instructions |
| AVX512VP2INTERSECT | AVX-512 Intersect for D/Q |
| AVX512VPOPCNTDQ | AVX-512 Vector Population Count Doubleword and Quadword |
| AVXIFMA | AVX-IFMA instructions |
| AVXNECONVERT | AVX-NE-CONVERT instructions |
| AVXSLOW | Indicates the CPU performs 2 128 bit operations instead of one |
| AVXVNNI | AVX (VEX encoded) VNNI neural network instructions |
| AVXVNNIINT8 | AVX-VNNI-INT8 instructions |
| BHI_CTRL | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598 |
| BMI1 | Bit Manipulation Instruction Set 1 |
| BMI2 | Bit Manipulation Instruction Set 2 |
| CETIBT | Intel CET Indirect Branch Tracking |
| CETSS | Intel CET Shadow Stack |
| CLDEMOTE | Cache Line Demote |
| CLMUL | Carry-less Multiplication |
| CLZERO | CLZERO instruction supported |
| CMOV | i686 CMOV |
| CMPCCXADD | CMPCCXADD instructions |
| CMPSB_SCADBS_SHORT | Fast short CMPSB and SCASB |
| CMPXCHG8 | CMPXCHG8 instruction |
| CPBOOST | Core Performance Boost |
| CPPC | AMD: Collaborative Processor Performance Control |
| CX16 | CMPXCHG16B Instruction |
| EFER_LMSLE_UNS | AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ |
| ENQCMD | Enqueue Command |
| ERMS | Enhanced REP MOVSB/STOSB |
| F16C | Half-precision floating-point conversion |
| FLUSH_L1D | Flush L1D cache |
| FMA3 | Intel FMA 3. Does not imply AVX. |
| FMA4 | Bulldozer FMA4 functions |
| FP128 | AMD: When set, the internal FP/SIMD execution datapath is 128-bits wide |
| FP256 | AMD: When set, the internal FP/SIMD execution datapath is 256-bits wide |
| FSRM | Fast Short Rep Mov |
| FXSR | FXSAVE, FXRESTOR instructions, CR4 bit 9 |
| FXSROPT | FXSAVE/FXRSTOR optimizations |
| GFNI | Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage. |
| HLE | Hardware Lock Elision |
| HRESET | If set CPU supports history reset and the IA32_HRESET_ENABLE MSR |
| HTT | Hyperthreading (enabled) |
| HWA | Hardware assert supported. Indicates support for MSRC001_10 |
| HYBRID_CPU | This part has CPUs of more than one type. |
| HYPERVISOR | This bit has been reserved by Intel & AMD for use by hypervisors |
| IA32_ARCH_CAP | IA32_ARCH_CAPABILITIES MSR (Intel) |
| IA32_CORE_CAP | IA32_CORE_CAPABILITIES MSR |
| IBPB | Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB) |
| IBRS | AMD: Indirect Branch Restricted Speculation |
| IBRS_PREFERRED | AMD: IBRS is preferred over software solution |
| IBRS_PROVIDES_SMP | AMD: IBRS provides Same Mode Protection |
| IBS | Instruction Based Sampling (AMD) |
| IBSBRNTRGT | Instruction Based Sampling Feature (AMD) |
| IBSFETCHSAM | Instruction Based Sampling Feature (AMD) |
| IBSFFV | Instruction Based Sampling Feature (AMD) |
| IBSOPCNT | Instruction Based Sampling Feature (AMD) |
| IBSOPCNTEXT | Instruction Based Sampling Feature (AMD) |
| IBSOPSAM | Instruction Based Sampling Feature (AMD) |
| IBSRDWROPCNT | Instruction Based Sampling Feature (AMD) |
| IBSRIPINVALIDCHK | Instruction Based Sampling Feature (AMD) |
| IBS_FETCH_CTLX | AMD: IBS fetch control extended MSR supported |
| IBS_OPDATA4 | AMD: IBS op data 4 MSR supported |
| IBS_OPFUSE | AMD: Indicates support for IbsOpFuse |
| IBS_PREVENTHOST | Disallowing IBS use by the host supported |
| IBS_ZEN4 | Fetch and Op IBS support IBS extensions added with Zen4 |
| IDPRED_CTRL | IPRED_DIS |
| INT_WBINVD | WBINVD/WBNOINVD are interruptible. |
| INVLPGB | NVLPGB and TLBSYNC instruction supported |
| KEYLOCKER | Key locker |
| KEYLOCKERW | Key locker wide |
| LAHF | LAHF/SAHF in long mode |
| LAM | If set, CPU supports Linear Address Masking |
| LBRVIRT | LBR virtualization |
| LZCNT | LZCNT instruction |
| MCAOVERFLOW | MCA overflow recovery support. |
| MCDT_NO | Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it. |
| MCOMMIT | MCOMMIT instruction supported |
| MD_CLEAR | VERW clears CPU buffers |
| MMX | standard MMX |
| MMXEXT | SSE integer functions or AMD MMX ext |
| MOVBE | MOVBE instruction (big-endian) |
| MOVDIR64B | Move 64 Bytes as Direct Store |
| MOVDIRI | Move Doubleword as Direct Store |
| MOVSB_ZL | Fast Zero-Length MOVSB |
| MPX | Intel MPX (Memory Protection Extensions) |
| MOVU | MOVU SSE instructions are more efficient and should be preferred to SSE MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD |
| MSRIRC | Instruction Retired Counter MSR available |
| MSRLIST | Read/Write List of Model Specific Registers |
| MSR_PAGEFLUSH | Page Flush MSR available |
| NRIPS | Indicates support for NRIP save on VMEXIT |
| NX | NX (No-Execute) bit |
| OSXSAVE | XSAVE enabled by OS |
| PCONFIG | PCONFIG for Intel Multi-Key Total Memory Encryption |
| POPCNT | POPCNT instruction |
| PPIN | AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled |
| PREFETCHI | PREFETCHIT0/1 instructions |
| PSFD | Predictive Store Forward Disable |
| RDPRU | RDPRU instruction supported |
| RDRAND | RDRAND instruction is available |
| RDSEED | RDSEED instruction is available |
| RDTSCP | RDTSCP Instruction |
| RRSBA_CTRL | Restricted RSB Alternate |
| RTM | Restricted Transactional Memory |
| RTM_ALWAYS_ABORT | Indicates that the loaded microcode is forcing RTM abort. |
| SERIALIZE | Serialize Instruction Execution |
| SEV | AMD Secure Encrypted Virtualization supported |
| SEV_64BIT | AMD SEV guest execution only allowed from a 64-bit host |
| SEV_ALTERNATIVE | AMD SEV Alternate Injection supported |
| SEV_DEBUGSWAP | Full debug state swap supported for SEV-ES guests |
| SEV_ES | AMD SEV Encrypted State supported |
| SEV_RESTRICTED | AMD SEV Restricted Injection supported |
| SEV_SNP | AMD SEV Secure Nested Paging supported |
| SGX | Software Guard Extensions |
| SGXLC | Software Guard Extensions Launch Control |
| SHA | Intel SHA Extensions |
| SME | AMD Secure Memory Encryption supported |
| SME_COHERENT | AMD Hardware cache coherency across encryption domains enforced |
| SPEC_CTRL_SSBD | Speculative Store Bypass Disable |
| SRBDS_CTRL | SRBDS mitigation MSR available |
| SSE | SSE functions |
| SSE2 | P4 SSE functions |
| SSE3 | Prescott SSE3 functions |
| SSE4 | Penryn SSE4.1 functions |
| SSE42 | Nehalem SSE4.2 functions |
| SSE4A | AMD Barcelona microarchitecture SSE4a instructions |
| SSSE3 | Conroe SSSE3 functions |
| STIBP | Single Thread Indirect Branch Predictors |
| STIBP_ALWAYSON | AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On |
| STOSB_SHORT | Fast short STOSB |
| SUCCOR | Software uncorrectable error containment and recovery capability. |
| SVM | AMD Secure Virtual Machine |
| SVMDA | Indicates support for the SVM decode assists. |
| SVMFBASID | SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control |
| SVML | AMD SVM lock. Indicates support for SVM-Lock. |
| SVMNP | AMD SVM nested paging |
| SVMPF | SVM pause intercept filter. Indicates support for the pause intercept filter |
| SVMPFT | SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold |
| SYSCALL | System-Call Extension (SCE): SYSCALL and SYSRET instructions. |
| SYSEE | SYSENTER and SYSEXIT instructions |
| TBM | AMD Trailing Bit Manipulation |
| TDX_GUEST | Intel Trust Domain Extensions Guest |
| TLB_FLUSH_NESTED | AMD: Flushing includes all the nested translations for guest translations |
| TME | Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE. |
| TOPEXT | TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX. |
| TSCRATEMSR | MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104 |
| TSXLDTRK | Intel TSX Suspend Load Address Tracking |
| VAES | Vector AES. AVX(512) versions requires additional checks. |
| VMCBCLEAN | VMCB clean bits. Indicates support for VMCB clean bits. |
| VMPL | AMD VM Permission Levels supported |
| VMSA_REGPROT | AMD VMSA Register Protection supported |
| VMX | Virtual Machine Extensions |
| VPCLMULQDQ | Carry-Less Multiplication Quadword. Requires AVX for 3 register versions. |
| VTE | AMD Virtual Transparent Encryption supported |
| WAITPKG | TPAUSE, UMONITOR, UMWAIT |
| WBNOINVD | Write Back and Do Not Invalidate Cache |
| WRMSRNS | Non-Serializing Write to Model Specific Register |
| X87 | FPU |
| XGETBV1 | Supports XGETBV with ECX = 1 |
| XOP | Bulldozer XOP functions |
| XSAVE | XSAVE, XRESTOR, XSETBV, XGETBV |
| XSAVEC | Supports XSAVEC and the compacted form of XRSTOR. |
| XSAVEOPT | XSAVEOPT available |
| XSAVES | Supports XSAVES/XRSTORS and IA32_XSS |
# ARM features:
| Feature Flag | Description |
|--------------|------------------------------------------------------------------|
| AESARM | AES instructions |
| ARMCPUID | Some CPU ID registers readable at user-level |
| ASIMD | Advanced SIMD |
| ASIMDDP | SIMD Dot Product |
| ASIMDHP | Advanced SIMD half-precision floating point |
| ASIMDRDM | Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH) |
| ATOMICS | Large System Extensions (LSE) |
| CRC32 | CRC32/CRC32C instructions |
| DCPOP | Data cache clean to Point of Persistence (DC CVAP) |
| EVTSTRM | Generic timer |
| FCMA | Floatin point complex number addition and multiplication |
| FP | Single-precision and double-precision floating point |
| FPHP | Half-precision floating point |
| GPA | Generic Pointer Authentication |
| JSCVT | Javascript-style double->int convert (FJCVTZS) |
| LRCPC | Weaker release consistency (LDAPR, etc) |
| PMULL | Polynomial Multiply instructions (PMULL/PMULL2) |
| SHA1 | SHA-1 instructions (SHA1C, etc) |
| SHA2 | SHA-2 instructions (SHA256H, etc) |
| SHA3 | SHA-3 instructions (EOR3, RAXI, XAR, BCAX) |
| SHA512 | SHA512 instructions |
| SM3 | SM3 instructions |
| SM4 | SM4 instructions |
| SVE | Scalable Vector Extension |
# license
This code is published under an MIT license. See LICENSE file for more information.

File diff suppressed because it is too large Load Diff

View File

@ -1,47 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build 386,!gccgo,!noasm,!appengine
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuid(SB), 7, $0
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmDarwinHasAVX512() bool
TEXT ·asmDarwinHasAVX512(SB), 7, $0
MOVL $0, eax+0(FP)
RET

View File

@ -1,72 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build amd64,!gccgo,!noasm,!appengine
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuid(SB), 7, $0
XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmXgetbv(index uint32) (eax, edx uint32)
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// From https://go-review.googlesource.com/c/sys/+/285572/
// func asmDarwinHasAVX512() bool
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
MOVB $0, ret+0(FP) // default to false
#ifdef GOOS_darwin // return if not darwin
#ifdef GOARCH_amd64 // return if not amd64
// These values from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define hasAVX512F 0x0000004000000000
MOVQ $commpage64_version, BX
MOVW (BX), AX
CMPW AX, $13 // versions < 13 do not support AVX512
JL no_avx512
MOVQ $commpage64_cpu_capabilities64, BX
MOVQ (BX), AX
MOVQ $hasAVX512F, CX
ANDQ CX, AX
JZ no_avx512
MOVB $1, ret+0(FP)
no_avx512:
#endif
#endif
RET

View File

@ -1,26 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build arm64,!gccgo,!noasm,!appengine
// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
// func getMidr
TEXT ·getMidr(SB), 7, $0
WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */
MOVD R0, midr+0(FP)
RET
// func getProcFeatures
TEXT ·getProcFeatures(SB), 7, $0
WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */
MOVD R0, procFeatures+0(FP)
RET
// func getInstAttributes
TEXT ·getInstAttributes(SB), 7, $0
WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
MOVD R0, instAttrReg0+0(FP)
MOVD R1, instAttrReg1+8(FP)
RET

View File

@ -1,247 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//go:build arm64 && !gccgo && !noasm && !appengine
// +build arm64,!gccgo,!noasm,!appengine
package cpuid
import "runtime"
func getMidr() (midr uint64)
func getProcFeatures() (procFeatures uint64)
func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
func initCPU() {
cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
}
func addInfo(c *CPUInfo, safe bool) {
// Seems to be safe to assume on ARM64
c.CacheLine = 64
detectOS(c)
// ARM64 disabled since it may crash if interrupt is not intercepted by OS.
if safe && !c.Supports(ARMCPUID) && runtime.GOOS != "freebsd" {
return
}
midr := getMidr()
// MIDR_EL1 - Main ID Register
// https://developer.arm.com/docs/ddi0595/h/aarch64-system-registers/midr_el1
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | Implementer | [31-24] | y |
// |--------------------------------------------------|
// | Variant | [23-20] | y |
// |--------------------------------------------------|
// | Architecture | [19-16] | y |
// |--------------------------------------------------|
// | PartNum | [15-4] | y |
// |--------------------------------------------------|
// | Revision | [3-0] | y |
// x--------------------------------------------------x
switch (midr >> 24) & 0xff {
case 0xC0:
c.VendorString = "Ampere Computing"
c.VendorID = Ampere
case 0x41:
c.VendorString = "Arm Limited"
c.VendorID = ARM
case 0x42:
c.VendorString = "Broadcom Corporation"
c.VendorID = Broadcom
case 0x43:
c.VendorString = "Cavium Inc"
c.VendorID = Cavium
case 0x44:
c.VendorString = "Digital Equipment Corporation"
c.VendorID = DEC
case 0x46:
c.VendorString = "Fujitsu Ltd"
c.VendorID = Fujitsu
case 0x49:
c.VendorString = "Infineon Technologies AG"
c.VendorID = Infineon
case 0x4D:
c.VendorString = "Motorola or Freescale Semiconductor Inc"
c.VendorID = Motorola
case 0x4E:
c.VendorString = "NVIDIA Corporation"
c.VendorID = NVIDIA
case 0x50:
c.VendorString = "Applied Micro Circuits Corporation"
c.VendorID = AMCC
case 0x51:
c.VendorString = "Qualcomm Inc"
c.VendorID = Qualcomm
case 0x56:
c.VendorString = "Marvell International Ltd"
c.VendorID = Marvell
case 0x69:
c.VendorString = "Intel Corporation"
c.VendorID = Intel
}
// Lower 4 bits: Architecture
// Architecture Meaning
// 0b0001 Armv4.
// 0b0010 Armv4T.
// 0b0011 Armv5 (obsolete).
// 0b0100 Armv5T.
// 0b0101 Armv5TE.
// 0b0110 Armv5TEJ.
// 0b0111 Armv6.
// 0b1111 Architectural features are individually identified in the ID_* registers, see 'ID registers'.
// Upper 4 bit: Variant
// An IMPLEMENTATION DEFINED variant number.
// Typically, this field is used to distinguish between different product variants, or major revisions of a product.
c.Family = int(midr>>16) & 0xff
// PartNum, bits [15:4]
// An IMPLEMENTATION DEFINED primary part number for the device.
// On processors implemented by Arm, if the top four bits of the primary
// part number are 0x0 or 0x7, the variant and architecture are encoded differently.
// Revision, bits [3:0]
// An IMPLEMENTATION DEFINED revision number for the device.
c.Model = int(midr) & 0xffff
procFeatures := getProcFeatures()
// ID_AA64PFR0_EL1 - Processor Feature Register 0
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | DIT | [51-48] | y |
// |--------------------------------------------------|
// | SVE | [35-32] | y |
// |--------------------------------------------------|
// | GIC | [27-24] | n |
// |--------------------------------------------------|
// | AdvSIMD | [23-20] | y |
// |--------------------------------------------------|
// | FP | [19-16] | y |
// |--------------------------------------------------|
// | EL3 | [15-12] | n |
// |--------------------------------------------------|
// | EL2 | [11-8] | n |
// |--------------------------------------------------|
// | EL1 | [7-4] | n |
// |--------------------------------------------------|
// | EL0 | [3-0] | n |
// x--------------------------------------------------x
var f flagSet
// if procFeatures&(0xf<<48) != 0 {
// fmt.Println("DIT")
// }
f.setIf(procFeatures&(0xf<<32) != 0, SVE)
if procFeatures&(0xf<<20) != 15<<20 {
f.set(ASIMD)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
f.setIf(procFeatures&(0xf<<20) == 1<<20, FPHP, ASIMDHP)
}
f.setIf(procFeatures&(0xf<<16) != 0, FP)
instAttrReg0, instAttrReg1 := getInstAttributes()
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
//
// ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | TS | [55-52] | y |
// |--------------------------------------------------|
// | FHM | [51-48] | y |
// |--------------------------------------------------|
// | DP | [47-44] | y |
// |--------------------------------------------------|
// | SM4 | [43-40] | y |
// |--------------------------------------------------|
// | SM3 | [39-36] | y |
// |--------------------------------------------------|
// | SHA3 | [35-32] | y |
// |--------------------------------------------------|
// | RDM | [31-28] | y |
// |--------------------------------------------------|
// | ATOMICS | [23-20] | y |
// |--------------------------------------------------|
// | CRC32 | [19-16] | y |
// |--------------------------------------------------|
// | SHA2 | [15-12] | y |
// |--------------------------------------------------|
// | SHA1 | [11-8] | y |
// |--------------------------------------------------|
// | AES | [7-4] | y |
// x--------------------------------------------------x
// if instAttrReg0&(0xf<<52) != 0 {
// fmt.Println("TS")
// }
// if instAttrReg0&(0xf<<48) != 0 {
// fmt.Println("FHM")
// }
f.setIf(instAttrReg0&(0xf<<44) != 0, ASIMDDP)
f.setIf(instAttrReg0&(0xf<<40) != 0, SM4)
f.setIf(instAttrReg0&(0xf<<36) != 0, SM3)
f.setIf(instAttrReg0&(0xf<<32) != 0, SHA3)
f.setIf(instAttrReg0&(0xf<<28) != 0, ASIMDRDM)
f.setIf(instAttrReg0&(0xf<<20) != 0, ATOMICS)
f.setIf(instAttrReg0&(0xf<<16) != 0, CRC32)
f.setIf(instAttrReg0&(0xf<<12) != 0, SHA2)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
f.setIf(instAttrReg0&(0xf<<12) == 2<<12, SHA512)
f.setIf(instAttrReg0&(0xf<<8) != 0, SHA1)
f.setIf(instAttrReg0&(0xf<<4) != 0, AESARM)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
f.setIf(instAttrReg0&(0xf<<4) == 2<<4, PMULL)
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
//
// ID_AA64ISAR1_EL1 - Instruction set attribute register 1
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | GPI | [31-28] | y |
// |--------------------------------------------------|
// | GPA | [27-24] | y |
// |--------------------------------------------------|
// | LRCPC | [23-20] | y |
// |--------------------------------------------------|
// | FCMA | [19-16] | y |
// |--------------------------------------------------|
// | JSCVT | [15-12] | y |
// |--------------------------------------------------|
// | API | [11-8] | y |
// |--------------------------------------------------|
// | APA | [7-4] | y |
// |--------------------------------------------------|
// | DPB | [3-0] | y |
// x--------------------------------------------------x
// if instAttrReg1&(0xf<<28) != 0 {
// fmt.Println("GPI")
// }
f.setIf(instAttrReg1&(0xf<<28) != 24, GPA)
f.setIf(instAttrReg1&(0xf<<20) != 0, LRCPC)
f.setIf(instAttrReg1&(0xf<<16) != 0, FCMA)
f.setIf(instAttrReg1&(0xf<<12) != 0, JSCVT)
// if instAttrReg1&(0xf<<8) != 0 {
// fmt.Println("API")
// }
// if instAttrReg1&(0xf<<4) != 0 {
// fmt.Println("APA")
// }
f.setIf(instAttrReg1&(0xf<<0) != 0, DCPOP)
// Store
c.featureSet.or(f)
}

View File

@ -1,15 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//go:build (!amd64 && !386 && !arm64) || gccgo || noasm || appengine
// +build !amd64,!386,!arm64 gccgo noasm appengine
package cpuid
func initCPU() {
cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
}
func addInfo(info *CPUInfo, safe bool) {}

View File

@ -1,38 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//go:build (386 && !gccgo && !noasm && !appengine) || (amd64 && !gccgo && !noasm && !appengine)
// +build 386,!gccgo,!noasm,!appengine amd64,!gccgo,!noasm,!appengine
package cpuid
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
func asmXgetbv(index uint32) (eax, edx uint32)
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
func asmDarwinHasAVX512() bool
func initCPU() {
cpuid = asmCpuid
cpuidex = asmCpuidex
xgetbv = asmXgetbv
rdtscpAsm = asmRdtscpAsm
darwinHasAVX512 = asmDarwinHasAVX512
}
func addInfo(c *CPUInfo, safe bool) {
c.maxFunc = maxFunctionID()
c.maxExFunc = maxExtendedFunction()
c.BrandName = brandName()
c.CacheLine = cacheLine()
c.Family, c.Model, c.Stepping = familyModel()
c.featureSet = support()
c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
c.ThreadsPerCore = threadsPerCore()
c.LogicalCores = logicalCores()
c.PhysicalCores = physicalCores()
c.VendorID, c.VendorString = vendorID()
c.AVX10Level = c.supportAVX10()
c.cacheSize()
c.frequencies()
}

View File

@ -1,284 +0,0 @@
// Code generated by "stringer -type=FeatureID,Vendor"; DO NOT EDIT.
package cpuid
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[ADX-1]
_ = x[AESNI-2]
_ = x[AMD3DNOW-3]
_ = x[AMD3DNOWEXT-4]
_ = x[AMXBF16-5]
_ = x[AMXFP16-6]
_ = x[AMXINT8-7]
_ = x[AMXTILE-8]
_ = x[APX_F-9]
_ = x[AVX-10]
_ = x[AVX10-11]
_ = x[AVX10_128-12]
_ = x[AVX10_256-13]
_ = x[AVX10_512-14]
_ = x[AVX2-15]
_ = x[AVX512BF16-16]
_ = x[AVX512BITALG-17]
_ = x[AVX512BW-18]
_ = x[AVX512CD-19]
_ = x[AVX512DQ-20]
_ = x[AVX512ER-21]
_ = x[AVX512F-22]
_ = x[AVX512FP16-23]
_ = x[AVX512IFMA-24]
_ = x[AVX512PF-25]
_ = x[AVX512VBMI-26]
_ = x[AVX512VBMI2-27]
_ = x[AVX512VL-28]
_ = x[AVX512VNNI-29]
_ = x[AVX512VP2INTERSECT-30]
_ = x[AVX512VPOPCNTDQ-31]
_ = x[AVXIFMA-32]
_ = x[AVXNECONVERT-33]
_ = x[AVXSLOW-34]
_ = x[AVXVNNI-35]
_ = x[AVXVNNIINT8-36]
_ = x[BHI_CTRL-37]
_ = x[BMI1-38]
_ = x[BMI2-39]
_ = x[CETIBT-40]
_ = x[CETSS-41]
_ = x[CLDEMOTE-42]
_ = x[CLMUL-43]
_ = x[CLZERO-44]
_ = x[CMOV-45]
_ = x[CMPCCXADD-46]
_ = x[CMPSB_SCADBS_SHORT-47]
_ = x[CMPXCHG8-48]
_ = x[CPBOOST-49]
_ = x[CPPC-50]
_ = x[CX16-51]
_ = x[EFER_LMSLE_UNS-52]
_ = x[ENQCMD-53]
_ = x[ERMS-54]
_ = x[F16C-55]
_ = x[FLUSH_L1D-56]
_ = x[FMA3-57]
_ = x[FMA4-58]
_ = x[FP128-59]
_ = x[FP256-60]
_ = x[FSRM-61]
_ = x[FXSR-62]
_ = x[FXSROPT-63]
_ = x[GFNI-64]
_ = x[HLE-65]
_ = x[HRESET-66]
_ = x[HTT-67]
_ = x[HWA-68]
_ = x[HYBRID_CPU-69]
_ = x[HYPERVISOR-70]
_ = x[IA32_ARCH_CAP-71]
_ = x[IA32_CORE_CAP-72]
_ = x[IBPB-73]
_ = x[IBPB_BRTYPE-74]
_ = x[IBRS-75]
_ = x[IBRS_PREFERRED-76]
_ = x[IBRS_PROVIDES_SMP-77]
_ = x[IBS-78]
_ = x[IBSBRNTRGT-79]
_ = x[IBSFETCHSAM-80]
_ = x[IBSFFV-81]
_ = x[IBSOPCNT-82]
_ = x[IBSOPCNTEXT-83]
_ = x[IBSOPSAM-84]
_ = x[IBSRDWROPCNT-85]
_ = x[IBSRIPINVALIDCHK-86]
_ = x[IBS_FETCH_CTLX-87]
_ = x[IBS_OPDATA4-88]
_ = x[IBS_OPFUSE-89]
_ = x[IBS_PREVENTHOST-90]
_ = x[IBS_ZEN4-91]
_ = x[IDPRED_CTRL-92]
_ = x[INT_WBINVD-93]
_ = x[INVLPGB-94]
_ = x[KEYLOCKER-95]
_ = x[KEYLOCKERW-96]
_ = x[LAHF-97]
_ = x[LAM-98]
_ = x[LBRVIRT-99]
_ = x[LZCNT-100]
_ = x[MCAOVERFLOW-101]
_ = x[MCDT_NO-102]
_ = x[MCOMMIT-103]
_ = x[MD_CLEAR-104]
_ = x[MMX-105]
_ = x[MMXEXT-106]
_ = x[MOVBE-107]
_ = x[MOVDIR64B-108]
_ = x[MOVDIRI-109]
_ = x[MOVSB_ZL-110]
_ = x[MOVU-111]
_ = x[MPX-112]
_ = x[MSRIRC-113]
_ = x[MSRLIST-114]
_ = x[MSR_PAGEFLUSH-115]
_ = x[NRIPS-116]
_ = x[NX-117]
_ = x[OSXSAVE-118]
_ = x[PCONFIG-119]
_ = x[POPCNT-120]
_ = x[PPIN-121]
_ = x[PREFETCHI-122]
_ = x[PSFD-123]
_ = x[RDPRU-124]
_ = x[RDRAND-125]
_ = x[RDSEED-126]
_ = x[RDTSCP-127]
_ = x[RRSBA_CTRL-128]
_ = x[RTM-129]
_ = x[RTM_ALWAYS_ABORT-130]
_ = x[SBPB-131]
_ = x[SERIALIZE-132]
_ = x[SEV-133]
_ = x[SEV_64BIT-134]
_ = x[SEV_ALTERNATIVE-135]
_ = x[SEV_DEBUGSWAP-136]
_ = x[SEV_ES-137]
_ = x[SEV_RESTRICTED-138]
_ = x[SEV_SNP-139]
_ = x[SGX-140]
_ = x[SGXLC-141]
_ = x[SHA-142]
_ = x[SME-143]
_ = x[SME_COHERENT-144]
_ = x[SPEC_CTRL_SSBD-145]
_ = x[SRBDS_CTRL-146]
_ = x[SRSO_MSR_FIX-147]
_ = x[SRSO_NO-148]
_ = x[SRSO_USER_KERNEL_NO-149]
_ = x[SSE-150]
_ = x[SSE2-151]
_ = x[SSE3-152]
_ = x[SSE4-153]
_ = x[SSE42-154]
_ = x[SSE4A-155]
_ = x[SSSE3-156]
_ = x[STIBP-157]
_ = x[STIBP_ALWAYSON-158]
_ = x[STOSB_SHORT-159]
_ = x[SUCCOR-160]
_ = x[SVM-161]
_ = x[SVMDA-162]
_ = x[SVMFBASID-163]
_ = x[SVML-164]
_ = x[SVMNP-165]
_ = x[SVMPF-166]
_ = x[SVMPFT-167]
_ = x[SYSCALL-168]
_ = x[SYSEE-169]
_ = x[TBM-170]
_ = x[TDX_GUEST-171]
_ = x[TLB_FLUSH_NESTED-172]
_ = x[TME-173]
_ = x[TOPEXT-174]
_ = x[TSCRATEMSR-175]
_ = x[TSXLDTRK-176]
_ = x[VAES-177]
_ = x[VMCBCLEAN-178]
_ = x[VMPL-179]
_ = x[VMSA_REGPROT-180]
_ = x[VMX-181]
_ = x[VPCLMULQDQ-182]
_ = x[VTE-183]
_ = x[WAITPKG-184]
_ = x[WBNOINVD-185]
_ = x[WRMSRNS-186]
_ = x[X87-187]
_ = x[XGETBV1-188]
_ = x[XOP-189]
_ = x[XSAVE-190]
_ = x[XSAVEC-191]
_ = x[XSAVEOPT-192]
_ = x[XSAVES-193]
_ = x[AESARM-194]
_ = x[ARMCPUID-195]
_ = x[ASIMD-196]
_ = x[ASIMDDP-197]
_ = x[ASIMDHP-198]
_ = x[ASIMDRDM-199]
_ = x[ATOMICS-200]
_ = x[CRC32-201]
_ = x[DCPOP-202]
_ = x[EVTSTRM-203]
_ = x[FCMA-204]
_ = x[FP-205]
_ = x[FPHP-206]
_ = x[GPA-207]
_ = x[JSCVT-208]
_ = x[LRCPC-209]
_ = x[PMULL-210]
_ = x[SHA1-211]
_ = x[SHA2-212]
_ = x[SHA3-213]
_ = x[SHA512-214]
_ = x[SM3-215]
_ = x[SM4-216]
_ = x[SVE-217]
_ = x[lastID-218]
_ = x[firstID-0]
}
const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 561, 565, 579, 596, 599, 609, 620, 626, 634, 645, 653, 665, 681, 695, 706, 716, 731, 739, 750, 760, 767, 776, 786, 790, 793, 800, 805, 816, 823, 830, 838, 841, 847, 852, 861, 868, 876, 880, 883, 889, 896, 909, 914, 916, 923, 930, 936, 940, 949, 953, 958, 964, 970, 976, 986, 989, 1005, 1009, 1018, 1021, 1030, 1045, 1058, 1064, 1078, 1085, 1088, 1093, 1096, 1099, 1111, 1125, 1135, 1147, 1154, 1173, 1176, 1180, 1184, 1188, 1193, 1198, 1203, 1208, 1222, 1233, 1239, 1242, 1247, 1256, 1260, 1265, 1270, 1276, 1283, 1288, 1291, 1300, 1316, 1319, 1325, 1335, 1343, 1347, 1356, 1360, 1372, 1375, 1385, 1388, 1395, 1403, 1410, 1413, 1420, 1423, 1428, 1434, 1442, 1448, 1454, 1462, 1467, 1474, 1481, 1489, 1496, 1501, 1506, 1513, 1517, 1519, 1523, 1526, 1531, 1536, 1541, 1545, 1549, 1553, 1559, 1562, 1565, 1568, 1574}
func (i FeatureID) String() string {
if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
return "FeatureID(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _FeatureID_name[_FeatureID_index[i]:_FeatureID_index[i+1]]
}
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[VendorUnknown-0]
_ = x[Intel-1]
_ = x[AMD-2]
_ = x[VIA-3]
_ = x[Transmeta-4]
_ = x[NSC-5]
_ = x[KVM-6]
_ = x[MSVM-7]
_ = x[VMware-8]
_ = x[XenHVM-9]
_ = x[Bhyve-10]
_ = x[Hygon-11]
_ = x[SiS-12]
_ = x[RDC-13]
_ = x[Ampere-14]
_ = x[ARM-15]
_ = x[Broadcom-16]
_ = x[Cavium-17]
_ = x[DEC-18]
_ = x[Fujitsu-19]
_ = x[Infineon-20]
_ = x[Motorola-21]
_ = x[NVIDIA-22]
_ = x[AMCC-23]
_ = x[Qualcomm-24]
_ = x[Marvell-25]
_ = x[lastVendor-26]
}
const _Vendor_name = "VendorUnknownIntelAMDVIATransmetaNSCKVMMSVMVMwareXenHVMBhyveHygonSiSRDCAmpereARMBroadcomCaviumDECFujitsuInfineonMotorolaNVIDIAAMCCQualcommMarvelllastVendor"
var _Vendor_index = [...]uint8{0, 13, 18, 21, 24, 33, 36, 39, 43, 49, 55, 60, 65, 68, 71, 77, 80, 88, 94, 97, 104, 112, 120, 126, 130, 138, 145, 155}
func (i Vendor) String() string {
if i < 0 || i >= Vendor(len(_Vendor_index)-1) {
return "Vendor(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _Vendor_name[_Vendor_index[i]:_Vendor_index[i+1]]
}

View File

@ -1,121 +0,0 @@
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
package cpuid
import (
"runtime"
"strings"
"golang.org/x/sys/unix"
)
func detectOS(c *CPUInfo) bool {
if runtime.GOOS != "ios" {
tryToFillCPUInfoFomSysctl(c)
}
// There are no hw.optional sysctl values for the below features on Mac OS 11.0
// to detect their supported state dynamically. Assume the CPU features that
// Apple Silicon M1 supports to be available as a minimal set of features
// to all Go programs running on darwin/arm64.
// TODO: Add more if we know them.
c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
return true
}
func sysctlGetBool(name string) bool {
value, err := unix.SysctlUint32(name)
if err != nil {
return false
}
return value != 0
}
func sysctlGetString(name string) string {
value, err := unix.Sysctl(name)
if err != nil {
return ""
}
return value
}
func sysctlGetInt(unknown int, names ...string) int {
for _, name := range names {
value, err := unix.SysctlUint32(name)
if err != nil {
continue
}
if value != 0 {
return int(value)
}
}
return unknown
}
func sysctlGetInt64(unknown int, names ...string) int {
for _, name := range names {
value64, err := unix.SysctlUint64(name)
if err != nil {
continue
}
if int(value64) != unknown {
return int(value64)
}
}
return unknown
}
func setFeature(c *CPUInfo, name string, feature FeatureID) {
c.featureSet.setIf(sysctlGetBool(name), feature)
}
func tryToFillCPUInfoFomSysctl(c *CPUInfo) {
c.BrandName = sysctlGetString("machdep.cpu.brand_string")
if len(c.BrandName) != 0 {
c.VendorString = strings.Fields(c.BrandName)[0]
}
c.PhysicalCores = sysctlGetInt(runtime.NumCPU(), "hw.physicalcpu")
c.ThreadsPerCore = sysctlGetInt(1, "machdep.cpu.thread_count", "kern.num_threads") /
sysctlGetInt(1, "hw.physicalcpu")
c.LogicalCores = sysctlGetInt(runtime.NumCPU(), "machdep.cpu.core_count")
c.Family = sysctlGetInt(0, "machdep.cpu.family", "hw.cpufamily")
c.Model = sysctlGetInt(0, "machdep.cpu.model")
c.CacheLine = sysctlGetInt64(0, "hw.cachelinesize")
c.Cache.L1I = sysctlGetInt64(-1, "hw.l1icachesize")
c.Cache.L1D = sysctlGetInt64(-1, "hw.l1dcachesize")
c.Cache.L2 = sysctlGetInt64(-1, "hw.l2cachesize")
c.Cache.L3 = sysctlGetInt64(-1, "hw.l3cachesize")
// from https://developer.arm.com/downloads/-/exploration-tools/feature-names-for-a-profile
setFeature(c, "hw.optional.arm.FEAT_AES", AESARM)
setFeature(c, "hw.optional.AdvSIMD", ASIMD)
setFeature(c, "hw.optional.arm.FEAT_DotProd", ASIMDDP)
setFeature(c, "hw.optional.arm.FEAT_RDM", ASIMDRDM)
setFeature(c, "hw.optional.FEAT_CRC32", CRC32)
setFeature(c, "hw.optional.arm.FEAT_DPB", DCPOP)
// setFeature(c, "", EVTSTRM)
setFeature(c, "hw.optional.arm.FEAT_FCMA", FCMA)
setFeature(c, "hw.optional.arm.FEAT_FP", FP)
setFeature(c, "hw.optional.arm.FEAT_FP16", FPHP)
setFeature(c, "hw.optional.arm.FEAT_PAuth", GPA)
setFeature(c, "hw.optional.arm.FEAT_JSCVT", JSCVT)
setFeature(c, "hw.optional.arm.FEAT_LRCPC", LRCPC)
setFeature(c, "hw.optional.arm.FEAT_PMULL", PMULL)
setFeature(c, "hw.optional.arm.FEAT_SHA1", SHA1)
setFeature(c, "hw.optional.arm.FEAT_SHA256", SHA2)
setFeature(c, "hw.optional.arm.FEAT_SHA3", SHA3)
setFeature(c, "hw.optional.arm.FEAT_SHA512", SHA512)
// setFeature(c, "", SM3)
// setFeature(c, "", SM4)
setFeature(c, "hw.optional.arm.FEAT_SVE", SVE)
// from empirical observation
setFeature(c, "hw.optional.AdvSIMD_HPFPCvt", ASIMDHP)
setFeature(c, "hw.optional.armv8_1_atomics", ATOMICS)
setFeature(c, "hw.optional.floatingpoint", FP)
setFeature(c, "hw.optional.armv8_2_sha3", SHA3)
setFeature(c, "hw.optional.armv8_2_sha512", SHA512)
setFeature(c, "hw.optional.armv8_3_compnum", FCMA)
setFeature(c, "hw.optional.armv8_crc32", CRC32)
}

View File

@ -1,130 +0,0 @@
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file located
// here https://github.com/golang/sys/blob/master/LICENSE
package cpuid
import (
"encoding/binary"
"io/ioutil"
"runtime"
)
// HWCAP bits.
const (
hwcap_FP = 1 << 0
hwcap_ASIMD = 1 << 1
hwcap_EVTSTRM = 1 << 2
hwcap_AES = 1 << 3
hwcap_PMULL = 1 << 4
hwcap_SHA1 = 1 << 5
hwcap_SHA2 = 1 << 6
hwcap_CRC32 = 1 << 7
hwcap_ATOMICS = 1 << 8
hwcap_FPHP = 1 << 9
hwcap_ASIMDHP = 1 << 10
hwcap_CPUID = 1 << 11
hwcap_ASIMDRDM = 1 << 12
hwcap_JSCVT = 1 << 13
hwcap_FCMA = 1 << 14
hwcap_LRCPC = 1 << 15
hwcap_DCPOP = 1 << 16
hwcap_SHA3 = 1 << 17
hwcap_SM3 = 1 << 18
hwcap_SM4 = 1 << 19
hwcap_ASIMDDP = 1 << 20
hwcap_SHA512 = 1 << 21
hwcap_SVE = 1 << 22
hwcap_ASIMDFHM = 1 << 23
)
func detectOS(c *CPUInfo) bool {
// For now assuming no hyperthreading is reasonable.
c.LogicalCores = runtime.NumCPU()
c.PhysicalCores = c.LogicalCores
c.ThreadsPerCore = 1
if hwcap == 0 {
// We did not get values from the runtime.
// Try reading /proc/self/auxv
// From https://github.com/golang/sys
const (
_AT_HWCAP = 16
_AT_HWCAP2 = 26
uintSize = int(32 << (^uint(0) >> 63))
)
buf, err := ioutil.ReadFile("/proc/self/auxv")
if err != nil {
// e.g. on android /proc/self/auxv is not accessible, so silently
// ignore the error and leave Initialized = false. On some
// architectures (e.g. arm64) doinit() implements a fallback
// readout and will set Initialized = true again.
return false
}
bo := binary.LittleEndian
for len(buf) >= 2*(uintSize/8) {
var tag, val uint
switch uintSize {
case 32:
tag = uint(bo.Uint32(buf[0:]))
val = uint(bo.Uint32(buf[4:]))
buf = buf[8:]
case 64:
tag = uint(bo.Uint64(buf[0:]))
val = uint(bo.Uint64(buf[8:]))
buf = buf[16:]
}
switch tag {
case _AT_HWCAP:
hwcap = val
case _AT_HWCAP2:
// Not used
}
}
if hwcap == 0 {
return false
}
}
// HWCap was populated by the runtime from the auxiliary vector.
// Use HWCap information since reading aarch64 system registers
// is not supported in user space on older linux kernels.
c.featureSet.setIf(isSet(hwcap, hwcap_AES), AESARM)
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMD), ASIMD)
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDDP), ASIMDDP)
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDHP), ASIMDHP)
c.featureSet.setIf(isSet(hwcap, hwcap_ASIMDRDM), ASIMDRDM)
c.featureSet.setIf(isSet(hwcap, hwcap_CPUID), ARMCPUID)
c.featureSet.setIf(isSet(hwcap, hwcap_CRC32), CRC32)
c.featureSet.setIf(isSet(hwcap, hwcap_DCPOP), DCPOP)
c.featureSet.setIf(isSet(hwcap, hwcap_EVTSTRM), EVTSTRM)
c.featureSet.setIf(isSet(hwcap, hwcap_FCMA), FCMA)
c.featureSet.setIf(isSet(hwcap, hwcap_FP), FP)
c.featureSet.setIf(isSet(hwcap, hwcap_FPHP), FPHP)
c.featureSet.setIf(isSet(hwcap, hwcap_JSCVT), JSCVT)
c.featureSet.setIf(isSet(hwcap, hwcap_LRCPC), LRCPC)
c.featureSet.setIf(isSet(hwcap, hwcap_PMULL), PMULL)
c.featureSet.setIf(isSet(hwcap, hwcap_SHA1), SHA1)
c.featureSet.setIf(isSet(hwcap, hwcap_SHA2), SHA2)
c.featureSet.setIf(isSet(hwcap, hwcap_SHA3), SHA3)
c.featureSet.setIf(isSet(hwcap, hwcap_SHA512), SHA512)
c.featureSet.setIf(isSet(hwcap, hwcap_SM3), SM3)
c.featureSet.setIf(isSet(hwcap, hwcap_SM4), SM4)
c.featureSet.setIf(isSet(hwcap, hwcap_SVE), SVE)
// The Samsung S9+ kernel reports support for atomics, but not all cores
// actually support them, resulting in SIGILL. See issue #28431.
// TODO(elias.naur): Only disable the optimization on bad chipsets on android.
c.featureSet.setIf(isSet(hwcap, hwcap_ATOMICS) && runtime.GOOS != "android", ATOMICS)
return true
}
func isSet(hwc uint, value uint) bool {
return hwc&value != 0
}

View File

@ -1,16 +0,0 @@
// Copyright (c) 2020 Klaus Post, released under MIT License. See LICENSE file.
//go:build arm64 && !linux && !darwin
// +build arm64,!linux,!darwin
package cpuid
import "runtime"
func detectOS(c *CPUInfo) bool {
c.PhysicalCores = runtime.NumCPU()
// For now assuming 1 thread per core...
c.ThreadsPerCore = 1
c.LogicalCores = c.PhysicalCores
return false
}

View File

@ -1,8 +0,0 @@
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
//go:build nounsafe
// +build nounsafe
package cpuid
var hwcap uint

View File

@ -1,11 +0,0 @@
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
//go:build !nounsafe
// +build !nounsafe
package cpuid
import _ "unsafe" // needed for go:linkname
//go:linkname hwcap internal/cpu.HWCap
var hwcap uint

View File

@ -1,15 +0,0 @@
#!/bin/sh
set -e
go tool dist list | while IFS=/ read os arch; do
echo "Checking $os/$arch..."
echo " normal"
GOARCH=$arch GOOS=$os go build -o /dev/null .
echo " noasm"
GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null .
echo " appengine"
GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null .
echo " noasm,appengine"
GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null .
done