4
0
mirror of https://github.com/cwinfo/matterbridge.git synced 2025-07-03 15:27:45 +00:00

Update dependencies (#1851)

This commit is contained in:
Wim
2022-06-25 00:36:16 +02:00
committed by GitHub
parent 5604d140e3
commit 4649876956
87 changed files with 10535 additions and 4392 deletions

View File

@ -386,47 +386,31 @@ In practice this means that concurrency is often limited to utilizing about 3 co
### Benchmarks
These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd).
The first two are streaming decodes and the last are smaller inputs.
Running on AMD Ryzen 9 3950X 16-Core Processor. AMD64 assembly used.
```
BenchmarkDecoderSilesia-8 3 385000067 ns/op 550.51 MB/s 5498 B/op 8 allocs/op
BenchmarkDecoderSilesiaCgo-8 6 197666567 ns/op 1072.25 MB/s 270672 B/op 8 allocs/op
BenchmarkDecoderSilesia-32 5 206878840 ns/op 1024.50 MB/s 49808 B/op 43 allocs/op
BenchmarkDecoderEnwik9-32 1 1271809000 ns/op 786.28 MB/s 72048 B/op 52 allocs/op
BenchmarkDecoderEnwik9-8 1 2027001600 ns/op 493.34 MB/s 10496 B/op 18 allocs/op
BenchmarkDecoderEnwik9Cgo-8 2 979499200 ns/op 1020.93 MB/s 270672 B/op 8 allocs/op
Concurrent blocks, performance:
Concurrent performance:
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-16 28915 42469 ns/op 4340.07 MB/s 114 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-16 116505 9965 ns/op 11900.16 MB/s 16 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-16 8952 134272 ns/op 3588.70 MB/s 915 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-16 11820 102538 ns/op 4161.90 MB/s 594 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-16 34782 34184 ns/op 3661.88 MB/s 60 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-16 27712 43447 ns/op 3500.58 MB/s 99 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-16 62826 18750 ns/op 21845.10 MB/s 104 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-16 631545 1794 ns/op 57078.74 MB/s 2 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-16 1690140 712 ns/op 172938.13 MB/s 1 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-16 10432 113593 ns/op 6180.73 MB/s 1143 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html.zst-16 113206 10671 ns/op 9596.27 MB/s 15 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-16 1530615 779 ns/op 5229.49 MB/s 0 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/kppkn.gtb.zst-16 65217 16192 ns/op 11383.34 MB/s 46 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/geo.protodata.zst-16 292671 4039 ns/op 29363.19 MB/s 6 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/plrabn12.txt.zst-16 26314 46021 ns/op 10470.43 MB/s 293 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/lcet10.txt.zst-16 33897 34900 ns/op 12227.96 MB/s 205 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/asyoulik.txt.zst-16 104348 11433 ns/op 10949.01 MB/s 20 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/alice29.txt.zst-16 75949 15510 ns/op 9805.60 MB/s 32 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/html_x_4.zst-16 173910 6756 ns/op 60624.29 MB/s 37 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/paper-100k.pdf.zst-16 923076 1339 ns/op 76474.87 MB/s 1 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/fireworks.jpeg.zst-16 922920 1351 ns/op 91102.57 MB/s 2 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/urls.10K.zst-16 27649 43618 ns/op 16096.19 MB/s 407 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/html.zst-16 279073 4160 ns/op 24614.18 MB/s 6 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938 1579 ns/op 2581.71 MB/s 0 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/kppkn.gtb.zst-32 67356 17857 ns/op 10321.96 MB/s 22.48 pct 102 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/geo.protodata.zst-32 266656 4421 ns/op 26823.21 MB/s 11.89 pct 19 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/plrabn12.txt.zst-32 20992 56842 ns/op 8477.17 MB/s 39.90 pct 754 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/lcet10.txt.zst-32 27456 43932 ns/op 9714.01 MB/s 33.27 pct 524 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/asyoulik.txt.zst-32 78432 15047 ns/op 8319.15 MB/s 40.34 pct 66 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/alice29.txt.zst-32 65800 18436 ns/op 8249.63 MB/s 37.75 pct 88 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html_x_4.zst-32 102993 11523 ns/op 35546.09 MB/s 3.637 pct 143 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/paper-100k.pdf.zst-32 1000000 1070 ns/op 95720.98 MB/s 80.53 pct 3 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/fireworks.jpeg.zst-32 749802 1752 ns/op 70272.35 MB/s 100.0 pct 5 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/urls.10K.zst-32 22640 52934 ns/op 13263.37 MB/s 26.25 pct 1014 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/html.zst-32 226412 5232 ns/op 19572.27 MB/s 14.49 pct 20 B/op 0 allocs/op
BenchmarkDecoder_DecodeAllParallel/comp-data.bin.zst-32 923041 1276 ns/op 3194.71 MB/s 31.26 pct 0 B/op 0 allocs/op
```
This reflects the performance around May 2020, but this may be out of date.
This reflects the performance around May 2022, but this may be out of date.
## Zstd inside ZIP files

View File

@ -63,13 +63,6 @@ func (b *bitReader) get32BitsFast(n uint8) uint32 {
return v
}
func (b *bitReader) get16BitsFast(n uint8) uint16 {
const regMask = 64 - 1
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return v
}
// fillFast() will make sure at least 32 bits are available.
// There must be at least 4 bytes available.
func (b *bitReader) fillFast() {

View File

@ -5,8 +5,6 @@
package zstd
import "fmt"
// bitWriter will write bits.
// First bit will be LSB of the first byte of output.
type bitWriter struct {
@ -73,80 +71,6 @@ func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {
b.nBits += bits
}
// flush will flush all pending full bytes.
// There will be at least 56 bits available for writing when this has been called.
// Using flush32 is faster, but leaves less space for writing.
func (b *bitWriter) flush() {
v := b.nBits >> 3
switch v {
case 0:
case 1:
b.out = append(b.out,
byte(b.bitContainer),
)
case 2:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
)
case 3:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
)
case 4:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
)
case 5:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
)
case 6:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
)
case 7:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
byte(b.bitContainer>>48),
)
case 8:
b.out = append(b.out,
byte(b.bitContainer),
byte(b.bitContainer>>8),
byte(b.bitContainer>>16),
byte(b.bitContainer>>24),
byte(b.bitContainer>>32),
byte(b.bitContainer>>40),
byte(b.bitContainer>>48),
byte(b.bitContainer>>56),
)
default:
panic(fmt.Errorf("bits (%d) > 64", b.nBits))
}
b.bitContainer >>= v << 3
b.nBits &= 7
}
// flush32 will flush out, so there are at least 32 bits available for writing.
func (b *bitWriter) flush32() {
if b.nBits < 32 {

View File

@ -5,9 +5,14 @@
package zstd
import (
"bytes"
"encoding/binary"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"sync"
"github.com/klauspost/compress/huff0"
@ -38,14 +43,14 @@ const (
// maxCompressedBlockSize is the biggest allowed compressed block size (128KB)
maxCompressedBlockSize = 128 << 10
compressedBlockOverAlloc = 16
maxCompressedBlockSizeAlloc = 128<<10 + compressedBlockOverAlloc
// Maximum possible block size (all Raw+Uncompressed).
maxBlockSize = (1 << 21) - 1
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header
maxCompressedLiteralSize = 1 << 18
maxRLELiteralSize = 1 << 20
maxMatchLen = 131074
maxSequences = 0x7f00 + 0xffff
maxMatchLen = 131074
maxSequences = 0x7f00 + 0xffff
// We support slightly less than the reference decoder to be able to
// use ints on 32 bit archs.
@ -97,7 +102,6 @@ type blockDec struct {
// Block is RLE, this is the size.
RLESize uint32
tmp [4]byte
Type blockType
@ -136,7 +140,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
b.Type = blockType((bh >> 1) & 3)
// find size.
cSize := int(bh >> 3)
maxSize := maxBlockSize
maxSize := maxCompressedBlockSizeAlloc
switch b.Type {
case blockTypeReserved:
return ErrReservedBlockType
@ -157,9 +161,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
println("Data size on stream:", cSize)
}
b.RLESize = 0
maxSize = maxCompressedBlockSize
maxSize = maxCompressedBlockSizeAlloc
if windowSize < maxCompressedBlockSize && b.lowMem {
maxSize = int(windowSize)
maxSize = int(windowSize) + compressedBlockOverAlloc
}
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
if debugDecoder {
@ -190,9 +194,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
// Read block data.
if cap(b.dataStorage) < cSize {
if b.lowMem || cSize > maxCompressedBlockSize {
b.dataStorage = make([]byte, 0, cSize)
b.dataStorage = make([]byte, 0, cSize+compressedBlockOverAlloc)
} else {
b.dataStorage = make([]byte, 0, maxCompressedBlockSize)
b.dataStorage = make([]byte, 0, maxCompressedBlockSizeAlloc)
}
}
if cap(b.dst) <= maxSize {
@ -360,14 +364,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
}
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, litRegenSize)
b.literalBuf = make([]byte, litRegenSize, litRegenSize+compressedBlockOverAlloc)
} else {
if litRegenSize > maxCompressedLiteralSize {
// Exceptional
b.literalBuf = make([]byte, litRegenSize)
} else {
b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize)
}
b.literalBuf = make([]byte, litRegenSize, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
literals = b.literalBuf[:litRegenSize]
@ -397,14 +396,14 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, 0, litRegenSize)
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
b.literalBuf = make([]byte, 0, maxCompressedLiteralSize)
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
var err error
// Use our out buffer.
huff.MaxDecodedSize = maxCompressedBlockSize
huff.MaxDecodedSize = litRegenSize
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
} else {
@ -429,9 +428,9 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
// Ensure we have space to store it.
if cap(b.literalBuf) < litRegenSize {
if b.lowMem {
b.literalBuf = make([]byte, 0, litRegenSize)
b.literalBuf = make([]byte, 0, litRegenSize+compressedBlockOverAlloc)
} else {
b.literalBuf = make([]byte, 0, maxCompressedBlockSize)
b.literalBuf = make([]byte, 0, maxCompressedBlockSize+compressedBlockOverAlloc)
}
}
huff := hist.huffTree
@ -448,7 +447,7 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
return in, err
}
hist.huffTree = huff
huff.MaxDecodedSize = maxCompressedBlockSize
huff.MaxDecodedSize = litRegenSize
// Use our out buffer.
if fourStreams {
literals, err = huff.Decoder().Decompress4X(b.literalBuf[:0:litRegenSize], literals)
@ -463,6 +462,8 @@ func (b *blockDec) decodeLiterals(in []byte, hist *history) (remain []byte, err
if len(literals) != litRegenSize {
return in, fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
// Re-cap to get extra size.
literals = b.literalBuf[:len(literals)]
if debugDecoder {
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
}
@ -486,10 +487,15 @@ func (b *blockDec) decodeCompressed(hist *history) error {
b.dst = append(b.dst, hist.decoders.literals...)
return nil
}
err = hist.decoders.decodeSync(hist)
before := len(hist.decoders.out)
err = hist.decoders.decodeSync(hist.b[hist.ignoreBuffer:])
if err != nil {
return err
}
if hist.decoders.maxSyncLen > 0 {
hist.decoders.maxSyncLen += uint64(before)
hist.decoders.maxSyncLen -= uint64(len(hist.decoders.out))
}
b.dst = hist.decoders.out
hist.recentOffsets = hist.decoders.prevOffset
return nil
@ -632,6 +638,22 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
println("initializing sequences:", err)
return err
}
// Extract blocks...
if false && hist.dict == nil {
fatalErr := func(err error) {
if err != nil {
panic(err)
}
}
fn := fmt.Sprintf("n-%d-lits-%d-prev-%d-%d-%d-win-%d.blk", hist.decoders.nSeqs, len(hist.decoders.literals), hist.recentOffsets[0], hist.recentOffsets[1], hist.recentOffsets[2], hist.windowSize)
var buf bytes.Buffer
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.litLengths.fse))
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
buf.Write(in)
ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
}
return nil
}
@ -650,6 +672,7 @@ func (b *blockDec) decodeSequences(hist *history) error {
}
hist.decoders.windowSize = hist.windowSize
hist.decoders.prevOffset = hist.recentOffsets
err := hist.decoders.decode(b.sequence)
hist.recentOffsets = hist.decoders.prevOffset
return err

View File

@ -52,10 +52,6 @@ func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
return r, nil
}
func (b *byteBuf) remain() []byte {
return *b
}
func (b *byteBuf) readByte() (byte, error) {
bb := *b
if len(bb) < 1 {

View File

@ -13,12 +13,6 @@ type byteReader struct {
off int
}
// init will initialize the reader and set the input.
func (b *byteReader) init(in []byte) {
b.b = in
b.off = 0
}
// advance the stream b n bytes.
func (b *byteReader) advance(n uint) {
b.off += int(n)

View File

@ -347,18 +347,20 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
}
frame.history.setDict(&dict)
}
if frame.FrameContentSize != fcsUnknown && frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
return dst, ErrDecoderSizeExceeded
if frame.WindowSize > d.o.maxWindowSize {
return dst, ErrWindowSizeExceeded
}
if frame.FrameContentSize < 1<<30 {
// Never preallocate more than 1 GB up front.
if frame.FrameContentSize != fcsUnknown {
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
return dst, ErrDecoderSizeExceeded
}
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize))
dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)+compressedBlockOverAlloc)
copy(dst2, dst)
dst = dst2
}
}
if cap(dst) == 0 {
// Allocate len(input) * 2 by default if nothing is provided
// and we didn't get frame content size.
@ -437,7 +439,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
println("got", len(d.current.b), "bytes, error:", d.current.err, "data crc:", tmp)
}
if len(next.b) > 0 {
if !d.o.ignoreChecksum && len(next.b) > 0 {
n, err := d.current.crc.Write(next.b)
if err == nil {
if n != len(next.b) {
@ -449,7 +451,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
got := d.current.crc.Sum64()
var tmp [4]byte
binary.LittleEndian.PutUint32(tmp[:], uint32(got))
if !bytes.Equal(tmp[:], next.d.checkCRC) && !ignoreCRC {
if !d.o.ignoreChecksum && !bytes.Equal(tmp[:], next.d.checkCRC) {
if debugDecoder {
println("CRC Check Failed:", tmp[:], " (got) !=", next.d.checkCRC, "(on stream)")
}
@ -533,9 +535,15 @@ func (d *Decoder) nextBlockSync() (ok bool) {
// Update/Check CRC
if d.frame.HasCheckSum {
d.frame.crc.Write(d.current.b)
if !d.o.ignoreChecksum {
d.frame.crc.Write(d.current.b)
}
if d.current.d.Last {
d.current.err = d.frame.checkCRC()
if !d.o.ignoreChecksum {
d.current.err = d.frame.checkCRC()
} else {
d.current.err = d.frame.consumeCRC()
}
if d.current.err != nil {
println("CRC error:", d.current.err)
return false
@ -629,60 +637,18 @@ func (d *Decoder) startSyncDecoder(r io.Reader) error {
// Create Decoder:
// ASYNC:
// Spawn 4 go routines.
// 0: Read frames and decode blocks.
// 1: Decode block and literals. Receives hufftree and seqdecs, returns seqdecs and huff tree.
// 2: Wait for recentOffsets if needed. Decode sequences, send recentOffsets.
// 3: Wait for stream history, execute sequences, send stream history.
// Spawn 3 go routines.
// 0: Read frames and decode block literals.
// 1: Decode sequences.
// 2: Execute sequences, send to output.
func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output chan decodeOutput) {
defer d.streamWg.Done()
br := readerWrapper{r: r}
var seqPrepare = make(chan *blockDec, d.o.concurrent)
var seqDecode = make(chan *blockDec, d.o.concurrent)
var seqExecute = make(chan *blockDec, d.o.concurrent)
// Async 1: Prepare blocks...
go func() {
var hist history
var hasErr bool
for block := range seqPrepare {
if hasErr {
if block != nil {
seqDecode <- block
}
continue
}
if block.async.newHist != nil {
if debugDecoder {
println("Async 1: new history")
}
hist.reset()
if block.async.newHist.dict != nil {
hist.setDict(block.async.newHist.dict)
}
}
if block.err != nil || block.Type != blockTypeCompressed {
hasErr = block.err != nil
seqDecode <- block
continue
}
remain, err := block.decodeLiterals(block.data, &hist)
block.err = err
hasErr = block.err != nil
if err == nil {
block.async.literals = hist.decoders.literals
block.async.seqData = remain
} else if debugDecoder {
println("decodeLiterals error:", err)
}
seqDecode <- block
}
close(seqDecode)
}()
// Async 2: Decode sequences...
// Async 1: Decode sequences...
go func() {
var hist history
var hasErr bool
@ -696,7 +662,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
}
if block.async.newHist != nil {
if debugDecoder {
println("Async 2: new history, recent:", block.async.newHist.recentOffsets)
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
}
hist.decoders = block.async.newHist.decoders
hist.recentOffsets = block.async.newHist.recentOffsets
@ -750,7 +716,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
}
if block.async.newHist != nil {
if debugDecoder {
println("Async 3: new history")
println("Async 2: new history")
}
hist.windowSize = block.async.newHist.windowSize
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
@ -837,6 +803,33 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
decodeStream:
for {
var hist history
var hasErr bool
decodeBlock := func(block *blockDec) {
if hasErr {
if block != nil {
seqDecode <- block
}
return
}
if block.err != nil || block.Type != blockTypeCompressed {
hasErr = block.err != nil
seqDecode <- block
return
}
remain, err := block.decodeLiterals(block.data, &hist)
block.err = err
hasErr = block.err != nil
if err == nil {
block.async.literals = hist.decoders.literals
block.async.seqData = remain
} else if debugDecoder {
println("decodeLiterals error:", err)
}
seqDecode <- block
}
frame := d.frame
if debugDecoder {
println("New frame...")
@ -863,7 +856,7 @@ decodeStream:
case <-ctx.Done():
case dec := <-d.decoders:
dec.sendErr(err)
seqPrepare <- dec
decodeBlock(dec)
}
break decodeStream
}
@ -883,6 +876,10 @@ decodeStream:
if debugDecoder {
println("Alloc History:", h.allocFrameBuffer)
}
hist.reset()
if h.dict != nil {
hist.setDict(h.dict)
}
dec.async.newHist = &h
dec.async.fcs = frame.FrameContentSize
historySent = true
@ -909,7 +906,7 @@ decodeStream:
}
err = dec.err
last := dec.Last
seqPrepare <- dec
decodeBlock(dec)
if err != nil {
break decodeStream
}
@ -918,7 +915,7 @@ decodeStream:
}
}
}
close(seqPrepare)
close(seqDecode)
wg.Wait()
d.frame.history.b = frameHistCache
}

View File

@ -19,6 +19,7 @@ type decoderOptions struct {
maxDecodedSize uint64
maxWindowSize uint64
dicts []dict
ignoreChecksum bool
}
func (o *decoderOptions) setDefault() {
@ -31,7 +32,7 @@ func (o *decoderOptions) setDefault() {
if o.concurrent > 4 {
o.concurrent = 4
}
o.maxDecodedSize = 1 << 63
o.maxDecodedSize = 64 << 30
}
// WithDecoderLowmem will set whether to use a lower amount of memory,
@ -66,7 +67,7 @@ func WithDecoderConcurrency(n int) DOption {
// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory
// non-streaming operations or maximum window size for streaming operations.
// This can be used to control memory usage of potentially hostile content.
// Maximum and default is 1 << 63 bytes.
// Maximum is 1 << 63 bytes. Default is 64GiB.
func WithDecoderMaxMemory(n uint64) DOption {
return func(o *decoderOptions) error {
if n == 0 {
@ -112,3 +113,11 @@ func WithDecoderMaxWindow(size uint64) DOption {
return nil
}
}
// IgnoreChecksum allows to forcibly ignore checksum checking.
func IgnoreChecksum(b bool) DOption {
return func(o *decoderOptions) error {
o.ignoreChecksum = b
return nil
}
}

View File

@ -156,8 +156,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -518,8 +518,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
@ -674,8 +674,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -1047,8 +1047,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match

View File

@ -127,8 +127,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -439,8 +439,8 @@ encodeLoop:
var t int32
for {
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -785,8 +785,8 @@ encodeLoop:
panic("offset0 was 0")
}
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
@ -969,7 +969,7 @@ encodeLoop:
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
longHash2 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
e.longTable[longHash1] = te0
e.longTable[longHash2] = te1
e.markLongShardDirty(longHash1)
@ -1002,8 +1002,8 @@ encodeLoop:
}
// Store this, since we have it.
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match

View File

@ -551,7 +551,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
// If we can do everything in one block, prefer that.
if len(src) <= maxCompressedBlockSize {
if len(src) <= e.o.blockSize {
enc.Reset(e.o.dict, true)
// Slightly faster with no history and everything in one block.
if e.o.crc {

View File

@ -253,10 +253,11 @@ func (d *frameDec) reset(br byteBuffer) error {
return ErrWindowSizeTooSmall
}
d.history.windowSize = int(d.WindowSize)
if d.o.lowMem && d.history.windowSize < maxBlockSize {
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
// Alloc 2x window size if not low-mem, or very small window size.
d.history.allocFrameBuffer = d.history.windowSize * 2
// TODO: Maybe use FrameContent size
} else {
// Alloc with one additional block
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
}
@ -290,13 +291,6 @@ func (d *frameDec) checkCRC() error {
if !d.HasCheckSum {
return nil
}
var tmp [4]byte
got := d.crc.Sum64()
// Flip to match file order.
tmp[0] = byte(got >> 0)
tmp[1] = byte(got >> 8)
tmp[2] = byte(got >> 16)
tmp[3] = byte(got >> 24)
// We can overwrite upper tmp now
want, err := d.rawInput.readSmall(4)
@ -305,7 +299,19 @@ func (d *frameDec) checkCRC() error {
return err
}
if !bytes.Equal(tmp[:], want) && !ignoreCRC {
if d.o.ignoreChecksum {
return nil
}
var tmp [4]byte
got := d.crc.Sum64()
// Flip to match file order.
tmp[0] = byte(got >> 0)
tmp[1] = byte(got >> 8)
tmp[2] = byte(got >> 16)
tmp[3] = byte(got >> 24)
if !bytes.Equal(tmp[:], want) {
if debugDecoder {
println("CRC Check Failed:", tmp[:], "!=", want)
}
@ -317,6 +323,19 @@ func (d *frameDec) checkCRC() error {
return nil
}
// consumeCRC reads the checksum data if the frame has one.
func (d *frameDec) consumeCRC() error {
if d.HasCheckSum {
_, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
}
}
return nil
}
// runDecoder will create a sync decoder that will decode a block of data.
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
saved := d.history.b
@ -326,6 +345,19 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
d.history.ignoreBuffer = len(dst)
// Store input length, so we only check new data.
crcStart := len(dst)
d.history.decoders.maxSyncLen = 0
if d.FrameContentSize != fcsUnknown {
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
return dst, ErrDecoderSizeExceeded
}
if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
// Alloc for output
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
copy(dst2, dst)
dst = dst2
}
}
var err error
for {
err = dec.reset(d.rawInput, d.WindowSize)
@ -360,13 +392,17 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
if d.FrameContentSize != fcsUnknown && uint64(len(d.history.b)-crcStart) != d.FrameContentSize {
err = ErrFrameSizeMismatch
} else if d.HasCheckSum {
var n int
n, err = d.crc.Write(dst[crcStart:])
if err == nil {
if n != len(dst)-crcStart {
err = io.ErrShortWrite
} else {
err = d.checkCRC()
if d.o.ignoreChecksum {
err = d.consumeCRC()
} else {
var n int
n, err = d.crc.Write(dst[crcStart:])
if err == nil {
if n != len(dst)-crcStart {
err = io.ErrShortWrite
} else {
err = d.checkCRC()
}
}
}
}

View File

@ -5,8 +5,10 @@
package zstd
import (
"encoding/binary"
"errors"
"fmt"
"io"
)
const (
@ -182,6 +184,29 @@ func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error {
return s.buildDtable()
}
func (s *fseDecoder) mustReadFrom(r io.Reader) {
fatalErr := func(err error) {
if err != nil {
panic(err)
}
}
// dt [maxTablesize]decSymbol // Decompression table.
// symbolLen uint16 // Length of active part of the symbol table.
// actualTableLog uint8 // Selected tablelog.
// maxBits uint8 // Maximum number of additional bits
// // used for table creation to avoid allocations.
// stateTable [256]uint16
// norm [maxSymbolValue + 1]int16
// preDefined bool
fatalErr(binary.Read(r, binary.LittleEndian, &s.dt))
fatalErr(binary.Read(r, binary.LittleEndian, &s.symbolLen))
fatalErr(binary.Read(r, binary.LittleEndian, &s.actualTableLog))
fatalErr(binary.Read(r, binary.LittleEndian, &s.maxBits))
fatalErr(binary.Read(r, binary.LittleEndian, &s.stateTable))
fatalErr(binary.Read(r, binary.LittleEndian, &s.norm))
fatalErr(binary.Read(r, binary.LittleEndian, &s.preDefined))
}
// decSymbol contains information about a state entry,
// Including the state offset base, the output symbol and
// the number of bits to read for the low part of the destination state.
@ -204,18 +229,10 @@ func (d decSymbol) newState() uint16 {
return uint16(d >> 16)
}
func (d decSymbol) baseline() uint32 {
return uint32(d >> 32)
}
func (d decSymbol) baselineInt() int {
return int(d >> 32)
}
func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) {
*d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32)
}
func (d *decSymbol) setNBits(nBits uint8) {
const mask = 0xffffffffffffff00
*d = (*d & mask) | decSymbol(nBits)
@ -231,11 +248,6 @@ func (d *decSymbol) setNewState(state uint16) {
*d = (*d & mask) | decSymbol(state)<<16
}
func (d *decSymbol) setBaseline(baseline uint32) {
const mask = 0xffffffff
*d = (*d & mask) | decSymbol(baseline)<<32
}
func (d *decSymbol) setExt(addBits uint8, baseline uint32) {
const mask = 0xffff00ff
*d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32)
@ -352,34 +364,7 @@ func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) {
s.state = dt[br.getBits(tableLog)]
}
// next returns the current symbol and sets the next state.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) next(br *bitReader) {
lowBits := uint16(br.getBits(s.state.nbBits()))
s.state = s.dt[s.state.newState()+lowBits]
}
// finished returns true if all bits have been read from the bitstream
// and the next state would require reading bits from the input.
func (s *fseState) finished(br *bitReader) bool {
return br.finished() && s.state.nbBits() > 0
}
// final returns the current state symbol without decoding the next.
func (s *fseState) final() (int, uint8) {
return s.state.baselineInt(), s.state.addBits()
}
// final returns the current state symbol without decoding the next.
func (s decSymbol) final() (int, uint8) {
return s.baselineInt(), s.addBits()
}
// nextFast returns the next symbol and sets the next state.
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := br.get16BitsFast(s.state.nbBits())
s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline(), s.state.addBits()
}

View File

@ -76,21 +76,6 @@ func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
s.clearCount = maxCount != 0
}
// prepare will prepare and allocate scratch tables used for both compression and decompression.
func (s *fseEncoder) prepare() (*fseEncoder, error) {
if s == nil {
s = &fseEncoder{}
}
s.useRLE = false
if s.clearCount && s.maxCount == 0 {
for i := range s.count {
s.count[i] = 0
}
s.clearCount = false
}
return s, nil
}
// allocCtable will allocate tables needed for compression.
// If existing tables a re big enough, they are simply re-used.
func (s *fseEncoder) allocCtable() {
@ -709,14 +694,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
c.state = c.stateTable[lu]
}
// encode the output symbol provided and write it to the bitstream.
func (c *cState) encode(symbolTT symbolTransform) {
nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
c.bw.addBits16NC(c.state, uint8(nbBitsOut))
c.state = c.stateTable[dstState]
}
// flush will write the tablelog to the output and flush the remaining full bytes.
func (c *cState) flush(tableLog uint8) {
c.bw.flush32()

View File

@ -1,11 +0,0 @@
//go:build ignorecrc
// +build ignorecrc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.
package zstd
// ignoreCRC can be used for fuzz testing to ignore CRC values...
const ignoreCRC = true

View File

@ -1,11 +0,0 @@
//go:build !ignorecrc
// +build !ignorecrc
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.
package zstd
// ignoreCRC can be used for fuzz testing to ignore CRC values...
const ignoreCRC = false

View File

@ -33,9 +33,3 @@ func hashLen(u uint64, length, mls uint8) uint32 {
return (uint32(u) * prime4bytes) >> (32 - length)
}
}
// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits.
// Preferably h should be a constant and should always be <32.
func hash3(u uint32, h uint8) uint32 {
return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
}

View File

@ -73,6 +73,7 @@ type sequenceDecs struct {
seqSize int
windowSize int
maxBits uint8
maxSyncLen uint64
}
// initialize all 3 decoders from the stream input.
@ -98,153 +99,13 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
return nil
}
// decode sequences from the stream with the provided history.
func (s *sequenceDecs) decode(seqs []seqVals) error {
br := s.br
// Grab full sizes tables, to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
s.seqSize = 0
litRemain := len(s.literals)
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
for i := range seqs {
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
// Final will not read from stream.
var llB, mlB, moB uint8
ll, llB = llState.final()
ml, mlB = mlState.final()
mo, moB = ofState.final()
// extra bits are stored in reverse order.
br.fillFast()
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fillFast()
}
ml += br.getBits(mlB)
ll += br.getBits(llB)
if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
} else {
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
mo++
}
if mo == 0 {
mo = s.prevOffset[0]
} else {
var temp int
if mo == 3 {
temp = s.prevOffset[0] - 1
} else {
temp = s.prevOffset[mo]
}
if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
println("WARNING: temp was 0")
temp = 1
}
if mo != 1 {
s.prevOffset[2] = s.prevOffset[1]
}
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = temp
mo = temp
}
}
br.fillFast()
} else {
if br.overread() {
if debugDecoder {
printf("reading sequence %d, exceeded available data\n", i)
}
return io.ErrUnexpectedEOF
}
ll, mo, ml = s.next(br, llState, mlState, ofState)
br.fill()
}
if debugSequences {
println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
}
// Evaluate.
// We might be doing this async, so do it early.
if mo == 0 && ml > 0 {
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
}
if ml > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}
s.seqSize += ll + ml
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
litRemain -= ll
if litRemain < 0 {
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
}
seqs[i] = seqVals{
ll: ll,
ml: ml,
mo: mo,
}
if i == len(seqs)-1 {
// This is the last sequence, so we shouldn't update state.
break
}
// Manually inlined, ~ 5-20% faster
// Update all 3 states at once. Approx 20% faster.
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
if nBits == 0 {
llState = llTable[llState.newState()&maxTableMask]
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
}
}
s.seqSize += litRemain
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
}
return err
}
// execute will execute the decoded sequence with the provided history.
// The sequence must be evaluated before being sent.
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
if len(s.dict) == 0 {
return s.executeSimple(seqs, hist)
}
// Ensure we have enough output size...
if len(s.out)+s.seqSize > cap(s.out) {
addBytes := s.seqSize + len(s.out)
@ -327,6 +188,7 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
}
}
}
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
@ -341,14 +203,18 @@ func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
}
// decode sequences from the stream with the provided history.
func (s *sequenceDecs) decodeSync(history *history) error {
func (s *sequenceDecs) decodeSync(hist []byte) error {
supported, err := s.decodeSyncSimple(hist)
if supported {
return err
}
br := s.br
seqs := s.nSeqs
startSize := len(s.out)
// Grab full sizes tables, to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
hist := history.b[history.ignoreBuffer:]
out := s.out
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
@ -433,7 +299,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
}
size := ll + ml + len(out)
if size-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
}
if size > cap(out) {
// Not enough size, which can happen under high volume block streaming conditions
@ -463,13 +329,13 @@ func (s *sequenceDecs) decodeSync(history *history) error {
if mo > len(out)+len(hist) || mo > s.windowSize {
if len(s.dict) == 0 {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
}
// we may be in dictionary.
dictO := len(s.dict) - (mo - (len(out) + len(hist)))
if dictO < 0 || dictO >= len(s.dict) {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist))
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(out)+len(hist)-startSize)
}
end := dictO + ml
if end > len(s.dict) {
@ -530,6 +396,7 @@ func (s *sequenceDecs) decodeSync(history *history) error {
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
@ -543,8 +410,8 @@ func (s *sequenceDecs) decodeSync(history *history) error {
}
// Check if space for literals
if len(s.literals)+len(s.out)-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", len(s.out), maxBlockSize)
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
}
// Add final literals
@ -552,16 +419,6 @@ func (s *sequenceDecs) decodeSync(history *history) error {
return br.close()
}
// update states, at least 27 bits must be available.
func (s *sequenceDecs) update(br *bitReader) {
// Max 8 bits
s.litLengths.state.next(br)
// Max 9 bits
s.matchLengths.state.next(br)
// Max 8 bits
s.offsets.state.next(br)
}
var bitMask [16]uint16
func init() {
@ -570,87 +427,6 @@ func init() {
}
}
// update states, at least 27 bits must be available.
func (s *sequenceDecs) updateAlt(br *bitReader) {
// Update all 3 states at once. Approx 20% faster.
a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
nBits := a.nbBits() + b.nbBits() + c.nbBits()
if nBits == 0 {
s.litLengths.state.state = s.litLengths.state.dt[a.newState()]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()]
s.offsets.state.state = s.offsets.state.dt[c.newState()]
return
}
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]
lowBits = uint16(bits >> (c.nbBits() & 31))
lowBits &= bitMask[b.nbBits()&15]
s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits]
lowBits = uint16(bits) & bitMask[c.nbBits()&15]
s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits]
}
// nextFast will return new states when there are at least 4 unused bytes left on the stream when done.
func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := llState.final()
ml, mlB := mlState.final()
mo, moB := ofState.final()
// extra bits are stored in reverse order.
br.fillFast()
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fillFast()
}
ml += br.getBits(mlB)
ll += br.getBits(llB)
if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
return
}
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
mo++
}
if mo == 0 {
mo = s.prevOffset[0]
return
}
var temp int
if mo == 3 {
temp = s.prevOffset[0] - 1
} else {
temp = s.prevOffset[mo]
}
if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
println("temp was 0")
temp = 1
}
if mo != 1 {
s.prevOffset[2] = s.prevOffset[1]
}
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = temp
mo = temp
return
}
func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) {
// Final will not read from stream.
ll, llB := llState.final()

View File

@ -0,0 +1,362 @@
//go:build amd64 && !appengine && !noasm && gc
// +build amd64,!appengine,!noasm,gc
package zstd
import (
"fmt"
"github.com/klauspost/compress/internal/cpuinfo"
)
type decodeSyncAsmContext struct {
llTable []decSymbol
mlTable []decSymbol
ofTable []decSymbol
llState uint64
mlState uint64
ofState uint64
iteration int
litRemain int
out []byte
outPosition int
literals []byte
litPosition int
history []byte
windowSize int
ll int // set on error (not for all errors, please refer to _generate/gen.go)
ml int // set on error (not for all errors, please refer to _generate/gen.go)
mo int // set on error (not for all errors, please refer to _generate/gen.go)
}
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
//go:noescape
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
//go:noescape
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
//go:noescape
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
// decode sequences from the stream with the provided history but without a dictionary.
func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
if len(s.dict) > 0 {
return false, nil
}
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
return false, nil
}
useSafe := false
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
useSafe = true
}
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
useSafe = true
}
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
useSafe = true
}
br := s.br
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
ctx := decodeSyncAsmContext{
llTable: s.litLengths.fse.dt[:maxTablesize],
mlTable: s.matchLengths.fse.dt[:maxTablesize],
ofTable: s.offsets.fse.dt[:maxTablesize],
llState: uint64(s.litLengths.state.state),
mlState: uint64(s.matchLengths.state.state),
ofState: uint64(s.offsets.state.state),
iteration: s.nSeqs - 1,
litRemain: len(s.literals),
out: s.out,
outPosition: len(s.out),
literals: s.literals,
windowSize: s.windowSize,
history: hist,
}
s.seqSize = 0
startSize := len(s.out)
var errCode int
if cpuinfo.HasBMI2() {
if useSafe {
errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
} else {
errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
}
} else {
if useSafe {
errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
} else {
errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
}
}
switch errCode {
case noError:
break
case errorMatchLenOfsMismatch:
return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
case errorMatchLenTooBig:
return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
case errorMatchOffTooBig:
return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
ctx.mo, ctx.outPosition+len(hist)-startSize)
case errorNotEnoughLiterals:
return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
ctx.ll, ctx.litRemain+ctx.ll)
case errorNotEnoughSpace:
size := ctx.outPosition + ctx.ll + ctx.ml
if debugDecoder {
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
}
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
default:
return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
}
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
return true, err
}
s.literals = s.literals[ctx.litPosition:]
t := ctx.outPosition
s.out = s.out[:t]
// Add final literals
s.out = append(s.out, s.literals...)
if debugDecoder {
t += len(s.literals)
if t != len(s.out) {
panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
}
}
return true, nil
}
// --------------------------------------------------------------------------------
type decodeAsmContext struct {
llTable []decSymbol
mlTable []decSymbol
ofTable []decSymbol
llState uint64
mlState uint64
ofState uint64
iteration int
seqs []seqVals
litRemain int
}
const noError = 0
// error reported when mo == 0 && ml > 0
const errorMatchLenOfsMismatch = 1
// error reported when ml > maxMatchLen
const errorMatchLenTooBig = 2
// error reported when mo > available history or mo > s.windowSize
const errorMatchOffTooBig = 3
// error reported when the sum of literal lengths exeeceds the literal buffer size
const errorNotEnoughLiterals = 4
// error reported when capacity of `out` is too small
const errorNotEnoughSpace = 5
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//go:noescape
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
//go:noescape
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// decode sequences from the stream without the provided history.
func (s *sequenceDecs) decode(seqs []seqVals) error {
br := s.br
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
ctx := decodeAsmContext{
llTable: s.litLengths.fse.dt[:maxTablesize],
mlTable: s.matchLengths.fse.dt[:maxTablesize],
ofTable: s.offsets.fse.dt[:maxTablesize],
llState: uint64(s.litLengths.state.state),
mlState: uint64(s.matchLengths.state.state),
ofState: uint64(s.offsets.state.state),
seqs: seqs,
iteration: len(seqs) - 1,
litRemain: len(s.literals),
}
s.seqSize = 0
lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
var errCode int
if cpuinfo.HasBMI2() {
if lte56bits {
errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
} else {
errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
}
} else {
if lte56bits {
errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
} else {
errCode = sequenceDecs_decode_amd64(s, br, &ctx)
}
}
if errCode != 0 {
i := len(seqs) - ctx.iteration - 1
switch errCode {
case errorMatchLenOfsMismatch:
ml := ctx.seqs[i].ml
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
case errorMatchLenTooBig:
ml := ctx.seqs[i].ml
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
case errorNotEnoughLiterals:
ll := ctx.seqs[i].ll
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
}
return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
}
if ctx.litRemain < 0 {
return fmt.Errorf("literal count is too big: total available %d, total requested %d",
len(s.literals), len(s.literals)-ctx.litRemain)
}
s.seqSize += ctx.litRemain
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
}
return err
}
// --------------------------------------------------------------------------------
type executeAsmContext struct {
seqs []seqVals
seqIndex int
out []byte
history []byte
literals []byte
outPosition int
litPosition int
windowSize int
}
// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
//
// Returns false if a match offset is too big.
//
// Please refer to seqdec_generic.go for the reference implementation.
//go:noescape
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Same as above, but with safe memcopies
//go:noescape
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
// executeSimple handles cases when dictionary is not used.
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
// Ensure we have enough output size...
if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
}
if debugDecoder {
printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
}
var t = len(s.out)
out := s.out[:t+s.seqSize]
ctx := executeAsmContext{
seqs: seqs,
seqIndex: 0,
out: out,
history: hist,
outPosition: t,
litPosition: 0,
literals: s.literals,
windowSize: s.windowSize,
}
var ok bool
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
} else {
ok = sequenceDecs_executeSimple_amd64(&ctx)
}
if !ok {
return fmt.Errorf("match offset (%d) bigger than current history (%d)",
seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
}
s.literals = s.literals[ctx.litPosition:]
t = ctx.outPosition
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
t += len(s.literals)
if t != len(out) {
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
}
}
s.out = out
return nil
}

3689
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,237 @@
//go:build !amd64 || appengine || !gc || noasm
// +build !amd64 appengine !gc noasm
package zstd
import (
"fmt"
"io"
)
// decode sequences from the stream with the provided history but without dictionary.
func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
return false, nil
}
// decode sequences from the stream without the provided history.
func (s *sequenceDecs) decode(seqs []seqVals) error {
br := s.br
// Grab full sizes tables, to avoid bounds checks.
llTable, mlTable, ofTable := s.litLengths.fse.dt[:maxTablesize], s.matchLengths.fse.dt[:maxTablesize], s.offsets.fse.dt[:maxTablesize]
llState, mlState, ofState := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state
s.seqSize = 0
litRemain := len(s.literals)
maxBlockSize := maxCompressedBlockSize
if s.windowSize < maxBlockSize {
maxBlockSize = s.windowSize
}
for i := range seqs {
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)
// Final will not read from stream.
var llB, mlB, moB uint8
ll, llB = llState.final()
ml, mlB = mlState.final()
mo, moB = ofState.final()
// extra bits are stored in reverse order.
br.fillFast()
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fillFast()
}
ml += br.getBits(mlB)
ll += br.getBits(llB)
if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
} else {
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
mo++
}
if mo == 0 {
mo = s.prevOffset[0]
} else {
var temp int
if mo == 3 {
temp = s.prevOffset[0] - 1
} else {
temp = s.prevOffset[mo]
}
if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
println("WARNING: temp was 0")
temp = 1
}
if mo != 1 {
s.prevOffset[2] = s.prevOffset[1]
}
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = temp
mo = temp
}
}
br.fillFast()
} else {
if br.overread() {
if debugDecoder {
printf("reading sequence %d, exceeded available data\n", i)
}
return io.ErrUnexpectedEOF
}
ll, mo, ml = s.next(br, llState, mlState, ofState)
br.fill()
}
if debugSequences {
println("Seq", i, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
}
// Evaluate.
// We might be doing this async, so do it early.
if mo == 0 && ml > 0 {
return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
}
if ml > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}
s.seqSize += ll + ml
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
litRemain -= ll
if litRemain < 0 {
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, litRemain+ll)
}
seqs[i] = seqVals{
ll: ll,
ml: ml,
mo: mo,
}
if i == len(seqs)-1 {
// This is the last sequence, so we shouldn't update state.
break
}
// Manually inlined, ~ 5-20% faster
// Update all 3 states at once. Approx 20% faster.
nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits()
if nBits == 0 {
llState = llTable[llState.newState()&maxTableMask]
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits >> (ofState.nbBits() & 31))
lowBits &= bitMask[mlState.nbBits()&15]
mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask]
lowBits = uint16(bits) & bitMask[ofState.nbBits()&15]
ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask]
}
}
s.seqSize += litRemain
if s.seqSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
}
err := br.close()
if err != nil {
printf("Closing sequences: %v, %+v\n", err, *br)
}
return err
}
// executeSimple handles cases when a dictionary is not used.
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
// Ensure we have enough output size...
if len(s.out)+s.seqSize > cap(s.out) {
addBytes := s.seqSize + len(s.out)
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
}
if debugDecoder {
printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
}
var t = len(s.out)
out := s.out[:t+s.seqSize]
for _, seq := range seqs {
// Add literals
copy(out[t:], s.literals[:seq.ll])
t += seq.ll
s.literals = s.literals[seq.ll:]
// Malformed input
if seq.mo > t+len(hist) || seq.mo > s.windowSize {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", seq.mo, t+len(hist))
}
// Copy from history.
if v := seq.mo - t; v > 0 {
// v is the start position in history from end.
start := len(hist) - v
if seq.ml > v {
// Some goes into the current block.
// Copy remainder of history
copy(out[t:], hist[start:])
t += v
seq.ml -= v
} else {
copy(out[t:], hist[start:start+seq.ml])
t += seq.ml
continue
}
}
// We must be in the current buffer now
if seq.ml > 0 {
start := t - seq.mo
if seq.ml <= t-start {
// No overlap
copy(out[t:], out[start:start+seq.ml])
t += seq.ml
} else {
// Overlapping copy
// Extend destination slice and copy one byte at the time.
src := out[start : start+seq.ml]
dst := out[t:]
dst = dst[:len(src)]
t += len(src)
// Destination is the space we just added.
for i := range src {
dst[i] = src[i]
}
}
}
}
// Add final literals
copy(out[t:], s.literals)
if debugDecoder {
t += len(s.literals)
if t != len(out) {
panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
}
}
s.out = out
return nil
}

View File

@ -18,26 +18,44 @@ const ZipMethodWinZip = 93
// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
const ZipMethodPKWare = 20
var zipReaderPool sync.Pool
// zipReaderPool is the default reader pool.
var zipReaderPool = sync.Pool{New: func() interface{} {
z, err := NewReader(nil, WithDecoderLowmem(true), WithDecoderMaxWindow(128<<20), WithDecoderConcurrency(1))
if err != nil {
panic(err)
}
return z
}}
// newZipReader creates a pooled zip decompressor.
func newZipReader(r io.Reader) io.ReadCloser {
dec, ok := zipReaderPool.Get().(*Decoder)
if ok {
dec.Reset(r)
} else {
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
if err != nil {
panic(err)
}
dec = d
func newZipReader(opts ...DOption) func(r io.Reader) io.ReadCloser {
pool := &zipReaderPool
if len(opts) > 0 {
opts = append([]DOption{WithDecoderLowmem(true), WithDecoderMaxWindow(128 << 20)}, opts...)
// Force concurrency 1
opts = append(opts, WithDecoderConcurrency(1))
// Create our own pool
pool = &sync.Pool{}
}
return func(r io.Reader) io.ReadCloser {
dec, ok := pool.Get().(*Decoder)
if ok {
dec.Reset(r)
} else {
d, err := NewReader(r, opts...)
if err != nil {
panic(err)
}
dec = d
}
return &pooledZipReader{dec: dec, pool: pool}
}
return &pooledZipReader{dec: dec}
}
type pooledZipReader struct {
mu sync.Mutex // guards Close and Read
dec *Decoder
mu sync.Mutex // guards Close and Read
pool *sync.Pool
dec *Decoder
}
func (r *pooledZipReader) Read(p []byte) (n int, err error) {
@ -48,8 +66,8 @@ func (r *pooledZipReader) Read(p []byte) (n int, err error) {
}
dec, err := r.dec.Read(p)
if err == io.EOF {
err = r.dec.Reset(nil)
zipReaderPool.Put(r.dec)
r.dec.Reset(nil)
r.pool.Put(r.dec)
r.dec = nil
}
return dec, err
@ -61,7 +79,7 @@ func (r *pooledZipReader) Close() error {
var err error
if r.dec != nil {
err = r.dec.Reset(nil)
zipReaderPool.Put(r.dec)
r.pool.Put(r.dec)
r.dec = nil
}
return err
@ -115,6 +133,9 @@ func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
// See ZipCompressor for example.
func ZipDecompressor() func(r io.Reader) io.ReadCloser {
return newZipReader
// Options can be specified. WithDecoderConcurrency(1) is forced,
// and by default a 128MB maximum decompression window is specified.
// The window size can be overridden if required.
func ZipDecompressor(opts ...DOption) func(r io.Reader) io.ReadCloser {
return newZipReader(opts...)
}

View File

@ -110,17 +110,6 @@ func printf(format string, a ...interface{}) {
}
}
// matchLenFast does matching, but will not match the last up to 7 bytes.
func matchLenFast(a, b []byte) int {
endI := len(a) & (math.MaxInt32 - 7)
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
return i + bits.TrailingZeros64(diff)>>3
}
}
return endI
}
// matchLen returns the maximum length.
// a must be the shortest of the two.
// The function also returns whether all bytes matched.