Update dependencies (#1851)

2025-07-03 22:27:44 +00:00 · 2022-06-25 00:36:16 +02:00
parent 5604d140e3
commit 4649876956
87 changed files with 10535 additions and 4392 deletions
--- a/vendor/github.com/klauspost/compress/s2/README.md
+++ b/vendor/github.com/klauspost/compress/s2/README.md
@ -19,6 +19,7 @@ This is important, so you don't have to worry about spending CPU cycles on alrea
 * Adjustable compression (3 levels) 
 * Concurrent stream compression
 * Faster decompression, even for Snappy compatible content
+* Concurrent Snappy/S2 stream decompression
 * Ability to quickly skip forward in compressed stream
 * Random seeking with indexes
 * Compatible with reading Snappy compressed content
@ -415,6 +416,25 @@ Without assembly decompression is also very fast; single goroutine decompression

 Even though S2 typically compresses better than Snappy, decompression speed is always better. 

+### Concurrent Stream Decompression
+
+For full stream decompression, S2 offers the [DecodeConcurrent](https://pkg.go.dev/github.com/klauspost/compress/s2#Reader.DecodeConcurrent) method,
+which decodes a full stream using multiple goroutines.
+
+Example scaling, AMD Ryzen 3950X, 16 cores, decompression using `s2d -bench=3 <input>`, best of 3: 
+
+| Input                                     | `-cpu=1`   | `-cpu=2`   | `-cpu=4`   | `-cpu=8`   | `-cpu=16`   |
+|-------------------------------------------|------------|------------|------------|------------|-------------|
+| enwik10.snappy                            | 1098.6MB/s | 1819.8MB/s | 3625.6MB/s | 6910.6MB/s | 10818.2MB/s |
+| enwik10.s2                                | 1303.5MB/s | 2606.1MB/s | 4847.9MB/s | 8878.4MB/s | 9592.1MB/s  |
+| sofia-air-quality-dataset.tar.snappy      | 1302.0MB/s | 2165.0MB/s | 4244.5MB/s | 8241.0MB/s | 12920.5MB/s |
+| sofia-air-quality-dataset.tar.s2          | 1399.2MB/s | 2463.2MB/s | 5196.5MB/s | 9639.8MB/s | 11439.5MB/s |
+| sofia-air-quality-dataset.tar.s2 (no asm) | 837.5MB/s  | 1652.6MB/s | 3183.6MB/s | 5945.0MB/s | 9620.7MB/s  |
+
+Scaling can be expected to be pretty linear until memory bandwidth is saturated. 
+
+For now, `DecodeConcurrent` can only be used to decode full streams; it cannot be combined with seeking or regular reads.
+
 ## Block compression


@ -873,7 +893,7 @@ for each entry {
    }
    
    // Uncompressed uses previous offset and adds EstBlockSize
-    entry[entryNum].UncompressedOffset = entry[entryNum-1].UncompressedOffset + EstBlockSize
+    entry[entryNum].UncompressedOffset = entry[entryNum-1].UncompressedOffset + EstBlockSize + uOff
 }


@ -901,6 +921,14 @@ for each entry {
 }
 ```

+To decode from any given uncompressed offset `(wantOffset)`:
+
+* Iterate entries until `entry[n].UncompressedOffset > wantOffset`.
+* Start decoding from `entry[n-1].CompressedOffset`.
+* Discard `wantOffset - entry[n-1].UncompressedOffset` bytes from the decoded stream.
+
+See [using indexes](https://github.com/klauspost/compress/tree/master/s2#using-indexes) for functions that perform the operations with a simpler interface.
+
 # Format Extensions

 * Frame [Stream identifier](https://github.com/google/snappy/blob/master/framing_format.txt#L68) changed from `sNaPpY` to `S2sTwO`.
--- a/vendor/github.com/klauspost/compress/s2/decode.go
+++ b/vendor/github.com/klauspost/compress/s2/decode.go
@ -11,6 +11,8 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
+	"runtime"
+	"sync"
 )

 var (
@ -169,6 +171,14 @@ func ReaderSkippableCB(id uint8, fn func(r io.Reader) error) ReaderOption {
 	}
 }

+// ReaderIgnoreCRC will make the reader skip CRC calculation and checks.
+func ReaderIgnoreCRC() ReaderOption {
+	return func(r *Reader) error {
+		r.ignoreCRC = true
+		return nil
+	}
+}
+
 // Reader is an io.Reader that can read Snappy-compressed bytes.
 type Reader struct {
 	r           io.Reader
@ -191,18 +201,19 @@ type Reader struct {
 	paramsOK       bool
 	snappyFrame    bool
 	ignoreStreamID bool
+	ignoreCRC      bool
 }

 // ensureBufferSize will ensure that the buffer can take at least n bytes.
 // If false is returned the buffer exceeds maximum allowed size.
 func (r *Reader) ensureBufferSize(n int) bool {
-	if len(r.buf) >= n {
-		return true
-	}
 	if n > r.maxBufSize {
 		r.err = ErrCorrupt
 		return false
 	}
+	if cap(r.buf) >= n {
+		return true
+	}
 	// Realloc buffer.
 	r.buf = make([]byte, n)
 	return true
@ -220,6 +231,7 @@ func (r *Reader) Reset(reader io.Reader) {
 	r.err = nil
 	r.i = 0
 	r.j = 0
+	r.blockStart = 0
 	r.readHeader = r.ignoreStreamID
 }

@ -344,7 +356,7 @@ func (r *Reader) Read(p []byte) (int, error) {
 				r.err = err
 				return 0, r.err
 			}
-			if crc(r.decoded[:n]) != checksum {
+			if !r.ignoreCRC && crc(r.decoded[:n]) != checksum {
 				r.err = ErrCRC
 				return 0, r.err
 			}
@ -385,7 +397,7 @@ func (r *Reader) Read(p []byte) (int, error) {
 			if !r.readFull(r.decoded[:n], false) {
 				return 0, r.err
 			}
-			if crc(r.decoded[:n]) != checksum {
+			if !r.ignoreCRC && crc(r.decoded[:n]) != checksum {
 				r.err = ErrCRC
 				return 0, r.err
 			}
@ -435,6 +447,259 @@ func (r *Reader) Read(p []byte) (int, error) {
 	}
 }

+// DecodeConcurrent will decode the full stream to w.
+// This function should not be combined with reading, seeking or other operations.
+// Up to 'concurrent' goroutines will be used.
+// If concurrent <= 0, runtime.NumCPU will be used.
+// On success the number of bytes decompressed and a nil error are returned.
+// This is mainly intended for bigger streams.
+func (r *Reader) DecodeConcurrent(w io.Writer, concurrent int) (written int64, err error) {
+	if r.i > 0 || r.j > 0 || r.blockStart > 0 {
+		return 0, errors.New("DecodeConcurrent called after ")
+	}
+	if concurrent <= 0 {
+		concurrent = runtime.NumCPU()
+	}
+
+	// Write to output
+	var errMu sync.Mutex
+	var aErr error
+	setErr := func(e error) (ok bool) {
+		errMu.Lock()
+		defer errMu.Unlock()
+		if e == nil {
+			return aErr == nil
+		}
+		if aErr == nil {
+			aErr = e
+		}
+		return false
+	}
+	hasErr := func() (ok bool) {
+		errMu.Lock()
+		v := aErr != nil
+		errMu.Unlock()
+		return v
+	}
+
+	var aWritten int64
+	toRead := make(chan []byte, concurrent)
+	writtenBlocks := make(chan []byte, concurrent)
+	queue := make(chan chan []byte, concurrent)
+	reUse := make(chan chan []byte, concurrent)
+	for i := 0; i < concurrent; i++ {
+		toRead <- make([]byte, 0, r.maxBufSize)
+		writtenBlocks <- make([]byte, 0, r.maxBufSize)
+		reUse <- make(chan []byte, 1)
+	}
+	// Writer
+	var wg sync.WaitGroup
+	wg.Add(1)
+	go func() {
+		defer wg.Done()
+		for toWrite := range queue {
+			entry := <-toWrite
+			reUse <- toWrite
+			if hasErr() {
+				writtenBlocks <- entry
+				continue
+			}
+			n, err := w.Write(entry)
+			want := len(entry)
+			writtenBlocks <- entry
+			if err != nil {
+				setErr(err)
+				continue
+			}
+			if n != want {
+				setErr(io.ErrShortWrite)
+				continue
+			}
+			aWritten += int64(n)
+		}
+	}()
+
+	// Reader
+	defer func() {
+		close(queue)
+		if r.err != nil {
+			err = r.err
+			setErr(r.err)
+		}
+		wg.Wait()
+		if err == nil {
+			err = aErr
+		}
+		written = aWritten
+	}()
+
+	for !hasErr() {
+		if !r.readFull(r.buf[:4], true) {
+			if r.err == io.EOF {
+				r.err = nil
+			}
+			return 0, r.err
+		}
+		chunkType := r.buf[0]
+		if !r.readHeader {
+			if chunkType != chunkTypeStreamIdentifier {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			r.readHeader = true
+		}
+		chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
+
+		// The chunk types are specified at
+		// https://github.com/google/snappy/blob/master/framing_format.txt
+		switch chunkType {
+		case chunkTypeCompressedData:
+			r.blockStart += int64(r.j)
+			// Section 4.2. Compressed data (chunk type 0x00).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if chunkLen > r.maxBufSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			orgBuf := <-toRead
+			buf := orgBuf[:chunkLen]
+
+			if !r.readFull(buf, false) {
+				return 0, r.err
+			}
+
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			buf = buf[checksumSize:]
+
+			n, err := DecodedLen(buf)
+			if err != nil {
+				r.err = err
+				return 0, r.err
+			}
+			if r.snappyFrame && n > maxSnappyBlockSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+
+			if n > r.maxBlock {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			wg.Add(1)
+
+			decoded := <-writtenBlocks
+			entry := <-reUse
+			queue <- entry
+			go func() {
+				defer wg.Done()
+				decoded = decoded[:n]
+				_, err := Decode(decoded, buf)
+				toRead <- orgBuf
+				if err != nil {
+					writtenBlocks <- decoded
+					setErr(err)
+					return
+				}
+				if !r.ignoreCRC && crc(decoded) != checksum {
+					writtenBlocks <- decoded
+					setErr(ErrCRC)
+					return
+				}
+				entry <- decoded
+			}()
+			continue
+
+		case chunkTypeUncompressedData:
+
+			// Section 4.3. Uncompressed data (chunk type 0x01).
+			if chunkLen < checksumSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if chunkLen > r.maxBufSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			// Grab write buffer
+			orgBuf := <-writtenBlocks
+			buf := orgBuf[:checksumSize]
+			if !r.readFull(buf, false) {
+				return 0, r.err
+			}
+			checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
+			// Read content.
+			n := chunkLen - checksumSize
+
+			if r.snappyFrame && n > maxSnappyBlockSize {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if n > r.maxBlock {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			// Read uncompressed
+			buf = orgBuf[:n]
+			if !r.readFull(buf, false) {
+				return 0, r.err
+			}
+
+			if !r.ignoreCRC && crc(buf) != checksum {
+				r.err = ErrCRC
+				return 0, r.err
+			}
+			entry := <-reUse
+			queue <- entry
+			entry <- buf
+			continue
+
+		case chunkTypeStreamIdentifier:
+			// Section 4.1. Stream identifier (chunk type 0xff).
+			if chunkLen != len(magicBody) {
+				r.err = ErrCorrupt
+				return 0, r.err
+			}
+			if !r.readFull(r.buf[:len(magicBody)], false) {
+				return 0, r.err
+			}
+			if string(r.buf[:len(magicBody)]) != magicBody {
+				if string(r.buf[:len(magicBody)]) != magicBodySnappy {
+					r.err = ErrCorrupt
+					return 0, r.err
+				} else {
+					r.snappyFrame = true
+				}
+			} else {
+				r.snappyFrame = false
+			}
+			continue
+		}
+
+		if chunkType <= 0x7f {
+			// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
+			// fmt.Printf("ERR chunktype: 0x%x\n", chunkType)
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+		// Section 4.4 Padding (chunk type 0xfe).
+		// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
+		if chunkLen > maxChunkSize {
+			// fmt.Printf("ERR chunkLen: 0x%x\n", chunkLen)
+			r.err = ErrUnsupported
+			return 0, r.err
+		}
+
+		// fmt.Printf("skippable: ID: 0x%x, len: 0x%x\n", chunkType, chunkLen)
+		if !r.skippable(r.buf, chunkLen, false, chunkType) {
+			return 0, r.err
+		}
+	}
+	return 0, r.err
+}
+
 // Skip will skip n bytes forward in the decompressed output.
 // For larger skips this consumes less CPU and is faster than reading output and discarding it.
 // CRC is not checked on skipped blocks.
@ -699,8 +964,16 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
 	case io.SeekCurrent:
 		offset += r.blockStart + int64(r.i)
 	case io.SeekEnd:
-		offset = -offset
+		if offset > 0 {
+			return 0, errors.New("seek after end of file")
+		}
+		offset = r.index.TotalUncompressed + offset
 	}
+
+	if offset < 0 {
+		return 0, errors.New("seek before start of file")
+	}
+
 	c, u, err := r.index.Find(offset)
 	if err != nil {
 		return r.blockStart + int64(r.i), err
@ -712,10 +985,6 @@ func (r *ReadSeeker) Seek(offset int64, whence int) (int64, error) {
 		return 0, err
 	}

-	if offset < 0 {
-		offset = r.index.TotalUncompressed + offset
-	}
-
 	r.i = r.j // Remove rest of current block.
 	if u < offset {
 		// Forward inside block
--- a/vendor/github.com/klauspost/compress/s2/encode.go
+++ b/vendor/github.com/klauspost/compress/s2/encode.go
@ -1119,12 +1119,6 @@ func (w *Writer) closeIndex(idx bool) ([]byte, error) {
 			if w.appendIndex {
 				w.written += int64(len(index))
 			}
-			if true {
-				_, err := w.index.Load(index)
-				if err != nil {
-					panic(err)
-				}
-			}
 		}

 		if w.pad > 1 {
--- a/vendor/github.com/klauspost/compress/s2/encode_best.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_best.go
@ -370,7 +370,7 @@ func encodeBlockBestSnappy(dst, src []byte) (d int) {
 				}
 				offset := m.s - m.offset

-				return score - emitCopySize(offset, m.length)
+				return score - emitCopyNoRepeatSize(offset, m.length)
 			}

 			matchAt := func(offset, s int, first uint32) match {
@ -567,6 +567,10 @@ func emitCopySize(offset, length int) int {

 	// Offset no more than 2 bytes.
 	if length > 64 {
+		if offset < 2048 {
+			// Emit 8 bytes, then rest as repeats...
+			return 2 + emitRepeatSize(offset, length-8)
+		}
 		// Emit remaining as repeats, at least 4 bytes remain.
 		return 3 + emitRepeatSize(offset, length-60)
 	}
@ -577,6 +581,28 @@ func emitCopySize(offset, length int) int {
 	return 2
 }

+// emitCopyNoRepeatSize returns the size to encode the offset+length
+//
+// It assumes that:
+//	1 <= offset && offset <= math.MaxUint32
+//	4 <= length && length <= 1 << 24
+func emitCopyNoRepeatSize(offset, length int) int {
+	if offset >= 65536 {
+		return 5 + 5*(length/64)
+	}
+
+	// Offset no more than 2 bytes.
+	if length > 64 {
+		// No repeats here: estimate the remainder as 3-byte copies of up to 60 bytes each.
+		return 3 + 3*(length/60)
+	}
+	if length >= 12 || offset >= 2048 {
+		return 3
+	}
+	// Emit the remaining copy, encoded as 2 bytes.
+	return 2
+}
+
 // emitRepeatSize returns the number of bytes required to encode a repeat.
 // Length must be at least 4 and < 1<<24
 func emitRepeatSize(offset, length int) int {
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_go.go
@ -180,14 +180,23 @@ func emitCopy(dst []byte, offset, length int) int {

 	// Offset no more than 2 bytes.
 	if length > 64 {
-		// Emit a length 60 copy, encoded as 3 bytes.
-		// Emit remaining as repeat value (minimum 4 bytes).
-		dst[2] = uint8(offset >> 8)
-		dst[1] = uint8(offset)
-		dst[0] = 59<<2 | tagCopy2
-		length -= 60
+		off := 3
+		if offset < 2048 {
+			// emit 8 bytes as tagCopy1, rest as repeats.
+			dst[1] = uint8(offset)
+			dst[0] = uint8(offset>>8)<<5 | uint8(8-4)<<2 | tagCopy1
+			length -= 8
+			off = 2
+		} else {
+			// Emit a length 60 copy, encoded as 3 bytes.
+			// Emit remaining as repeat value (minimum 4 bytes).
+			dst[2] = uint8(offset >> 8)
+			dst[1] = uint8(offset)
+			dst[0] = 59<<2 | tagCopy2
+			length -= 60
+		}
 		// Emit remaining as repeats, at least 4 bytes remain.
-		return 3 + emitRepeat(dst[3:], offset, length)
+		return off + emitRepeat(dst[off:], offset, length)
 	}
 	if length >= 12 || offset >= 2048 {
 		// Emit the remaining copy, encoded as 3 bytes.
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
@ -5,6 +5,8 @@

 package s2

+func _dummy_()
+
 // encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4294967295 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
--- a/vendor/github.com/klauspost/compress/s2/index.go
+++ b/vendor/github.com/klauspost/compress/s2/index.go
@ -10,6 +10,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"io"
+	"sort"
 )

 const (
@ -100,6 +101,15 @@ func (i *Index) Find(offset int64) (compressedOff, uncompressedOff int64, err er
 	if offset > i.TotalUncompressed {
 		return 0, 0, io.ErrUnexpectedEOF
 	}
+	if len(i.info) > 200 {
+		n := sort.Search(len(i.info), func(n int) bool {
+			return i.info[n].uncompressedOffset > offset
+		})
+		if n == 0 {
+			n = 1
+		}
+		return i.info[n-1].compressedOffset, i.info[n-1].uncompressedOffset, nil
+	}
 	for _, info := range i.info {
 		if info.uncompressedOffset > offset {
 			break