matterbridge/vendor/golang.org/x/image/vp8/reconstruct.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package vp8

// This file implements decoding DCT/WHT residual coefficients and
// reconstructing YCbCr data equal to predicted values plus residuals.
//
// There are 1*16*16 + 2*8*8 + 1*4*4 coefficients per macroblock:
//	- 1*16*16 luma DCT coefficients,
//	- 2*8*8 chroma DCT coefficients, and
//	- 1*4*4 luma WHT coefficients.
// Coefficients are read in lots of 16, and the later coefficients in each lot
// are often zero.
//
// The YCbCr data consists of 1*16*16 luma values and 2*8*8 chroma values,
// plus previously decoded values along the top and left borders. The combined
// values are laid out as a [1+16+1+8][32]uint8 so that vertically adjacent
// samples are 32 bytes apart. In detail, the layout is:
//
//	0 1 2 3 4 5 6 7  8 9 0 1 2 3 4 5  6 7 8 9 0 1 2 3  4 5 6 7 8 9 0 1
//	. . . . . . . a  b b b b b b b b  b b b b b b b b  c c c c . . . .	0
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	1
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	2
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	3
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	4
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	5
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	6
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	7
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	8
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	9
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	10
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	11
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  c c c c . . . .	12
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	13
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	14
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	15
//	. . . . . . . d  Y Y Y Y Y Y Y Y  Y Y Y Y Y Y Y Y  . . . . . . . .	16
//	. . . . . . . e  f f f f f f f f  . . . . . . . g  h h h h h h h h	17
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	18
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	19
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	20
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	21
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	22
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	23
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	24
//	. . . . . . . i  B B B B B B B B  . . . . . . . j  R R R R R R R R	25
//
// Y, B and R are the reconstructed luma (Y) and chroma (B, R) values.
// The Y values are predicted (either as one 16x16 region or 16 4x4 regions)
// based on the row above's Y values (some combination of {abc} or {dYC}) and
// the column left's Y values (either {ad} or {bY}). Similarly, B and R values
// are predicted on the row above and column left of their respective 8x8
// region: {efi} for B, {ghj} for R.
//
// For uppermost macroblocks (i.e. those with mby == 0), the {abcefgh} values
// are initialized to 0x7f. Otherwise, they are copied from the bottom row of
// the macroblock above. The {c} values are then duplicated from row 0 to rows
// 4, 8 and 12 of the ybr workspace.
// Similarly, for leftmost macroblocks (i.e. those with mbx == 0), the {adeigj}
// values are initialized to 0x81. Otherwise, they are copied from the right
// column of the macroblock to the left.
// For the top-left macroblock (with mby == 0 && mbx == 0), {aeg} is 0x7f,
// because prepareYBR fills the top border row after the left border column.
//
// When moving from one macroblock to the next horizontally, the {adeigj}
// values can simply be copied from the workspace to itself, shifted by 8 or
// 16 columns. When moving from one macroblock to the next vertically,
// filtering can occur and hence the row values have to be copied from the
// post-filtered image instead of the pre-filtered workspace.

// Offsets into the per-macroblock coefficient workspace (d.coeff), which is
// laid out as the 16*16 luma DCT coefficients, then the 8*8 DCT coefficients
// of each chroma plane, then the 4*4 luma WHT coefficients:
//	- bCoeffBase is where the B (Cb) chroma DCT coefficients start,
//	- rCoeffBase is where the R (Cr) chroma DCT coefficients start, and
//	- whtCoeffBase is where the luma WHT coefficients start.
const (
	bCoeffBase   = 1*16*16 + 0*8*8
	rCoeffBase   = 1*16*16 + 1*8*8
	whtCoeffBase = 1*16*16 + 2*8*8
)

// Positions within the ybr workspace of the top-left sample of each
// reconstructed region. The X constants are column indices and the Y
// constants are row indices:
//	- ybrYX, ybrYY locate the 16x16 luma (Y) region,
//	- ybrBX, ybrBY locate the 8x8 B (Cb) chroma region, and
//	- ybrRX, ybrRY locate the 8x8 R (Cr) chroma region.
const (
	ybrYX = 8
	ybrYY = 1
	ybrBX = 8
	ybrBY = 18
	ybrRX = 24
	ybrRY = 18
)

// prepareYBR fills in the border samples of d.ybr (the {abcdefghij} elements
// of the layout diagram) for the macroblock at column mbx and row mby.
//
// The left border columns are written first and the top border rows second,
// so the top border wins wherever the two overlap.
func (d *Decoder) prepareYBR(mbx, mby int) {
	// Left borders. Rows 0-16 belong to the luma region, whose border is in
	// column 7. Rows 17-25 belong to the two chroma regions, whose borders are
	// in columns 7 and 23.
	for row := 0; row < 26; row++ {
		line := &d.ybr[row]
		chroma := row >= 17
		switch {
		case mbx == 0:
			// Leftmost macroblock: there is no neighbor, so use a constant.
			line[7] = 0x81
			if chroma {
				line[23] = 0x81
			}
		case chroma:
			// Shift each chroma region's right-most column into its border.
			line[7] = line[15]
			line[23] = line[31]
		default:
			// Shift the luma region's right-most column into its border.
			line[7] = line[23]
		}
	}

	// Top borders: row 0 for luma, row 17 for both chroma regions.
	if mby == 0 {
		// Uppermost macroblock: there is no neighbor, so use a constant.
		for col := 7; col < 28; col++ {
			d.ybr[0][col] = 0x7f
		}
		for col := 7; col < 16; col++ {
			d.ybr[17][col] = 0x7f
			d.ybr[17][col+16] = 0x7f
		}
	} else {
		// Copy the image row just above this macroblock.
		lumaOff := (16*mby-1)*d.img.YStride + 16*mbx
		chromaOff := (8*mby-1)*d.img.CStride + 8*mbx
		copy(d.ybr[0][8:24], d.img.Y[lumaOff:lumaOff+16])
		copy(d.ybr[17][8:16], d.img.Cb[chromaOff:chromaOff+8])
		copy(d.ybr[17][24:32], d.img.Cr[chromaOff:chromaOff+8])
		// The four {c} samples lie above and to the right of the macroblock.
		if mbx == d.mbw-1 {
			// Rightmost macroblock: repeat the last sample of the row above.
			last := d.img.Y[lumaOff+15]
			for col := 24; col < 28; col++ {
				d.ybr[0][col] = last
			}
		} else {
			copy(d.ybr[0][24:28], d.img.Y[lumaOff+16:lumaOff+20])
		}
	}

	// Duplicate the {c} samples from row 0 into rows 4, 8 and 12.
	for row := 4; row < 16; row += 4 {
		copy(d.ybr[row][24:28], d.ybr[0][24:28])
	}
}

// btou maps false to 0 and true to 1.
func btou(b bool) uint8 {
	var u uint8
	if b {
		u = 1
	}
	return u
}

// pack combines four 0/1 values into a four-bit field of a uint32. x[i]
// becomes bit i of the field, and the field's lowest bit is placed at bit
// position shift of the result.
func pack(x [4]uint8, shift int) uint32 {
	var field uint32
	for i, v := range x {
		field |= uint32(v) << uint(i)
	}
	return field << uint(shift)
}

// unpack is the inverse of pack for a shift of zero: unpack[v][i] is bit i of
// the four-bit value v.
var unpack = [16][4]uint8{
	0x0: {0, 0, 0, 0},
	0x1: {1, 0, 0, 0},
	0x2: {0, 1, 0, 0},
	0x3: {1, 1, 0, 0},
	0x4: {0, 0, 1, 0},
	0x5: {1, 0, 1, 0},
	0x6: {0, 1, 1, 0},
	0x7: {1, 1, 1, 0},
	0x8: {0, 0, 0, 1},
	0x9: {1, 0, 0, 1},
	0xa: {0, 1, 0, 1},
	0xb: {1, 1, 0, 1},
	0xc: {0, 0, 1, 1},
	0xd: {1, 0, 1, 1},
	0xe: {0, 1, 1, 1},
	0xf: {1, 1, 1, 1},
}

var (
	// The mapping from 4x4 region position to band is specified in section 13.3.
	// bands has 17 entries rather than 16 because parseResiduals4 looks up
	// bands[n] after incrementing n, at which point n can equal 16.
	bands = [17]uint8{0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0}
	// Category probabilities are specified in section 13.2.
	// Decoding categories 1 and 2 are done inline.
	// Each row lists the probabilities for one of categories 3, 4, 5 and 6, in
	// that order. A zero entry terminates the list.
	cat3456 = [4][12]uint8{
		{173, 148, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0},
		{176, 155, 140, 135, 0, 0, 0, 0, 0, 0, 0, 0},
		{180, 157, 141, 134, 130, 0, 0, 0, 0, 0, 0, 0},
		{254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0},
	}
	// The zigzag order is:
	//	0  1  5  6
	//	2  4  7 12
	//	3  8 11 13
	//	9 10 14 15
	// zigzag[i] is the index, within a row-major 4x4 region, of the i'th
	// coefficient in that order.
	zigzag = [16]uint8{0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}
)

// parseResiduals4 parses a 4x4 region of residual coefficients, as specified
// in section 13.3, and returns a 0/1 value indicating whether there was at
// least one non-zero coefficient.
// r is the partition to read bits from.
// plane and context describe which token probability table to use. context is
// either 0, 1 or 2, and equals how many of the macroblock left and macroblock
// above have non-zero coefficients.
// quant are the DC/AC quantization factors.
// skipFirstCoeff is whether the DC coefficient has already been parsed.
// coeffBase is the base index of d.coeff to write to.
//
// Each decoded coefficient is multiplied by its quantization factor and
// stored at d.coeff[coeffBase+z], where z is the coefficient's row-major
// position within the region. Positions that are not decoded are left as
// they were.
func (d *Decoder) parseResiduals4(r *partition, plane int, context uint8, quant [2]uint16, skipFirstCoeff bool, coeffBase int) uint8 {
	// n counts the zigzag positions consumed so far. Starting at 1 skips the
	// first (DC) position.
	prob, n := &d.tokenProb[plane], 0
	if skipFirstCoeff {
		n = 1
	}
	// p is the probability set for the current band and context.
	p := prob[bands[n]][context]
	// A false bit under p[0] means that no coefficients are coded at all.
	if !r.readBit(p[0]) {
		return 0
	}
	for n != 16 {
		n++
		// A false bit under p[1] means that this coefficient is zero: nothing
		// is stored and the next position is decoded with context 0.
		if !r.readBit(p[1]) {
			p = prob[bands[n]][0]
			continue
		}
		// v is the coefficient's magnitude before dequantization.
		var v uint32
		if !r.readBit(p[2]) {
			// A magnitude of one selects context 1 for the next position.
			v = 1
			p = prob[bands[n]][1]
		} else {
			if !r.readBit(p[3]) {
				if !r.readBit(p[4]) {
					v = 2
				} else {
					v = 3 + r.readUint(p[5], 1)
				}
			} else if !r.readBit(p[6]) {
				if !r.readBit(p[7]) {
					// Category 1.
					v = 5 + r.readUint(159, 1)
				} else {
					// Category 2.
					v = 7 + 2*r.readUint(165, 1) + r.readUint(145, 1)
				}
			} else {
				// Categories 3, 4, 5 or 6.
				// b1 and b0 together select the row of cat3456 to use.
				b1 := r.readUint(p[8], 1)
				b0 := r.readUint(p[9+b1], 1)
				cat := 2*b1 + b0
				tab := &cat3456[cat]
				v = 0
				// Accumulate one readUint value per probability in the row, most
				// significant first, stopping at the row's zero terminator.
				for i := 0; tab[i] != 0; i++ {
					v *= 2
					v += r.readUint(tab[i], 1)
				}
				// Add the category's base value: 11, 19, 35 or 67.
				v += 3 + (8 << cat)
			}
			// Any magnitude above one selects context 2 for the next position.
			p = prob[bands[n]][2]
		}
		// n was already incremented, so n-1 is this coefficient's zigzag index.
		z := zigzag[n-1]
		// Dequantize with quant[0] for the DC position (z == 0) and quant[1]
		// for every other position.
		c := int32(v) * int32(quant[btou(z > 0)])
		// A true bit read with uniformProb makes the coefficient negative.
		if r.readBit(uniformProb) {
			c = -c
		}
		d.coeff[coeffBase+int(z)] = int16(c)
		// Stop after the 16th position, or when a false bit under p[0] says
		// that no further coefficients are coded.
		if n == 16 || !r.readBit(p[0]) {
			return 1
		}
	}
	return 1
}

// parseResiduals reads all of the residual coefficients of the macroblock at
// (mbx, mby) into d.coeff, and updates the non-zero bookkeeping in d.leftMB,
// d.upMB[mbx], d.nzDCMask and d.nzACMask. It returns whether inner loop
// filtering should be skipped for this macroblock, which is the case when
// both masks end up zero.
func (d *Decoder) parseResiduals(mbx, mby int) (skip bool) {
	var (
		r      = &d.op[mby&(d.nOP-1)]
		q      = &d.quant[d.segment]
		plane  = planeY1SansY2
		dcMask uint32
		acMask uint32
		base   int
	)

	// With 16x16 luma prediction, the first coefficient of every 4x4 luma
	// region is coded separately, as one extra region of WHT coefficients.
	if d.usePredY16 {
		ctx := d.leftMB.nzY16 + d.upMB[mbx].nzY16
		nz := d.parseResiduals4(r, planeY2, ctx, q.y2, false, whtCoeffBase)
		d.leftMB.nzY16 = nz
		d.upMB[mbx].nzY16 = nz
		d.inverseWHT16()
		plane = planeY1WithY2
	}

	// Luma: 16 regions in row-major order. left[y] and up[x] track whether
	// the most recently parsed region in that row / column had coefficients.
	// Region blk owns bit blk of both masks.
	left := unpack[d.leftMB.nzMask&0x0f]
	up := unpack[d.upMB[mbx].nzMask&0x0f]
	for blk := 0; blk < 16; blk++ {
		x, y := blk%4, blk/4
		nz := d.parseResiduals4(r, plane, left[y]+up[x], q.y1, d.usePredY16, base)
		left[y], up[x] = nz, nz
		acMask |= uint32(nz) << uint(blk)
		dcMask |= uint32(btou(d.coeff[base] != 0)) << uint(blk)
		base += 16
	}
	leftMask, upMask := pack(left, 0), pack(up, 0)

	// Chroma: two planes of 2x2 regions each, again in row-major order. The
	// first plane uses entries 0-1 of left and up, the second entries 2-3.
	// Region blk owns bit 16+blk of both masks.
	left = unpack[d.leftMB.nzMask>>4]
	up = unpack[d.upMB[mbx].nzMask>>4]
	for blk := 0; blk < 8; blk++ {
		first := 2 * (blk / 4)
		l, u := first+((blk/2)%2), first+(blk%2)
		nz := d.parseResiduals4(r, planeUV, left[l]+up[u], q.uv, false, base)
		left[l], up[u] = nz, nz
		bit := uint(16 + blk)
		acMask |= uint32(nz) << bit
		dcMask |= uint32(btou(d.coeff[base] != 0)) << bit
		base += 16
	}
	leftMask |= pack(left, 4)
	upMask |= pack(up, 4)

	// Save decoder state.
	d.leftMB.nzMask = uint8(leftMask)
	d.upMB[mbx].nzMask = uint8(upMask)
	d.nzDCMask = dcMask
	d.nzACMask = acMask

	// Section 15.1 of the spec says that "Steps 2 and 4 [of the loop filter]
	// are skipped... [if] there is no DCT coefficient coded for the whole
	// macroblock."
	return dcMask == 0 && acMask == 0
}

// reconstructMacroblock runs the predictor functions over the ybr workspace
// and then adds the inverse-DCT transformed residuals on top, which recovers
// the YCbCr data of the macroblock at (mbx, mby).
//
// d.nzACMask and d.nzDCMask decide how much inverse-DCT work each region
// needs: the full transform if its AC bit is set, the DC-only transform if
// only its DC bit is set, and none at all otherwise.
func (d *Decoder) reconstructMacroblock(mbx, mby int) {
	// Luma is predicted either once for the whole 16x16 region, or
	// separately for each 4x4 region just before its residuals are added.
	whole := d.usePredY16
	if whole {
		predFunc16[checkTopLeftPred(mbx, mby, d.predY16)](d, ybrYY, ybrYX)
	}
	for n := 0; n < 16; n++ {
		row, col := n/4, n%4
		y, x := ybrYY+4*row, ybrYX+4*col
		if !whole {
			predFunc4[d.predY4[row][col]](d, y, x)
		}
		switch bit := uint32(1) << uint(n); {
		case d.nzACMask&bit != 0:
			d.inverseDCT4(y, x, 16*n)
		case d.nzDCMask&bit != 0:
			d.inverseDCT4DCOnly(y, x, 16*n)
		}
	}

	// Both chroma planes share one predictor. Each plane owns four bits of
	// the masks, one per 4x4 region.
	pred := predFunc8[checkTopLeftPred(mbx, mby, d.predC8)]
	planes := [2]struct {
		y, x, coeffBase int
		mask            uint32
	}{
		{ybrBY, ybrBX, bCoeffBase, 0x0f0000},
		{ybrRY, ybrRX, rCoeffBase, 0xf00000},
	}
	for _, p := range planes {
		pred(d, p.y, p.x)
		switch {
		case d.nzACMask&p.mask != 0:
			d.inverseDCT8(p.y, p.x, p.coeffBase)
		case d.nzDCMask&p.mask != 0:
			d.inverseDCT8DCOnly(p.y, p.x, p.coeffBase)
		}
	}
}

// reconstruct decodes the macroblock at (mbx, mby): it reads the
// per-macroblock header fields and predictor modes from d.fp, parses the
// residuals unless the macroblock is flagged as skipped, rebuilds the samples
// in the ybr workspace and copies them into d.img. It returns whether inner
// loop filtering should be skipped for this macroblock.
func (d *Decoder) reconstruct(mbx, mby int) (skip bool) {
	// Optional segment id: the first bit picks between segments {0, 1} and
	// segments {2, 3}.
	if hdr := &d.segmentHeader; hdr.updateMap {
		if d.fp.readBit(hdr.prob[0]) {
			d.segment = 2 + int(d.fp.readUint(hdr.prob[2], 1))
		} else {
			d.segment = int(d.fp.readUint(hdr.prob[1], 1))
		}
	}
	// Optional flag saying that this macroblock has no coded residuals.
	if d.useSkipProb {
		skip = d.fp.readBit(d.skipProb)
	}

	// Prepare the workspace.
	for i := range d.coeff {
		d.coeff[i] = 0
	}
	d.prepareYBR(mbx, mby)

	// Parse the predictor modes.
	if d.usePredY16 = d.fp.readBit(145); d.usePredY16 {
		d.parsePredModeY16(mbx)
	} else {
		d.parsePredModeY4(mbx)
	}
	d.parsePredModeC8()

	// Parse the residuals, or record that there are none.
	if skip {
		// The Y16 flags are only reset when this macroblock uses 16x16 luma
		// prediction.
		if d.usePredY16 {
			d.leftMB.nzY16 = 0
			d.upMB[mbx].nzY16 = 0
		}
		d.leftMB.nzMask = 0
		d.upMB[mbx].nzMask = 0
		d.nzDCMask = 0
		d.nzACMask = 0
	} else {
		skip = d.parseResiduals(mbx, mby)
	}

	// Reconstruct the YCbCr data and copy it to the image, row by row.
	d.reconstructMacroblock(mbx, mby)
	img := d.img
	yOff := 16 * (mby*img.YStride + mbx)
	for row := 0; row < 16; row++ {
		copy(img.Y[yOff:yOff+16], d.ybr[ybrYY+row][ybrYX:ybrYX+16])
		yOff += img.YStride
	}
	cOff := 8 * (mby*img.CStride + mbx)
	for row := 0; row < 8; row++ {
		copy(img.Cb[cOff:cOff+8], d.ybr[ybrBY+row][ybrBX:ybrBX+8])
		copy(img.Cr[cOff:cOff+8], d.ybr[ybrRY+row][ybrRX:ybrRX+8])
		cOff += img.CStride
	}
	return skip
}
Add MediaConvertWebPToPNG option (telegram). (#741) * Add MediaConvertWebPToPNG option (telegram). When enabled matterbridge will convert .webp files to .png files before uploading them to the mediaserver of the other bridges. Fixes #398 2019-02-26 23:41:50 +00:00			`// Copyright 2011 The Go Authors. All rights reserved.`
			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

			`package vp8`

			`// This file implements decoding DCT/WHT residual coefficients and`
			`// reconstructing YCbCr data equal to predicted values plus residuals.`
			`//`
			`// There are 11616 + 288 + 144 coefficients per macroblock:`
			`// - 11616 luma DCT coefficients,`
			`// - 288 chroma DCT coefficients, and`
			`// - 144 luma WHT coefficients.`
			`// Coefficients are read in lots of 16, and the later coefficients in each lot`
			`// are often zero.`
			`//`
			`// The YCbCr data consists of 11616 luma values and 288 chroma values,`
			`// plus previously decoded values along the top and left borders. The combined`
			`// values are laid out as a [1+16+1+8][32]uint8 so that vertically adjacent`
			`// samples are 32 bytes apart. In detail, the layout is:`
			`//`
			`// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1`
			`// . . . . . . . a b b b b b b b b b b b b b b b b c c c c . . . . 0`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 1`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 2`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 3`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . . 4`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 5`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 6`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 7`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . . 8`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 9`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 10`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 11`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y c c c c . . . . 12`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 13`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 14`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 15`
			`// . . . . . . . d Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y . . . . . . . . 16`
			`// . . . . . . . e f f f f f f f f . . . . . . . g h h h h h h h h 17`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 18`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 19`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 20`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 21`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 22`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 23`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 24`
			`// . . . . . . . i B B B B B B B B . . . . . . . j R R R R R R R R 25`
			`//`
			`// Y, B and R are the reconstructed luma (Y) and chroma (B, R) values.`
			`// The Y values are predicted (either as one 16x16 region or 16 4x4 regions)`
			`// based on the row above's Y values (some combination of {abc} or {dYC}) and`
			`// the column left's Y values (either {ad} or {bY}). Similarly, B and R values`
			`// are predicted on the row above and column left of their respective 8x8`
			`// region: {efi} for B, {ghj} for R.`
			`//`
			`// For uppermost macroblocks (i.e. those with mby == 0), the {abcefgh} values`
			`// are initialized to 0x81. Otherwise, they are copied from the bottom row of`
			`// the macroblock above. The {c} values are then duplicated from row 0 to rows`
			`// 4, 8 and 12 of the ybr workspace.`
			`// Similarly, for leftmost macroblocks (i.e. those with mbx == 0), the {adeigj}`
			`// values are initialized to 0x7f. Otherwise, they are copied from the right`
			`// column of the macroblock to the left.`
			`// For the top-left macroblock (with mby == 0 && mbx == 0), {aeg} is 0x81.`
			`//`
			`// When moving from one macroblock to the next horizontally, the {adeigj}`
			`// values can simply be copied from the workspace to itself, shifted by 8 or`
			`// 16 columns. When moving from one macroblock to the next vertically,`
			`// filtering can occur and hence the row values have to be copied from the`
			`// post-filtered image instead of the pre-filtered workspace.`

			`const (`
			`bCoeffBase = 11616 + 088`
			`rCoeffBase = 11616 + 188`
			`whtCoeffBase = 11616 + 288`
			`)`

			`const (`
			`ybrYX = 8`
			`ybrYY = 1`
			`ybrBX = 8`
			`ybrBY = 18`
			`ybrRX = 24`
			`ybrRY = 18`
			`)`

			`// prepareYBR prepares the {abcdefghij} elements of ybr.`
			`func (d *Decoder) prepareYBR(mbx, mby int) {`
			`if mbx == 0 {`
			`for y := 0; y < 17; y++ {`
			`d.ybr[y][7] = 0x81`
			`}`
			`for y := 17; y < 26; y++ {`
			`d.ybr[y][7] = 0x81`
			`d.ybr[y][23] = 0x81`
			`}`
			`} else {`
			`for y := 0; y < 17; y++ {`
			`d.ybr[y][7] = d.ybr[y][7+16]`
			`}`
			`for y := 17; y < 26; y++ {`
			`d.ybr[y][7] = d.ybr[y][15]`
			`d.ybr[y][23] = d.ybr[y][31]`
			`}`
			`}`
			`if mby == 0 {`
			`for x := 7; x < 28; x++ {`
			`d.ybr[0][x] = 0x7f`
			`}`
			`for x := 7; x < 16; x++ {`
			`d.ybr[17][x] = 0x7f`
			`}`
			`for x := 23; x < 32; x++ {`
			`d.ybr[17][x] = 0x7f`
			`}`
			`} else {`
			`for i := 0; i < 16; i++ {`
			`d.ybr[0][8+i] = d.img.Y[(16mby-1)d.img.YStride+16*mbx+i]`
			`}`
			`for i := 0; i < 8; i++ {`
			`d.ybr[17][8+i] = d.img.Cb[(8mby-1)d.img.CStride+8*mbx+i]`
			`}`
			`for i := 0; i < 8; i++ {`
			`d.ybr[17][24+i] = d.img.Cr[(8mby-1)d.img.CStride+8*mbx+i]`
			`}`
			`if mbx == d.mbw-1 {`
			`for i := 16; i < 20; i++ {`
			`d.ybr[0][8+i] = d.img.Y[(16mby-1)d.img.YStride+16*mbx+15]`
			`}`
			`} else {`
			`for i := 16; i < 20; i++ {`
			`d.ybr[0][8+i] = d.img.Y[(16mby-1)d.img.YStride+16*mbx+i]`
			`}`
			`}`
			`}`
			`for y := 4; y < 16; y += 4 {`
			`d.ybr[y][24] = d.ybr[0][24]`
			`d.ybr[y][25] = d.ybr[0][25]`
			`d.ybr[y][26] = d.ybr[0][26]`
			`d.ybr[y][27] = d.ybr[0][27]`
			`}`
			`}`

			`// btou converts a bool to a 0/1 value.`
			`func btou(b bool) uint8 {`
			`if b {`
			`return 1`
			`}`
			`return 0`
			`}`

			`// pack packs four 0/1 values into four bits of a uint32.`
			`func pack(x [4]uint8, shift int) uint32 {`
			`u := uint32(x[0])<<0 \| uint32(x[1])<<1 \| uint32(x[2])<<2 \| uint32(x[3])<<3`
			`return u << uint(shift)`
			`}`

			`// unpack unpacks four 0/1 values from a four-bit value.`
			`var unpack = [16][4]uint8{`
			`{0, 0, 0, 0},`
			`{1, 0, 0, 0},`
			`{0, 1, 0, 0},`
			`{1, 1, 0, 0},`
			`{0, 0, 1, 0},`
			`{1, 0, 1, 0},`
			`{0, 1, 1, 0},`
			`{1, 1, 1, 0},`
			`{0, 0, 0, 1},`
			`{1, 0, 0, 1},`
			`{0, 1, 0, 1},`
			`{1, 1, 0, 1},`
			`{0, 0, 1, 1},`
			`{1, 0, 1, 1},`
			`{0, 1, 1, 1},`
			`{1, 1, 1, 1},`
			`}`

			`var (`
			`// The mapping from 4x4 region position to band is specified in section 13.3.`
			`bands = [17]uint8{0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0}`
			`// Category probabilties are specified in section 13.2.`
			`// Decoding categories 1 and 2 are done inline.`
			`cat3456 = [4][12]uint8{`
			`{173, 148, 140, 0, 0, 0, 0, 0, 0, 0, 0, 0},`
			`{176, 155, 140, 135, 0, 0, 0, 0, 0, 0, 0, 0},`
			`{180, 157, 141, 134, 130, 0, 0, 0, 0, 0, 0, 0},`
			`{254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0},`
			`}`
			`// The zigzag order is:`
			`// 0 1 5 6`
			`// 2 4 7 12`
			`// 3 8 11 13`
			`// 9 10 14 15`
			`zigzag = [16]uint8{0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15}`
			`)`

			`// parseResiduals4 parses a 4x4 region of residual coefficients, as specified`
			`// in section 13.3, and returns a 0/1 value indicating whether there was at`
			`// least one non-zero coefficient.`
			`// r is the partition to read bits from.`
			`// plane and context describe which token probability table to use. context is`
			`// either 0, 1 or 2, and equals how many of the macroblock left and macroblock`
			`// above have non-zero coefficients.`
			`// quant are the DC/AC quantization factors.`
			`// skipFirstCoeff is whether the DC coefficient has already been parsed.`
			`// coeffBase is the base index of d.coeff to write to.`
			`func (d Decoder) parseResiduals4(r partition, plane int, context uint8, quant [2]uint16, skipFirstCoeff bool, coeffBase int) uint8 {`
			`prob, n := &d.tokenProb[plane], 0`
			`if skipFirstCoeff {`
			`n = 1`
			`}`
			`p := prob[bands[n]][context]`
			`if !r.readBit(p[0]) {`
			`return 0`
			`}`
			`for n != 16 {`
			`n++`
			`if !r.readBit(p[1]) {`
			`p = prob[bands[n]][0]`
			`continue`
			`}`
			`var v uint32`
			`if !r.readBit(p[2]) {`
			`v = 1`
			`p = prob[bands[n]][1]`
			`} else {`
			`if !r.readBit(p[3]) {`
			`if !r.readBit(p[4]) {`
			`v = 2`
			`} else {`
			`v = 3 + r.readUint(p[5], 1)`
			`}`
			`} else if !r.readBit(p[6]) {`
			`if !r.readBit(p[7]) {`
			`// Category 1.`
			`v = 5 + r.readUint(159, 1)`
			`} else {`
			`// Category 2.`
			`v = 7 + 2*r.readUint(165, 1) + r.readUint(145, 1)`
			`}`
			`} else {`
			`// Categories 3, 4, 5 or 6.`
			`b1 := r.readUint(p[8], 1)`
			`b0 := r.readUint(p[9+b1], 1)`
			`cat := 2*b1 + b0`
			`tab := &cat3456[cat]`
			`v = 0`
			`for i := 0; tab[i] != 0; i++ {`
			`v *= 2`
			`v += r.readUint(tab[i], 1)`
			`}`
			`v += 3 + (8 << cat)`
			`}`
			`p = prob[bands[n]][2]`
			`}`
			`z := zigzag[n-1]`
			`c := int32(v) * int32(quant[btou(z > 0)])`
			`if r.readBit(uniformProb) {`
			`c = -c`
			`}`
			`d.coeff[coeffBase+int(z)] = int16(c)`
			`if n == 16 \|\| !r.readBit(p[0]) {`
			`return 1`
			`}`
			`}`
			`return 1`
			`}`

			`// parseResiduals parses the residuals and returns whether inner loop filtering`
			`// should be skipped for this macroblock.`
			`func (d *Decoder) parseResiduals(mbx, mby int) (skip bool) {`
			`partition := &d.op[mby&(d.nOP-1)]`
			`plane := planeY1SansY2`
			`quant := &d.quant[d.segment]`

			`// Parse the DC coefficient of each 4x4 luma region.`
			`if d.usePredY16 {`
			`nz := d.parseResiduals4(partition, planeY2, d.leftMB.nzY16+d.upMB[mbx].nzY16, quant.y2, false, whtCoeffBase)`
			`d.leftMB.nzY16 = nz`
			`d.upMB[mbx].nzY16 = nz`
			`d.inverseWHT16()`
			`plane = planeY1WithY2`
			`}`

			`var (`
			`nzDC, nzAC [4]uint8`
			`nzDCMask, nzACMask uint32`
			`coeffBase int`
			`)`

			`// Parse the luma coefficients.`
			`lnz := unpack[d.leftMB.nzMask&0x0f]`
			`unz := unpack[d.upMB[mbx].nzMask&0x0f]`
			`for y := 0; y < 4; y++ {`
			`nz := lnz[y]`
			`for x := 0; x < 4; x++ {`
			`nz = d.parseResiduals4(partition, plane, nz+unz[x], quant.y1, d.usePredY16, coeffBase)`
			`unz[x] = nz`
			`nzAC[x] = nz`
			`nzDC[x] = btou(d.coeff[coeffBase] != 0)`
			`coeffBase += 16`
			`}`
			`lnz[y] = nz`
			`nzDCMask \|= pack(nzDC, y*4)`
			`nzACMask \|= pack(nzAC, y*4)`
			`}`
			`lnzMask := pack(lnz, 0)`
			`unzMask := pack(unz, 0)`

			`// Parse the chroma coefficients.`
			`lnz = unpack[d.leftMB.nzMask>>4]`
			`unz = unpack[d.upMB[mbx].nzMask>>4]`
			`for c := 0; c < 4; c += 2 {`
			`for y := 0; y < 2; y++ {`
			`nz := lnz[y+c]`
			`for x := 0; x < 2; x++ {`
			`nz = d.parseResiduals4(partition, planeUV, nz+unz[x+c], quant.uv, false, coeffBase)`
			`unz[x+c] = nz`
			`nzAC[y*2+x] = nz`
			`nzDC[y*2+x] = btou(d.coeff[coeffBase] != 0)`
			`coeffBase += 16`
			`}`
			`lnz[y+c] = nz`
			`}`
			`nzDCMask \|= pack(nzDC, 16+c*2)`
			`nzACMask \|= pack(nzAC, 16+c*2)`
			`}`
			`lnzMask \|= pack(lnz, 4)`
			`unzMask \|= pack(unz, 4)`

			`// Save decoder state.`
			`d.leftMB.nzMask = uint8(lnzMask)`
			`d.upMB[mbx].nzMask = uint8(unzMask)`
			`d.nzDCMask = nzDCMask`
			`d.nzACMask = nzACMask`

			`// Section 15.1 of the spec says that "Steps 2 and 4 [of the loop filter]`
			`// are skipped... [if] there is no DCT coefficient coded for the whole`
			`// macroblock."`
			`return nzDCMask == 0 && nzACMask == 0`
			`}`

			`// reconstructMacroblock applies the predictor functions and adds the inverse-`
			`// DCT transformed residuals to recover the YCbCr data.`
			`func (d *Decoder) reconstructMacroblock(mbx, mby int) {`
			`if d.usePredY16 {`
			`p := checkTopLeftPred(mbx, mby, d.predY16)`
			`predFunc16[p](d, 1, 8)`
			`for j := 0; j < 4; j++ {`
			`for i := 0; i < 4; i++ {`
			`n := 4*j + i`
			`y := 4*j + 1`
			`x := 4*i + 8`
			`mask := uint32(1) << uint(n)`
			`if d.nzACMask&mask != 0 {`
			`d.inverseDCT4(y, x, 16*n)`
			`} else if d.nzDCMask&mask != 0 {`
			`d.inverseDCT4DCOnly(y, x, 16*n)`
			`}`
			`}`
			`}`
			`} else {`
			`for j := 0; j < 4; j++ {`
			`for i := 0; i < 4; i++ {`
			`n := 4*j + i`
			`y := 4*j + 1`
			`x := 4*i + 8`
			`predFunc4[d.predY4[j][i]](d, y, x)`
			`mask := uint32(1) << uint(n)`
			`if d.nzACMask&mask != 0 {`
			`d.inverseDCT4(y, x, 16*n)`
			`} else if d.nzDCMask&mask != 0 {`
			`d.inverseDCT4DCOnly(y, x, 16*n)`
			`}`
			`}`
			`}`
			`}`
			`p := checkTopLeftPred(mbx, mby, d.predC8)`
			`predFunc8[p](d, ybrBY, ybrBX)`
			`if d.nzACMask&0x0f0000 != 0 {`
			`d.inverseDCT8(ybrBY, ybrBX, bCoeffBase)`
			`} else if d.nzDCMask&0x0f0000 != 0 {`
			`d.inverseDCT8DCOnly(ybrBY, ybrBX, bCoeffBase)`
			`}`
			`predFunc8[p](d, ybrRY, ybrRX)`
			`if d.nzACMask&0xf00000 != 0 {`
			`d.inverseDCT8(ybrRY, ybrRX, rCoeffBase)`
			`} else if d.nzDCMask&0xf00000 != 0 {`
			`d.inverseDCT8DCOnly(ybrRY, ybrRX, rCoeffBase)`
			`}`
			`}`

			`// reconstruct reconstructs one macroblock and returns whether inner loop`
			`// filtering should be skipped for it.`
			`func (d *Decoder) reconstruct(mbx, mby int) (skip bool) {`
			`if d.segmentHeader.updateMap {`
			`if !d.fp.readBit(d.segmentHeader.prob[0]) {`
			`d.segment = int(d.fp.readUint(d.segmentHeader.prob[1], 1))`
			`} else {`
			`d.segment = int(d.fp.readUint(d.segmentHeader.prob[2], 1)) + 2`
			`}`
			`}`
			`if d.useSkipProb {`
			`skip = d.fp.readBit(d.skipProb)`
			`}`
			`// Prepare the workspace.`
			`for i := range d.coeff {`
			`d.coeff[i] = 0`
			`}`
			`d.prepareYBR(mbx, mby)`
			`// Parse the predictor modes.`
			`d.usePredY16 = d.fp.readBit(145)`
			`if d.usePredY16 {`
			`d.parsePredModeY16(mbx)`
			`} else {`
			`d.parsePredModeY4(mbx)`
			`}`
			`d.parsePredModeC8()`
			`// Parse the residuals.`
			`if !skip {`
			`skip = d.parseResiduals(mbx, mby)`
			`} else {`
			`if d.usePredY16 {`
			`d.leftMB.nzY16 = 0`
			`d.upMB[mbx].nzY16 = 0`
			`}`
			`d.leftMB.nzMask = 0`
			`d.upMB[mbx].nzMask = 0`
			`d.nzDCMask = 0`
			`d.nzACMask = 0`
			`}`
			`// Reconstruct the YCbCr data and copy it to the image.`
			`d.reconstructMacroblock(mbx, mby)`
			`for i, y := (mbyd.img.YStride+mbx)16, 0; y < 16; i, y = i+d.img.YStride, y+1 {`
			`copy(d.img.Y[i:i+16], d.ybr[ybrYY+y][ybrYX:ybrYX+16])`
			`}`
			`for i, y := (mbyd.img.CStride+mbx)8, 0; y < 8; i, y = i+d.img.CStride, y+1 {`
			`copy(d.img.Cb[i:i+8], d.ybr[ybrBY+y][ybrBX:ybrBX+8])`
			`copy(d.img.Cr[i:i+8], d.ybr[ybrRY+y][ybrRX:ybrRX+8])`
			`}`
			`return skip`
			`}`