matterbridge/vendor/golang.org/x/crypto/poly1305/sum_generic.go

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// This file provides the generic implementation of Sum and MAC. Other files
// might provide optimized assembly implementations of some of this code.

package poly1305

import "encoding/binary"

// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag
// for a 64-byte message is approximately
//
//     s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r  mod  2¹³⁰ - 5
//
// for some secret r and s. It can be computed sequentially like
//
//     for len(msg) > 0:
//         h += read(msg, 16)
//         h *= r
//         h %= 2¹³⁰ - 5
//     return h + s
//
// All the complexity is about doing performant constant-time math on numbers
// larger than any available numeric type.

// sumGeneric computes the Poly1305 tag of msg under key in one shot and stores
// it in out. It builds a fresh generic MAC, feeds it the whole message and
// finalizes it.
func sumGeneric(out *[TagSize]byte, msg []byte, key *[32]byte) {
	mac := newMACGeneric(key)
	// Write always reports len(msg) and a nil error, so both results are
	// deliberately dropped.
	_, _ = mac.Write(msg)
	mac.Sum(out)
}

// newMACGeneric returns a macGeneric whose r and s values have been loaded
// from key. The accumulator and the chunk buffer start out zeroed.
func newMACGeneric(key *[32]byte) macGeneric {
	var mac macGeneric
	initialize(key, &mac.macState)
	return mac
}

// macState holds numbers in saturated 64-bit little-endian limbs. That is,
// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.
//
// It is the arithmetic core of the MAC: updateGeneric reads r and rewrites h,
// and finalize combines h with s to produce the tag.
type macState struct {
	// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but
	// can grow larger during and after rounds. It must, however, remain below
	// 2 * (2¹³⁰ - 5).
	h [3]uint64
	// r and s are the private key components. initialize loads r from
	// key[0:16] (masked with rMask0/rMask1) and s from key[16:32].
	r [2]uint64
	s [2]uint64
}

// macGeneric is the streaming form of the generic implementation: a macState
// plus the bookkeeping needed to accept input in arbitrarily sized pieces.
type macGeneric struct {
	macState

	// buffer collects the bytes of a chunk that is not yet TagSize long.
	buffer [TagSize]byte
	// offset is the number of bytes currently held in buffer.
	offset int
}

// Write absorbs p into the MAC. Input is processed in TagSize-byte chunks;
// whatever does not fill a whole chunk is kept in the internal buffer until
// more data arrives or Sum is called. It always returns len(p) and a nil
// error.
func (h *macGeneric) Write(p []byte) (int, error) {
	total := len(p)

	// Top up a chunk left incomplete by an earlier call before anything else.
	if h.offset > 0 {
		copied := copy(h.buffer[h.offset:], p)
		filled := h.offset + copied
		if filled < TagSize {
			// Still short of a full chunk: remember the progress and wait.
			h.offset = filled
			return total, nil
		}
		h.offset = 0
		p = p[copied:]
		updateGeneric(&h.macState, h.buffer[:])
	}

	// Feed every complete chunk straight from the caller's slice.
	if whole := len(p) / TagSize * TagSize; whole > 0 {
		updateGeneric(&h.macState, p[:whole])
		p = p[whole:]
	}

	// Keep the trailing partial chunk for later.
	if len(p) > 0 {
		h.offset += copy(h.buffer[h.offset:], p)
	}
	return total, nil
}

// Sum writes the MAC of everything written so far into out. Any buffered
// incomplete chunk is absorbed as the final chunk. All work happens on a copy
// of the arithmetic state, so the receiver is left unchanged and Sum may be
// called repeatedly.
func (h *macGeneric) Sum(out *[TagSize]byte) {
	snapshot := h.macState
	if h.offset > 0 {
		pending := h.buffer[:h.offset]
		updateGeneric(&snapshot, pending)
	}
	finalize(out, &snapshot.h, &snapshot.s)
}

// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It
// clears some bits of the secret coefficient to make it possible to implement
// multiplication more efficiently.
//
// Viewed as four 32-bit words, the mask clears the top 4 bits of every word
// and the bottom 2 bits of every word except the lowest one. rMask0 applies
// to the low 64 bits of r, rMask1 to the high 64 bits.
const (
	rMask0 = 0x0FFFFFFC0FFFFFFF
	rMask1 = 0x0FFFFFFC0FFFFFFC
)

// initialize splits the 256-bit key into its two 128-bit halves and stores
// them in m as little-endian 64-bit limbs: the first half becomes r, clamped
// with rMask0 and rMask1, and the second half becomes s unchanged.
func initialize(key *[32]byte, m *macState) {
	le := binary.LittleEndian
	rBytes, sBytes := key[:16], key[16:]

	m.r = [2]uint64{
		le.Uint64(rBytes[:8]) & rMask0,
		le.Uint64(rBytes[8:]) & rMask1,
	}
	m.s = [2]uint64{
		le.Uint64(sBytes[:8]),
		le.Uint64(sBytes[8:]),
	}
}

// uint128 holds a 128-bit number as two 64-bit limbs, for use with the
// bits.Mul64 and bits.Add64 intrinsics.
type uint128 struct {
	// lo is the least significant 64 bits, hi the most significant 64 bits.
	lo, hi uint64
}

// mul64 returns the full 128-bit product of a and b.
func mul64(a, b uint64) uint128 {
	var product uint128
	product.hi, product.lo = bitsMul64(a, b)
	return product
}

// add128 returns a + b. The callers guarantee that the sum fits in 128 bits;
// a carry out of the high limb means that invariant was broken, so it panics.
func add128(a, b uint128) uint128 {
	var (
		sum   uint128
		carry uint64
	)
	sum.lo, carry = bitsAdd64(a.lo, b.lo, 0)
	sum.hi, carry = bitsAdd64(a.hi, b.hi, carry)
	if carry != 0 {
		panic("poly1305: unexpected overflow")
	}
	return sum
}

// shiftRightBy2 returns a >> 2 as a 128-bit value: the two lowest bits of the
// high limb move into the two highest bits of the low limb.
func shiftRightBy2(a uint128) uint128 {
	return uint128{
		// Shifting hi left by 62 keeps exactly its two lowest bits.
		lo: a.lo>>2 | a.hi<<62,
		hi: a.hi >> 2,
	}
}

// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of
// 128 bits of message, it computes
//
//     h₊ = (h + m) * r  mod  2¹³⁰ - 5
//
// If the msg length is not a multiple of TagSize, it assumes the last
// incomplete chunk is the final one.
//
// Only state.h is modified; state.r is read and state.s is not touched. The
// accumulator lives in local variables for the duration of the loop and is
// stored back into state once, after the last chunk.
func updateGeneric(state *macState, msg []byte) {
	h0, h1, h2 := state.h[0], state.h[1], state.h[2]
	r0, r1 := state.r[0], state.r[1]

	for len(msg) > 0 {
		var c uint64

		// For the first step, h + m, we use a chain of bits.Add64 intrinsics.
		// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially
		// reduced at the end of the multiplication below.
		//
		// The spec requires us to set a bit just above the message size, not to
		// hide leading zeroes. For full chunks, that's 1 << 128, so we can just
		// add 1 to the most significant (2¹²⁸) limb, h2.
		if len(msg) >= TagSize {
			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)
			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)
			h2 += c + 1

			msg = msg[TagSize:]
		} else {
			// Short chunk: zero-pad it to TagSize bytes and put the marker bit
			// in the byte right after the message. The marker is then already
			// part of the value added below, so h2 only receives the carry.
			var buf [TagSize]byte
			copy(buf[:], msg)
			buf[len(msg)] = 1

			h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)
			h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)
			h2 += c

			// A short chunk is treated as the last one, so end the loop after
			// this iteration.
			msg = nil
		}

		// Multiplication of big number limbs is similar to elementary school
		// columnar multiplication. Instead of digits, there are 64-bit limbs.
		//
		// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.
		//
		//                        h2    h1    h0  x
		//                              r1    r0  =
		//                       ----------------
		//                      h2r0  h1r0  h0r0     <-- individual 128-bit products
		//            +   h2r1  h1r1  h0r1
		//               ------------------------
		//                 m3    m2    m1    m0      <-- result in 128-bit overlapping limbs
		//               ------------------------
		//         m3.hi m2.hi m1.hi m0.hi           <-- carry propagation
		//     +         m3.lo m2.lo m1.lo m0.lo
		//        -------------------------------
		//           t4    t3    t2    t1    t0      <-- final result in 64-bit limbs
		//
		// The main difference from pen-and-paper multiplication is that we do
		// carry propagation in a separate step, as if we wrote two digit sums
		// at first (the 128-bit limbs), and then carried the tens all at once.

		h0r0 := mul64(h0, r0)
		h1r0 := mul64(h1, r0)
		h2r0 := mul64(h2, r0)
		h0r1 := mul64(h0, r1)
		h1r1 := mul64(h1, r1)
		h2r1 := mul64(h2, r1)

		// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their
		// top 4 bits cleared by rMask{0,1}, we know that their product is not going
		// to overflow 64 bits, so we can ignore the high part of the products.
		//
		// This also means that the product doesn't have a fifth limb (t4).
		//
		// The checks below turn a violation of that bound into a panic instead
		// of a silently wrong result.
		if h2r0.hi != 0 {
			panic("poly1305: unexpected overflow")
		}
		if h2r1.hi != 0 {
			panic("poly1305: unexpected overflow")
		}

		m0 := h0r0
		m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again
		m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.
		m3 := h2r1

		// Carry propagation: each limb is the low half of its column plus the
		// high half of the column to its right. The carry out of t3 is
		// discarded because, as noted above, there is no fifth limb.
		t0 := m0.lo
		t1, c := bitsAdd64(m1.lo, m0.hi, 0)
		t2, c := bitsAdd64(m2.lo, m1.hi, c)
		t3, _ := bitsAdd64(m3.lo, m2.hi, c)

		// Now we have the result as 4 64-bit limbs, and we need to reduce it
		// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do
		// a cheap partial reduction according to the reduction identity
		//
		//     c * 2¹³⁰ + n  =  c * 5 + n  mod  2¹³⁰ - 5
		//
		// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. Partial reduction since the result is
		// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the
		// assumptions we make about h in the rest of the code.
		//
		// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23

		// We split the final result at the 2¹³⁰ mark into h and cc, the carry.
		// Note that the carry bits are effectively shifted left by 2, in other
		// words, cc = c * 4 for the c in the reduction identity.
		h0, h1, h2 = t0, t1, t2&maskLow2Bits
		cc := uint128{t2 & maskNotLow2Bits, t3}

		// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.

		h0, c = bitsAdd64(h0, cc.lo, 0)
		h1, c = bitsAdd64(h1, cc.hi, c)
		h2 += c

		cc = shiftRightBy2(cc)

		h0, c = bitsAdd64(h0, cc.lo, 0)
		h1, c = bitsAdd64(h1, cc.hi, c)
		h2 += c

		// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most
		//
		//     5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1
	}

	// Publish the updated accumulator.
	state.h[0], state.h[1], state.h[2] = h0, h1, h2
}

// maskLow2Bits keeps the two lowest bits of a limb and maskNotLow2Bits keeps
// everything else. updateGeneric uses them to split the 2¹²⁸ limb at the 2¹³⁰
// boundary.
const (
	maskLow2Bits    uint64 = 1<<2 - 1
	maskNotLow2Bits uint64 = 0xFFFFFFFFFFFFFFFC
)

// select64 returns x if v == 1 and y if v == 0, in constant time.
func select64(v, x, y uint64) uint64 {
	// v-1 is all zeroes when v == 1 and all ones when v == 0, so it serves as
	// a bit mask that lets y through and blocks x, or the other way around.
	takeY := v - 1
	return x&^takeY | y&takeY
}

// [p0, p1, p2] is the prime 2¹³⁰ - 5 as three little-endian 64-bit limbs:
//
//     (2⁶⁴ - 5) + (2⁶⁴ - 1) * 2⁶⁴ + 3 * 2¹²⁸  =  2¹³⁰ - 5
const (
	p0 = 1<<64 - 5
	p1 = 1<<64 - 1
	p2 = 1<<2 - 1
)

// finalize completes the modular reduction of h and computes
//
//     out = h + s  mod  2¹²⁸
//
// The result is written to out as two little-endian 64-bit words. Neither h
// nor s is modified.
func finalize(out *[TagSize]byte, h *[3]uint64, s *[2]uint64) {
	lo, mid, top := h[0], h[1], h[2]

	// updateGeneric leaves h only partially reduced: it may be above
	// 2¹³⁰ - 5 but stays below twice that value, so one conditional
	// subtraction of p is enough. Compute h - p unconditionally and let the
	// final borrow tell whether h was already smaller than p.
	var (
		diffLo, diffMid uint64
		borrow          uint64
	)
	diffLo, borrow = bitsSub64(lo, p0, 0)
	diffMid, borrow = bitsSub64(mid, p1, borrow)
	_, borrow = bitsSub64(top, p2, borrow)

	// borrow == 1 means h < p: keep h. Otherwise take h - p. Only the low
	// 128 bits are needed from here on, so the top limb is dropped.
	lo = select64(borrow, lo, diffLo)
	mid = select64(borrow, mid, diffMid)

	// Last Poly1305 step: add s to the low 128 bits of h. The carry out of
	// the high word is discarded, which is the reduction modulo 2¹²⁸.
	var carry uint64
	lo, carry = bitsAdd64(lo, s[0], 0)
	mid, _ = bitsAdd64(mid, s[1], carry)

	binary.LittleEndian.PutUint64(out[0:8], lo)
	binary.LittleEndian.PutUint64(out[8:16], mid)
}
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`// Copyright 2018 The Go Authors. All rights reserved.`
Add Discord support 2016-09-19 18:53:26 +00:00			`// Use of this source code is governed by a BSD-style`
			`// license that can be found in the LICENSE file.`

Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`// This file provides the generic implementation of Sum and MAC. Other files`
			`// might provide optimized assembly implementations of some of this code.`

Add Discord support 2016-09-19 18:53:26 +00:00			`package poly1305`

Use mod vendor for vendored directory (backwards compatible) 2018-08-06 19:47:05 +00:00			`import "encoding/binary"`
Add Discord support 2016-09-19 18:53:26 +00:00
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`// Poly1305 [RFC 7539] is a relatively simple algorithm: the authentication tag`
			`// for a 64 bytes message is approximately`
			`//`
			`// s + m[0:16] * r⁴ + m[16:32] * r³ + m[32:48] * r² + m[48:64] * r mod 2¹³⁰ - 5`
			`//`
			`// for some secret r and s. It can be computed sequentially like`
			`//`
			`// for len(msg) > 0:`
			`// h += read(msg, 16)`
			`// h *= r`
			`// h %= 2¹³⁰ - 5`
			`// return h + s`
			`//`
			`// All the complexity is about doing performant constant-time math on numbers`
			`// larger than any available numeric type.`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00
Switch back go upstream bwmarrin/discordgo Commit https://github.com/bwmarrin/discordgo/commit/ffa9956c9b41e8e2a10c26a254389854e016b006 got merged in. 2018-11-12 23:02:07 +00:00			`func sumGeneric(out [TagSize]byte, msg []byte, key [32]byte) {`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`h := newMACGeneric(key)`
			`h.Write(msg)`
			`h.Sum(out)`
			`}`

Update dependencies for 1.18.0 release (#1175) 2020-07-18 15:27:41 +00:00			`func newMACGeneric(key *[32]byte) macGeneric {`
			`m := macGeneric{}`
			`initialize(key, &m.macState)`
			`return m`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`

Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`// macState holds numbers in saturated 64-bit little-endian limbs. That is,`
			`// the value of [x0, x1, x2] is x[0] + x[1] * 2⁶⁴ + x[2] * 2¹²⁸.`
			`type macState struct {`
			`// h is the main accumulator. It is to be interpreted modulo 2¹³⁰ - 5, but`
Update dependencies for 1.18.0 release (#1175) 2020-07-18 15:27:41 +00:00			`// can grow larger during and after rounds. It must, however, remain below`
			`// 2 * (2¹³⁰ - 5).`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`h [3]uint64`
			`// r and s are the private key components.`
			`r [2]uint64`
			`s [2]uint64`
			`}`

Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`type macGeneric struct {`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`macState`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00
			`buffer [TagSize]byte`
			`offset int`
			`}`

Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`// Write splits the incoming message into TagSize chunks, and passes them to`
			`// update. It buffers incomplete chunks.`
			`func (h *macGeneric) Write(p []byte) (int, error) {`
			`nn := len(p)`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`if h.offset > 0 {`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`n := copy(h.buffer[h.offset:], p)`
			`if h.offset+n < TagSize {`
			`h.offset += n`
			`return nn, nil`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`p = p[n:]`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`h.offset = 0`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`updateGeneric(&h.macState, h.buffer[:])`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`if n := len(p) - (len(p) % TagSize); n > 0 {`
			`updateGeneric(&h.macState, p[:n])`
			`p = p[n:]`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`
			`if len(p) > 0 {`
			`h.offset += copy(h.buffer[h.offset:], p)`
			`}`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`return nn, nil`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`
Add Discord support 2016-09-19 18:53:26 +00:00
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`// Sum flushes the last incomplete chunk from the buffer, if any, and generates`
			`// the MAC output. It does not modify its state, in order to allow for multiple`
			`// calls to Sum, even if no Write is allowed after Sum.`
			`func (h macGeneric) Sum(out [TagSize]byte) {`
			`state := h.macState`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`if h.offset > 0 {`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`updateGeneric(&state, h.buffer[:h.offset])`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`finalize(out, &state.h, &state.s)`
			`}`

			`// [rMask0, rMask1] is the specified Poly1305 clamping mask in little-endian. It`
			`// clears some bits of the secret coefficient to make it possible to implement`
			`// multiplication more efficiently.`
			`const (`
			`rMask0 = 0x0FFFFFFC0FFFFFFF`
			`rMask1 = 0x0FFFFFFC0FFFFFFC`
			`)`

Update dependencies for 1.18.0 release (#1175) 2020-07-18 15:27:41 +00:00			`// initialize loads the 256-bit key into the two 128-bit secret values r and s.`
			`func initialize(key [32]byte, m macState) {`
			`m.r[0] = binary.LittleEndian.Uint64(key[0:8]) & rMask0`
			`m.r[1] = binary.LittleEndian.Uint64(key[8:16]) & rMask1`
			`m.s[0] = binary.LittleEndian.Uint64(key[16:24])`
			`m.s[1] = binary.LittleEndian.Uint64(key[24:32])`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`}`

			`// uint128 holds a 128-bit number as two 64-bit limbs, for use with the`
			`// bits.Mul64 and bits.Add64 intrinsics.`
			`type uint128 struct {`
			`lo, hi uint64`
			`}`

			`func mul64(a, b uint64) uint128 {`
			`hi, lo := bitsMul64(a, b)`
			`return uint128{lo, hi}`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`
Use mod vendor for vendored directory (backwards compatible) 2018-08-06 19:47:05 +00:00
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`func add128(a, b uint128) uint128 {`
			`lo, c := bitsAdd64(a.lo, b.lo, 0)`
			`hi, c := bitsAdd64(a.hi, b.hi, c)`
			`if c != 0 {`
			`panic("poly1305: unexpected overflow")`
Add Discord support 2016-09-19 18:53:26 +00:00			`}`
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`return uint128{lo, hi}`
			`}`
Add Discord support 2016-09-19 18:53:26 +00:00
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`func shiftRightBy2(a uint128) uint128 {`
			`a.lo = a.lo>>2 \| (a.hi&3)<<62`
			`a.hi = a.hi >> 2`
			`return a`
Use upstream whatsapp again (#809) 2019-05-30 10:20:56 +00:00			`}`
Use mod vendor for vendored directory (backwards compatible) 2018-08-06 19:47:05 +00:00
Update dependencies (#975) 2020-01-09 20:02:56 +00:00			`// updateGeneric absorbs msg into the state.h accumulator. For each chunk m of`
			`// 128 bits of message, it computes`
			`//`
			`// h₊ = (h + m) * r mod 2¹³⁰ - 5`
			`//`
			`// If the msg length is not a multiple of TagSize, it assumes the last`
			`// incomplete chunk is the final one.`
			`func updateGeneric(state *macState, msg []byte) {`
			`h0, h1, h2 := state.h[0], state.h[1], state.h[2]`
			`r0, r1 := state.r[0], state.r[1]`

			`for len(msg) > 0 {`
			`var c uint64`

			`// For the first step, h + m, we use a chain of bits.Add64 intrinsics.`
			`// The resulting value of h might exceed 2¹³⁰ - 5, but will be partially`
			`// reduced at the end of the multiplication below.`
			`//`
			`// The spec requires us to set a bit just above the message size, not to`
			`// hide leading zeroes. For full chunks, that's 1 << 128, so we can just`
			`// add 1 to the most significant (2¹²⁸) limb, h2.`
			`if len(msg) >= TagSize {`
			`h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(msg[0:8]), 0)`
			`h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(msg[8:16]), c)`
			`h2 += c + 1`

			`msg = msg[TagSize:]`
			`} else {`
			`var buf [TagSize]byte`
			`copy(buf[:], msg)`
			`buf[len(msg)] = 1`

			`h0, c = bitsAdd64(h0, binary.LittleEndian.Uint64(buf[0:8]), 0)`
			`h1, c = bitsAdd64(h1, binary.LittleEndian.Uint64(buf[8:16]), c)`
			`h2 += c`

			`msg = nil`
			`}`

			`// Multiplication of big number limbs is similar to elementary school`
			`// columnar multiplication. Instead of digits, there are 64-bit limbs.`
			`//`
			`// We are multiplying a 3 limbs number, h, by a 2 limbs number, r.`
			`//`
			`// h2 h1 h0 x`
			`// r1 r0 =`
			`// ----------------`
			`// h2r0 h1r0 h0r0 <-- individual 128-bit products`
			`// + h2r1 h1r1 h0r1`
			`// ------------------------`
			`// m3 m2 m1 m0 <-- result in 128-bit overlapping limbs`
			`// ------------------------`
			`// m3.hi m2.hi m1.hi m0.hi <-- carry propagation`
			`// + m3.lo m2.lo m1.lo m0.lo`
			`// -------------------------------`
			`// t4 t3 t2 t1 t0 <-- final result in 64-bit limbs`
			`//`
			`// The main difference from pen-and-paper multiplication is that we do`
			`// carry propagation in a separate step, as if we wrote two digit sums`
			`// at first (the 128-bit limbs), and then carried the tens all at once.`

			`h0r0 := mul64(h0, r0)`
			`h1r0 := mul64(h1, r0)`
			`h2r0 := mul64(h2, r0)`
			`h0r1 := mul64(h0, r1)`
			`h1r1 := mul64(h1, r1)`
			`h2r1 := mul64(h2, r1)`

			`// Since h2 is known to be at most 7 (5 + 1 + 1), and r0 and r1 have their`
			`// top 4 bits cleared by rMask{0,1}, we know that their product is not going`
			`// to overflow 64 bits, so we can ignore the high part of the products.`
			`//`
			`// This also means that the product doesn't have a fifth limb (t4).`
			`if h2r0.hi != 0 {`
			`panic("poly1305: unexpected overflow")`
			`}`
			`if h2r1.hi != 0 {`
			`panic("poly1305: unexpected overflow")`
			`}`

			`m0 := h0r0`
			`m1 := add128(h1r0, h0r1) // These two additions don't overflow thanks again`
			`m2 := add128(h2r0, h1r1) // to the 4 masked bits at the top of r0 and r1.`
			`m3 := h2r1`

			`t0 := m0.lo`
			`t1, c := bitsAdd64(m1.lo, m0.hi, 0)`
			`t2, c := bitsAdd64(m2.lo, m1.hi, c)`
			`t3, _ := bitsAdd64(m3.lo, m2.hi, c)`

			`// Now we have the result as 4 64-bit limbs, and we need to reduce it`
			`// modulo 2¹³⁰ - 5. The special shape of this Crandall prime lets us do`
			`// a cheap partial reduction according to the reduction identity`
			`//`
			`// c * 2¹³⁰ + n = c * 5 + n mod 2¹³⁰ - 5`
			`//`
			`// because 2¹³⁰ = 5 mod 2¹³⁰ - 5. Partial reduction since the result is`
			`// likely to be larger than 2¹³⁰ - 5, but still small enough to fit the`
			`// assumptions we make about h in the rest of the code.`
			`//`
			`// See also https://speakerdeck.com/gtank/engineering-prime-numbers?slide=23`

			`// We split the final result at the 2¹³⁰ mark into h and cc, the carry.`
			`// Note that the carry bits are effectively shifted left by 2, in other`
			`// words, cc = c * 4 for the c in the reduction identity.`
			`h0, h1, h2 = t0, t1, t2&maskLow2Bits`
			`cc := uint128{t2 & maskNotLow2Bits, t3}`

			`// To add c * 5 to h, we first add cc = c * 4, and then add (cc >> 2) = c.`

			`h0, c = bitsAdd64(h0, cc.lo, 0)`
			`h1, c = bitsAdd64(h1, cc.hi, c)`
			`h2 += c`

			`cc = shiftRightBy2(cc)`

			`h0, c = bitsAdd64(h0, cc.lo, 0)`
			`h1, c = bitsAdd64(h1, cc.hi, c)`
			`h2 += c`

			`// h2 is at most 3 + 1 + 1 = 5, making the whole of h at most`
			`//`
			`// 5 * 2¹²⁸ + (2¹²⁸ - 1) = 6 * 2¹²⁸ - 1`
			`}`

			`state.h[0], state.h[1], state.h[2] = h0, h1, h2`
			`}`

			`const (`
			`maskLow2Bits uint64 = 0x0000000000000003`
			`maskNotLow2Bits uint64 = ^maskLow2Bits`
			`)`

			`// select64 returns x if v == 1 and y if v == 0, in constant time.`
			`func select64(v, x, y uint64) uint64 { return ^(v-1)&x \| (v-1)&y }`

			`// [p0, p1, p2] is 2¹³⁰ - 5 in little endian order.`
			`const (`
			`p0 = 0xFFFFFFFFFFFFFFFB`
			`p1 = 0xFFFFFFFFFFFFFFFF`
			`p2 = 0x0000000000000003`
			`)`

			`// finalize completes the modular reduction of h and computes`
			`//`
			`// out = h + s mod 2¹²⁸`
			`//`
			`func finalize(out [TagSize]byte, h [3]uint64, s *[2]uint64) {`
			`h0, h1, h2 := h[0], h[1], h[2]`

			`// After the partial reduction in updateGeneric, h might be more than`
			`// 2¹³⁰ - 5, but will be less than 2 * (2¹³⁰ - 5). To complete the reduction`
			`// in constant time, we compute t = h - (2¹³⁰ - 5), and select h as the`
			`// result if the subtraction underflows, and t otherwise.`

			`hMinusP0, b := bitsSub64(h0, p0, 0)`
			`hMinusP1, b := bitsSub64(h1, p1, b)`
			`_, b = bitsSub64(h2, p2, b)`

			`// h = h if h < p else h - p`
			`h0 = select64(b, h0, hMinusP0)`
			`h1 = select64(b, h1, hMinusP1)`

			`// Finally, we compute the last Poly1305 step`
			`//`
			`// tag = h + s mod 2¹²⁸`
			`//`
			`// by just doing a wide addition with the 128 low bits of h and discarding`
			`// the overflow.`
			`h0, c := bitsAdd64(h0, s[0], 0)`
			`h1, _ = bitsAdd64(h1, s[1], c)`

			`binary.LittleEndian.PutUint64(out[0:8], h0)`
			`binary.LittleEndian.PutUint64(out[8:16], h1)`
Add Discord support 2016-09-19 18:53:26 +00:00			`}`