mirror of
https://github.com/cwinfo/matterbridge.git
synced 2025-07-05 00:04:04 +00:00
Update vendor
This commit is contained in:
281
vendor/github.com/minio/md5-simd/block8_amd64.s
generated
vendored
Normal file
281
vendor/github.com/minio/md5-simd/block8_amd64.s
generated
vendored
Normal file
@ -0,0 +1,281 @@
|
||||
//+build !noasm,!appengine,gc
|
||||
|
||||
// Copyright (c) 2018 Igneous Systems
|
||||
// MIT License
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"), to deal
|
||||
// in the Software without restriction, including without limitation the rights
|
||||
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the Software is
|
||||
// furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included in all
|
||||
// copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
// SOFTWARE.
|
||||
|
||||
// Copyright (c) 2020 MinIO Inc. All rights reserved.
|
||||
// Use of this source code is governed by a license that can be
|
||||
// found in the LICENSE file.
|
||||
|
||||
// This is the AVX2 implementation of the MD5 block function (8-way parallel)
|
||||
|
||||
// block8(state *uint64, base uintptr, bufs *int32, cache *byte, n int)
|
||||
// Entry point. The flag value 4 is NOSPLIT in Go's textflag.h; $0-40 declares
// a zero-byte local frame and 40 bytes of arguments, i.e. the five 8-byte
// values read from FP offsets 0, 8, 16, 24 and 32 below.
TEXT ·block8(SB), 4, $0-40
|
||||
MOVQ state+0(FP), BX
|
||||
MOVQ base+8(FP), SI
|
||||
MOVQ bufs+16(FP), AX
|
||||
MOVQ cache+24(FP), CX
|
||||
MOVQ n+32(FP), DX
|
||||
// DI receives the 8-byte value stored at ·avx256md5consts (a load, not the
// symbol's address); it is later used as the base register of the
// 32*const(consts) round-constant operands.
MOVQ ·avx256md5consts+0(SB), DI
|
||||
|
||||
// Align cache (which is stack allocated by the compiler)
|
||||
// to a 256 bit boundary (ymm register alignment)
|
||||
// The cache8 type is deliberately oversized to permit this.
|
||||
// Round CX up to the next multiple of 32: add 31, then clear bits 0-4.
// Masking only the low byte (CL) is enough because those five bits all live
// in it; the aligned VMOVAPD accesses in load/store depend on this.
ADDQ $31, CX
|
||||
ANDB $-32, CL
|
||||
|
||||
// Preprocessor names for the registers used by the rest of the routine.
// Every YMM register holds eight 32-bit lanes.

// a, b, c, d: the four working state vectors.
#define a Y0
|
||||
#define b Y1
|
||||
#define c Y2
|
||||
#define d Y3
|
||||
|
||||
// sa..sd: copies of a..d taken at the top of each loop iteration and added
// back to a..d at the bottom.
#define sa Y4
|
||||
#define sb Y5
|
||||
#define sc Y6
|
||||
#define sd Y7
|
||||
|
||||
// tmp, tmp2: scratch for the per-round boolean functions; the ROUND macros
// also use them to carry a value from one step to the next.
#define tmp Y8
|
||||
#define tmp2 Y9
|
||||
|
||||
// mask: per-lane gather mask built by prepmask.
// off: the eight 32-bit offsets loaded from (AX), used as the gather index.
#define mask Y10
|
||||
#define off Y11
|
||||
|
||||
// ones: set to all-one bits by VPCMPEQD ones, ones, ones; XOR with it is a
// bitwise NOT.
#define ones Y12
|
||||
|
||||
// rtmp1: scratch for roll. rtmp2: copy of mask handed to VPGATHERDD in prep.
#define rtmp1 Y13
|
||||
#define rtmp2 Y14
|
||||
|
||||
// mem: the message word (one dword per lane) consumed by the current step.
#define mem Y15
|
||||
|
||||
// Scalar registers. These follow the MOVQ argument loads above: BX = state,
// CX = cache (after alignment), DX = n, SI = base, DI = constants pointer.
#define dig BX
|
||||
#define cache CX
|
||||
#define count DX
|
||||
#define base SI
|
||||
#define consts DI
|
||||
|
||||
// prepmask builds the gather mask from off: mask is zeroed, then each 32-bit
// lane is set to all ones where the signed value in off is greater than zero
// and left at zero otherwise. Lanes with a non-positive offset are therefore
// skipped by the gathers issued in prep.
#define prepmask \
|
||||
VPXOR mask, mask, mask \
|
||||
VPCMPGTD mask, off, mask
|
||||
|
||||
// prep(index) gathers one dword per lane from base + off[lane] + index*4
// into mem. The mask is copied to rtmp2 first because VPGATHERDD clears the
// mask register it is given; lanes whose mask is zero keep the previous
// contents of mem. Clobbers rtmp2 and mem.
#define prep(index) \
|
||||
VMOVAPD mask, rtmp2 \
|
||||
VPGATHERDD rtmp2, index*4(base)(off*1), mem
|
||||
|
||||
// load(index) reads 32-byte slot number index of cache into mem. This is an
// aligned move, so it relies on cache having been rounded to a 32-byte
// boundary.
#define load(index) \
|
||||
VMOVAPD index*32(cache), mem
|
||||
|
||||
// store(index) writes mem to 32-byte slot number index of cache (aligned
// move), so a word gathered once can be re-read later with load(index).
#define store(index) \
|
||||
VMOVAPD mem, index*32(cache)
|
||||
|
||||
// roll(shift, a) rotates every 32-bit lane of register a left by shift bits:
// the left-shifted copy goes to rtmp1, a itself is shifted right by
// 32-shift, and the two halves are ORed back into a. Clobbers rtmp1.
// The parameter name a shadows the a -> Y0 alias inside this macro.
#define roll(shift, a) \
|
||||
VPSLLD $shift, a, rtmp1 \
|
||||
VPSRLD $32-shift, a, a \
|
||||
VPOR rtmp1, a, a
|
||||
|
||||
// ROUND1 performs one round-1 step on all lanes:
//   a = b + rol(a + F + K[const] + mem, shift)
// where F = ((c ^ d) & b) ^ d and K[const] is the 32-byte entry at
// 32*const(consts).
// Expects on entry: tmp == d, mem == message word for this step.
// Leaves on exit:   tmp == c (the d argument of the following step),
//                   mem == word number index, gathered via prep for the
//                   following step. Clobbers rtmp1 and rtmp2.
#define ROUND1(a, b, c, d, index, const, shift) \
|
||||
VPXOR c, tmp, tmp \
|
||||
VPADDD 32*const(consts), a, a \
|
||||
VPADDD mem, a, a \
|
||||
VPAND b, tmp, tmp \
|
||||
VPXOR d, tmp, tmp \
|
||||
prep(index) \
|
||||
VPADDD tmp, a, a \
|
||||
roll(shift,a) \
|
||||
VMOVAPD c, tmp \
|
||||
VPADDD b, a, a
|
||||
|
||||
// ROUND1load is the same step as ROUND1,
//   a = b + rol(a + (((c ^ d) & b) ^ d) + K[const] + mem, shift)
// except that the word for the following step is read back from cache slot
// index with load instead of being gathered from the source buffers. The
// first XOR is spelled VXORPD rather than VPXOR; both are a bitwise XOR of
// the full register.
// Expects on entry: tmp == d, mem == message word for this step.
// Leaves on exit:   tmp == c, mem == cache slot index. Clobbers rtmp1.
#define ROUND1load(a, b, c, d, index, const, shift) \
|
||||
VXORPD c, tmp, tmp \
|
||||
VPADDD 32*const(consts), a, a \
|
||||
VPADDD mem, a, a \
|
||||
VPAND b, tmp, tmp \
|
||||
VPXOR d, tmp, tmp \
|
||||
load(index) \
|
||||
VPADDD tmp, a, a \
|
||||
roll(shift,a) \
|
||||
VMOVAPD c, tmp \
|
||||
VPADDD b, a, a
|
||||
|
||||
// ROUND2 performs one round-2 step on all lanes:
//   a = b + rol(a + G + K[const] + mem, shift)
// where G = (b & d) | (c & ~d). The (b & d) half is built in tmp2 and the
// (c & ~d) half in tmp (VANDNPD complements its tmp operand).
// Expects on entry: tmp == d, tmp2 == d, mem == message word for this step.
// Leaves on exit:   tmp == c, tmp2 == c (the d argument of the following
//                   step), mem == cache slot index. Clobbers rtmp1.
#define ROUND2(a, b, c, d, index, const, shift) \
|
||||
VPADDD 32*const(consts), a, a \
|
||||
VPADDD mem, a, a \
|
||||
VPAND b, tmp2, tmp2 \
|
||||
VANDNPD c, tmp, tmp \
|
||||
load(index) \
|
||||
VPOR tmp, tmp2, tmp2 \
|
||||
VMOVAPD c, tmp \
|
||||
VPADDD tmp2, a, a \
|
||||
VMOVAPD c, tmp2 \
|
||||
roll(shift,a) \
|
||||
VPADDD b, a, a
|
||||
|
||||
// ROUND3 performs one round-3 step on all lanes:
//   a = b + rol(a + H + K[const] + mem, shift)
// where H = b ^ c ^ d.
// Expects on entry: tmp == c, mem == message word for this step.
// Leaves on exit:   tmp == b (the c argument of the following step),
//                   mem == cache slot index. Clobbers rtmp1.
#define ROUND3(a, b, c, d, index, const, shift) \
|
||||
VPADDD 32*const(consts), a, a \
|
||||
VPADDD mem, a, a \
|
||||
load(index) \
|
||||
VPXOR d, tmp, tmp \
|
||||
VPXOR b, tmp, tmp \
|
||||
VPADDD tmp, a, a \
|
||||
roll(shift,a) \
|
||||
VMOVAPD b, tmp \
|
||||
VPADDD b, a, a
|
||||
|
||||
// ROUND4 performs one round-4 step on all lanes:
//   a = b + rol(a + I + K[const] + mem, shift)
// where I = c ^ (b | ~d).
// Expects on entry: tmp == ~d, mem == message word for this step.
// Leaves on exit:   tmp == ~c (c XOR ones, i.e. ~d for the following step),
//                   mem == cache slot index. Clobbers rtmp1.
#define ROUND4(a, b, c, d, index, const, shift) \
|
||||
VPADDD 32*const(consts), a, a \
|
||||
VPADDD mem, a, a \
|
||||
VPOR b, tmp, tmp \
|
||||
VPXOR c, tmp, tmp \
|
||||
VPADDD tmp, a, a \
|
||||
load(index) \
|
||||
roll(shift,a) \
|
||||
VPXOR c, ones, tmp \
|
||||
VPADDD b, a, a
|
||||
|
||||
// load digest into state registers
|
||||
// The state is read as four consecutive 32-byte vectors (a at +0, b at +32,
// c at +64, d at +96), i.e. eight 32-bit lanes per state word. Unaligned
// moves are used, so dig needs no particular alignment.
VMOVUPD (dig), a
|
||||
VMOVUPD 32(dig), b
|
||||
VMOVUPD 64(dig), c
|
||||
VMOVUPD 96(dig), d
|
||||
|
||||
// load source buffer offsets
|
||||
// AX still holds the bufs argument; eight 32-bit offsets are read from it.
VMOVUPD (AX), off
|
||||
|
||||
// Build the gather mask once from off, then set ones to all-one bits
// (a register compared for equality with itself). Neither is recomputed
// inside the loop.
prepmask
|
||||
VPCMPEQD ones, ones, ones
|
||||
|
||||
// Each iteration consumes 64 bytes from every lane's buffer and runs the 64
// MD5 steps (four rounds of sixteen) on all lanes at once.
loop:
|
||||
// Keep the incoming state so it can be added back after the 64 steps.
VMOVAPD a, sa
|
||||
VMOVAPD b, sb
|
||||
VMOVAPD c, sc
|
||||
VMOVAPD d, sd
|
||||
|
||||
// Gather word 0, seed tmp = d as ROUND1 expects, and cache word 0.
prep(0)
|
||||
VMOVAPD d, tmp
|
||||
store(0)
|
||||
|
||||
// Round 1. Each ROUND1 consumes the word already in mem and gathers the
// word named by its index argument for the next step; the store that follows
// saves that freshly gathered word into the cache for rounds 2-4.
ROUND1(a,b,c,d, 1,0x00, 7)
|
||||
store(1)
|
||||
ROUND1(d,a,b,c, 2,0x01,12)
|
||||
store(2)
|
||||
ROUND1(c,d,a,b, 3,0x02,17)
|
||||
store(3)
|
||||
ROUND1(b,c,d,a, 4,0x03,22)
|
||||
store(4)
|
||||
ROUND1(a,b,c,d, 5,0x04, 7)
|
||||
store(5)
|
||||
ROUND1(d,a,b,c, 6,0x05,12)
|
||||
store(6)
|
||||
ROUND1(c,d,a,b, 7,0x06,17)
|
||||
store(7)
|
||||
ROUND1(b,c,d,a, 8,0x07,22)
|
||||
store(8)
|
||||
ROUND1(a,b,c,d, 9,0x08, 7)
|
||||
store(9)
|
||||
ROUND1(d,a,b,c,10,0x09,12)
|
||||
store(10)
|
||||
ROUND1(c,d,a,b,11,0x0a,17)
|
||||
store(11)
|
||||
ROUND1(b,c,d,a,12,0x0b,22)
|
||||
store(12)
|
||||
ROUND1(a,b,c,d,13,0x0c, 7)
|
||||
store(13)
|
||||
ROUND1(d,a,b,c,14,0x0d,12)
|
||||
store(14)
|
||||
ROUND1(c,d,a,b,15,0x0e,17)
|
||||
store(15)
|
||||
// Last round-1 step: all sixteen words are cached now, so the word for the
// first round-2 step (word 1) is read back with load instead of gathered.
ROUND1load(b,c,d,a, 1,0x0f,22)
|
||||
|
||||
// Round 2 setup: ROUND2 expects both tmp and tmp2 to hold d.
VMOVAPD d, tmp
|
||||
VMOVAPD d, tmp2
|
||||
|
||||
// Round 2. The index argument is the cache slot loaded for the next step.
ROUND2(a,b,c,d, 6,0x10, 5)
|
||||
ROUND2(d,a,b,c,11,0x11, 9)
|
||||
ROUND2(c,d,a,b, 0,0x12,14)
|
||||
ROUND2(b,c,d,a, 5,0x13,20)
|
||||
ROUND2(a,b,c,d,10,0x14, 5)
|
||||
ROUND2(d,a,b,c,15,0x15, 9)
|
||||
ROUND2(c,d,a,b, 4,0x16,14)
|
||||
ROUND2(b,c,d,a, 9,0x17,20)
|
||||
ROUND2(a,b,c,d,14,0x18, 5)
|
||||
ROUND2(d,a,b,c, 3,0x19, 9)
|
||||
ROUND2(c,d,a,b, 8,0x1a,14)
|
||||
ROUND2(b,c,d,a,13,0x1b,20)
|
||||
ROUND2(a,b,c,d, 2,0x1c, 5)
|
||||
ROUND2(d,a,b,c, 7,0x1d, 9)
|
||||
ROUND2(c,d,a,b,12,0x1e,14)
|
||||
ROUND2(b,c,d,a, 0,0x1f,20)
|
||||
|
||||
// Round 3 setup: the first step uses word 5 (replacing the slot 0 value the
// last ROUND2 loaded), and ROUND3 expects tmp to hold c.
load(5)
|
||||
VMOVAPD c, tmp
|
||||
|
||||
// Round 3.
ROUND3(a,b,c,d, 8,0x20, 4)
|
||||
ROUND3(d,a,b,c,11,0x21,11)
|
||||
ROUND3(c,d,a,b,14,0x22,16)
|
||||
ROUND3(b,c,d,a, 1,0x23,23)
|
||||
ROUND3(a,b,c,d, 4,0x24, 4)
|
||||
ROUND3(d,a,b,c, 7,0x25,11)
|
||||
ROUND3(c,d,a,b,10,0x26,16)
|
||||
ROUND3(b,c,d,a,13,0x27,23)
|
||||
ROUND3(a,b,c,d, 0,0x28, 4)
|
||||
ROUND3(d,a,b,c, 3,0x29,11)
|
||||
ROUND3(c,d,a,b, 6,0x2a,16)
|
||||
ROUND3(b,c,d,a, 9,0x2b,23)
|
||||
ROUND3(a,b,c,d,12,0x2c, 4)
|
||||
ROUND3(d,a,b,c,15,0x2d,11)
|
||||
ROUND3(c,d,a,b, 2,0x2e,16)
|
||||
ROUND3(b,c,d,a, 0,0x2f,23)
|
||||
|
||||
// Round 4 setup: the first step uses word 0, and ROUND4 expects tmp to
// hold ~d (d XOR ones).
load(0)
|
||||
VPXOR d, ones, tmp
|
||||
|
||||
// Round 4.
ROUND4(a,b,c,d, 7,0x30, 6)
|
||||
ROUND4(d,a,b,c,14,0x31,10)
|
||||
ROUND4(c,d,a,b, 5,0x32,15)
|
||||
ROUND4(b,c,d,a,12,0x33,21)
|
||||
ROUND4(a,b,c,d, 3,0x34, 6)
|
||||
ROUND4(d,a,b,c,10,0x35,10)
|
||||
ROUND4(c,d,a,b, 1,0x36,15)
|
||||
ROUND4(b,c,d,a, 8,0x37,21)
|
||||
ROUND4(a,b,c,d,15,0x38, 6)
|
||||
ROUND4(d,a,b,c, 6,0x39,10)
|
||||
ROUND4(c,d,a,b,13,0x3a,15)
|
||||
ROUND4(b,c,d,a, 4,0x3b,21)
|
||||
ROUND4(a,b,c,d,11,0x3c, 6)
|
||||
ROUND4(d,a,b,c, 2,0x3d,10)
|
||||
ROUND4(c,d,a,b, 9,0x3e,15)
|
||||
ROUND4(b,c,d,a, 0,0x3f,21)
|
||||
|
||||
// Add the state saved at the top of the iteration back into the result.
VPADDD sa, a, a
|
||||
VPADDD sb, b, b
|
||||
VPADDD sc, c, c
|
||||
VPADDD sd, d, d
|
||||
|
||||
// Advance every lane by one 64-byte block (base is shared by all gathers)
// and repeat until count reaches zero.
// NOTE(review): JNE exits only when count hits exactly zero, so n is
// presumably always a positive multiple of 64 - confirm against the caller.
LEAQ 64(base), base
|
||||
SUBQ $64, count
|
||||
JNE loop
|
||||
|
||||
// Write the four updated state vectors back to the offsets they were loaded
// from (a at +0, b at +32, c at +64, d at +96).
VMOVUPD a, (dig)
|
||||
VMOVUPD b, 32(dig)
|
||||
VMOVUPD c, 64(dig)
|
||||
VMOVUPD d, 96(dig)
|
||||
|
||||
// Clear the upper halves of the YMM registers before returning to code that
// may use legacy SSE instructions.
VZEROUPPER
|
||||
RET
|
Reference in New Issue
Block a user