mirror of
https://github.com/cwinfo/matterbridge.git
synced 2025-07-03 22:27:44 +00:00
489
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
489
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
@ -1,7 +1,6 @@
|
||||
// Code generated by command: go run gen.go -out ../seqdec_amd64.s -pkg=zstd. DO NOT EDIT.
|
||||
|
||||
//go:build !appengine && !noasm && gc && !noasm
|
||||
// +build !appengine,!noasm,gc,!noasm
|
||||
|
||||
// func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: CMOV
|
||||
@ -52,34 +51,46 @@ sequenceDecs_decode_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decode_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 16(R10)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_amd64_of_update_zero:
|
||||
MOVQ AX, 16(R10)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 8(R10)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_amd64_ml_update_zero:
|
||||
MOVQ AX, 8(R10)
|
||||
|
||||
// Fill bitreader to have enough for the remaining
|
||||
CMPQ SI, $0x08
|
||||
@ -107,19 +118,25 @@ sequenceDecs_decode_amd64_fill_2_byte_by_byte:
|
||||
|
||||
sequenceDecs_decode_amd64_fill_2_end:
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, (R10)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_amd64_ll_update_zero:
|
||||
MOVQ AX, (R10)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R14, (SP)
|
||||
@ -198,7 +215,7 @@ sequenceDecs_decode_amd64_skip_update:
|
||||
MOVQ R12, R13
|
||||
MOVQ R11, R12
|
||||
MOVQ CX, R11
|
||||
JMP sequenceDecs_decode_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ (R10), $0x00000000
|
||||
@ -210,7 +227,7 @@ sequenceDecs_decode_amd64_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_amd64_adjust_offset_nonzero
|
||||
MOVQ R11, CX
|
||||
JMP sequenceDecs_decode_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_amd64_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -247,7 +264,7 @@ sequenceDecs_decode_amd64_adjust_temp_valid:
|
||||
MOVQ AX, R11
|
||||
MOVQ AX, CX
|
||||
|
||||
sequenceDecs_decode_amd64_adjust_end:
|
||||
sequenceDecs_decode_amd64_after_adjust:
|
||||
MOVQ CX, 16(R10)
|
||||
|
||||
// Check values
|
||||
@ -303,10 +320,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: CMOV
|
||||
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
|
||||
@ -356,49 +369,67 @@ sequenceDecs_decode_56_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decode_56_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 16(R10)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_56_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_56_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_56_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_56_amd64_of_update_zero:
|
||||
MOVQ AX, 16(R10)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, 8(R10)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_56_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_56_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_56_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_56_amd64_ml_update_zero:
|
||||
MOVQ AX, 8(R10)
|
||||
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R15
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R15
|
||||
ADDQ R15, AX
|
||||
MOVQ AX, (R10)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R15
|
||||
SHLQ CL, R15
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decode_56_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decode_56_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decode_56_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R15
|
||||
ADDQ R15, AX
|
||||
|
||||
sequenceDecs_decode_56_amd64_ll_update_zero:
|
||||
MOVQ AX, (R10)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R14, (SP)
|
||||
@ -477,7 +508,7 @@ sequenceDecs_decode_56_amd64_skip_update:
|
||||
MOVQ R12, R13
|
||||
MOVQ R11, R12
|
||||
MOVQ CX, R11
|
||||
JMP sequenceDecs_decode_56_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_56_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ (R10), $0x00000000
|
||||
@ -489,7 +520,7 @@ sequenceDecs_decode_56_amd64_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_56_amd64_adjust_offset_nonzero
|
||||
MOVQ R11, CX
|
||||
JMP sequenceDecs_decode_56_amd64_adjust_end
|
||||
JMP sequenceDecs_decode_56_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_amd64_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -526,7 +557,7 @@ sequenceDecs_decode_56_amd64_adjust_temp_valid:
|
||||
MOVQ AX, R11
|
||||
MOVQ AX, CX
|
||||
|
||||
sequenceDecs_decode_56_amd64_adjust_end:
|
||||
sequenceDecs_decode_56_amd64_after_adjust:
|
||||
MOVQ CX, 16(R10)
|
||||
|
||||
// Check values
|
||||
@ -582,10 +613,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: BMI, BMI2, CMOV
|
||||
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
|
||||
@ -757,7 +784,7 @@ sequenceDecs_decode_bmi2_skip_update:
|
||||
MOVQ R11, R12
|
||||
MOVQ R10, R11
|
||||
MOVQ CX, R10
|
||||
JMP sequenceDecs_decode_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ (R9), $0x00000000
|
||||
@ -769,7 +796,7 @@ sequenceDecs_decode_bmi2_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_bmi2_adjust_offset_nonzero
|
||||
MOVQ R10, CX
|
||||
JMP sequenceDecs_decode_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_bmi2_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -806,7 +833,7 @@ sequenceDecs_decode_bmi2_adjust_temp_valid:
|
||||
MOVQ R13, R10
|
||||
MOVQ R13, CX
|
||||
|
||||
sequenceDecs_decode_bmi2_adjust_end:
|
||||
sequenceDecs_decode_bmi2_after_adjust:
|
||||
MOVQ CX, 16(R9)
|
||||
|
||||
// Check values
|
||||
@ -862,10 +889,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
// Requires: BMI, BMI2, CMOV
|
||||
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
|
||||
@ -1012,7 +1035,7 @@ sequenceDecs_decode_56_bmi2_skip_update:
|
||||
MOVQ R11, R12
|
||||
MOVQ R10, R11
|
||||
MOVQ CX, R10
|
||||
JMP sequenceDecs_decode_56_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_56_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ (R9), $0x00000000
|
||||
@ -1024,7 +1047,7 @@ sequenceDecs_decode_56_bmi2_adjust_offset_maybezero:
|
||||
TESTQ CX, CX
|
||||
JNZ sequenceDecs_decode_56_bmi2_adjust_offset_nonzero
|
||||
MOVQ R10, CX
|
||||
JMP sequenceDecs_decode_56_bmi2_adjust_end
|
||||
JMP sequenceDecs_decode_56_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decode_56_bmi2_adjust_offset_nonzero:
|
||||
CMPQ CX, $0x01
|
||||
@ -1061,7 +1084,7 @@ sequenceDecs_decode_56_bmi2_adjust_temp_valid:
|
||||
MOVQ R13, R10
|
||||
MOVQ R13, CX
|
||||
|
||||
sequenceDecs_decode_56_bmi2_adjust_end:
|
||||
sequenceDecs_decode_56_bmi2_after_adjust:
|
||||
MOVQ CX, 16(R9)
|
||||
|
||||
// Check values
|
||||
@ -1117,10 +1140,6 @@ error_not_enough_literals:
|
||||
MOVQ $0x00000004, ret+24(FP)
|
||||
RET
|
||||
|
||||
// Return with not enough output space error
|
||||
MOVQ $0x00000005, ret+24(FP)
|
||||
RET
|
||||
|
||||
// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
|
||||
// Requires: SSE
|
||||
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
|
||||
@ -1354,8 +1373,7 @@ loop_finished:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1367,8 +1385,7 @@ error_match_off_too_big:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1712,8 +1729,7 @@ loop_finished:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1725,8 +1741,7 @@ error_match_off_too_big:
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, 24(AX)
|
||||
MOVQ DI, 104(AX)
|
||||
MOVQ 80(AX), CX
|
||||
SUBQ CX, SI
|
||||
SUBQ 80(AX), SI
|
||||
MOVQ SI, 112(AX)
|
||||
RET
|
||||
|
||||
@ -1749,6 +1764,10 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
|
||||
MOVQ 72(AX), DI
|
||||
MOVQ 80(AX), R8
|
||||
MOVQ 88(AX), R9
|
||||
XORQ CX, CX
|
||||
MOVQ CX, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
MOVQ CX, 24(SP)
|
||||
MOVQ 112(AX), R10
|
||||
MOVQ 128(AX), CX
|
||||
MOVQ CX, 32(SP)
|
||||
@ -1798,34 +1817,46 @@ sequenceDecs_decodeSync_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 8(SP)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_amd64_of_update_zero:
|
||||
MOVQ AX, 8(SP)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 16(SP)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_amd64_ml_update_zero:
|
||||
MOVQ AX, 16(SP)
|
||||
|
||||
// Fill bitreader to have enough for the remaining
|
||||
CMPQ SI, $0x08
|
||||
@ -1853,19 +1884,25 @@ sequenceDecs_decodeSync_amd64_fill_2_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_amd64_fill_2_end:
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 24(SP)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_amd64_ll_update_zero:
|
||||
MOVQ AX, 24(SP)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R13, (SP)
|
||||
@ -1945,7 +1982,7 @@ sequenceDecs_decodeSync_amd64_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -1957,7 +1994,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
|
||||
MOVQ R13, AX
|
||||
@ -1966,8 +2003,7 @@ sequenceDecs_decodeSync_amd64_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, AX
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(AX*8), R14
|
||||
ADDQ 144(CX)(AX*8), R14
|
||||
JNZ sequenceDecs_decodeSync_amd64_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -1983,7 +2019,7 @@ sequenceDecs_decodeSync_amd64_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_amd64_adjust_end:
|
||||
sequenceDecs_decodeSync_amd64_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
@ -2280,6 +2316,10 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
|
||||
MOVQ 72(CX), SI
|
||||
MOVQ 80(CX), DI
|
||||
MOVQ 88(CX), R8
|
||||
XORQ R9, R9
|
||||
MOVQ R9, 8(SP)
|
||||
MOVQ R9, 16(SP)
|
||||
MOVQ R9, 24(SP)
|
||||
MOVQ 112(CX), R9
|
||||
MOVQ 128(CX), R10
|
||||
MOVQ R10, 32(SP)
|
||||
@ -2452,7 +2492,7 @@ sequenceDecs_decodeSync_bmi2_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -2464,7 +2504,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
|
||||
MOVQ R13, R12
|
||||
@ -2473,8 +2513,7 @@ sequenceDecs_decodeSync_bmi2_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, R12
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(R12*8), R14
|
||||
ADDQ 144(CX)(R12*8), R14
|
||||
JNZ sequenceDecs_decodeSync_bmi2_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -2490,7 +2529,7 @@ sequenceDecs_decodeSync_bmi2_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_bmi2_adjust_end:
|
||||
sequenceDecs_decodeSync_bmi2_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
@ -2787,6 +2826,10 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
|
||||
MOVQ 72(AX), DI
|
||||
MOVQ 80(AX), R8
|
||||
MOVQ 88(AX), R9
|
||||
XORQ CX, CX
|
||||
MOVQ CX, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
MOVQ CX, 24(SP)
|
||||
MOVQ 112(AX), R10
|
||||
MOVQ 128(AX), CX
|
||||
MOVQ CX, 32(SP)
|
||||
@ -2836,34 +2879,46 @@ sequenceDecs_decodeSync_safe_amd64_fill_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_fill_end:
|
||||
// Update offset
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 8(SP)
|
||||
MOVQ R9, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_safe_amd64_of_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_safe_amd64_of_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_safe_amd64_of_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_of_update_zero:
|
||||
MOVQ AX, 8(SP)
|
||||
|
||||
// Update match length
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 16(SP)
|
||||
MOVQ R8, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_safe_amd64_ml_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_safe_amd64_ml_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_safe_amd64_ml_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_ml_update_zero:
|
||||
MOVQ AX, 16(SP)
|
||||
|
||||
// Fill bitreader to have enough for the remaining
|
||||
CMPQ SI, $0x08
|
||||
@ -2891,19 +2946,25 @@ sequenceDecs_decodeSync_safe_amd64_fill_2_byte_by_byte:
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_fill_2_end:
|
||||
// Update literal length
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
ADDQ CX, BX
|
||||
NEGL CX
|
||||
SHRQ CL, R14
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
CMOVQEQ CX, R14
|
||||
ADDQ R14, AX
|
||||
MOVQ AX, 24(SP)
|
||||
MOVQ DI, AX
|
||||
MOVQ BX, CX
|
||||
MOVQ DX, R14
|
||||
SHLQ CL, R14
|
||||
MOVB AH, CL
|
||||
SHRQ $0x20, AX
|
||||
TESTQ CX, CX
|
||||
JZ sequenceDecs_decodeSync_safe_amd64_ll_update_zero
|
||||
ADDQ CX, BX
|
||||
CMPQ BX, $0x40
|
||||
JA sequenceDecs_decodeSync_safe_amd64_ll_update_zero
|
||||
CMPQ CX, $0x40
|
||||
JAE sequenceDecs_decodeSync_safe_amd64_ll_update_zero
|
||||
NEGQ CX
|
||||
SHRQ CL, R14
|
||||
ADDQ R14, AX
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_ll_update_zero:
|
||||
MOVQ AX, 24(SP)
|
||||
|
||||
// Fill bitreader for state updates
|
||||
MOVQ R13, (SP)
|
||||
@ -2983,7 +3044,7 @@ sequenceDecs_decodeSync_safe_amd64_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -2995,7 +3056,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_amd64_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
|
||||
MOVQ R13, AX
|
||||
@ -3004,8 +3065,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, AX
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(AX*8), R14
|
||||
ADDQ 144(CX)(AX*8), R14
|
||||
JNZ sequenceDecs_decodeSync_safe_amd64_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -3021,7 +3081,7 @@ sequenceDecs_decodeSync_safe_amd64_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_safe_amd64_adjust_end:
|
||||
sequenceDecs_decodeSync_safe_amd64_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
@ -3420,6 +3480,10 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
|
||||
MOVQ 72(CX), SI
|
||||
MOVQ 80(CX), DI
|
||||
MOVQ 88(CX), R8
|
||||
XORQ R9, R9
|
||||
MOVQ R9, 8(SP)
|
||||
MOVQ R9, 16(SP)
|
||||
MOVQ R9, 24(SP)
|
||||
MOVQ 112(CX), R9
|
||||
MOVQ 128(CX), R10
|
||||
MOVQ R10, 32(SP)
|
||||
@ -3592,7 +3656,7 @@ sequenceDecs_decodeSync_safe_bmi2_skip_update:
|
||||
MOVUPS 144(CX), X0
|
||||
MOVQ R13, 144(CX)
|
||||
MOVUPS X0, 152(CX)
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_bmi2_adjust_offsetB_1_or_0:
|
||||
CMPQ 24(SP), $0x00000000
|
||||
@ -3604,7 +3668,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_maybezero:
|
||||
TESTQ R13, R13
|
||||
JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero
|
||||
MOVQ 144(CX), R13
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_adjust_end
|
||||
JMP sequenceDecs_decodeSync_safe_bmi2_after_adjust
|
||||
|
||||
sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
|
||||
MOVQ R13, R12
|
||||
@ -3613,8 +3677,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_offset_nonzero:
|
||||
CMPQ R13, $0x03
|
||||
CMOVQEQ R14, R12
|
||||
CMOVQEQ R15, R14
|
||||
LEAQ 144(CX), R15
|
||||
ADDQ (R15)(R12*8), R14
|
||||
ADDQ 144(CX)(R12*8), R14
|
||||
JNZ sequenceDecs_decodeSync_safe_bmi2_adjust_temp_valid
|
||||
MOVQ $0x00000001, R14
|
||||
|
||||
@ -3630,7 +3693,7 @@ sequenceDecs_decodeSync_safe_bmi2_adjust_skip:
|
||||
MOVQ R14, 144(CX)
|
||||
MOVQ R14, R13
|
||||
|
||||
sequenceDecs_decodeSync_safe_bmi2_adjust_end:
|
||||
sequenceDecs_decodeSync_safe_bmi2_after_adjust:
|
||||
MOVQ R13, 8(SP)
|
||||
|
||||
// Check values
|
||||
|
Reference in New Issue
Block a user