Simplify HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop

Get rid of three divisions. The original expression was:

    opmin := min((oend0 - op0) / 10, (oend1 - op1) / 10, (oend2 - op2) / 10, (oend3 - op3) / 10)
    r15   := min(r15, opmin)

The division by 10 can be moved outside the `min`:

    opmin := min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3)
    r15   := min(r15, opmin/10)
This commit is contained in:
Wojciech Muła 2022-01-19 18:29:54 +01:00
parent ef1eafded3
commit e74ca7979e

View File

@ -427,41 +427,30 @@ HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop:
/* r15 = (ip0 - ilimit) / 7 */
movq %rdx, %r15
movabsq $-3689348814741910323, %rdx
movq 8(%rsp), %rax /* rax = oend0 */
subq %op0, %rax /* rax = oend0 - op0 */
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
/* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
movq 8(%rsp), %rax /* rax = oend0 */
subq %op0, %rax /* rax = oend0 - op0 */
movq 16(%rsp), %rdx /* rdx = oend1 */
subq %op1, %rdx /* rdx = oend1 - op1 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movabsq $-3689348814741910323, %rdx
movq 16(%rsp), %rax /* rax = oend1 */
subq %op1, %rax /* rax = oend1 - op1 */
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
movabsq $-3689348814741910323, %rdx
movq 24(%rsp), %rax /* rax = oend2 */
subq %op2, %rax /* rax = oend2 - op2 */
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15
cmova %rdx, %r15
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movabsq $-3689348814741910323, %rdx
movq 32(%rsp), %rax /* rax = oend3 */
subq %op3, %rax /* rax = oend3 - op3 */
cmpq %rax, %rdx
cmova %rax, %rdx /* rdx = min(%rdx, %rax) */
movabsq $-3689348814741910323, %rax
mulq %rdx
shrq $3, %rdx /* rdx = rax / 10 */
shrq $3, %rdx /* rdx = rdx / 10 */
/* r15 = min(%rdx, %r15) */
cmpq %rdx, %r15