mirror of
https://github.com/intel/llvm.git
synced 2026-01-22 07:01:03 +08:00
[PR] Fix bb reordering optimization
Summary: Reorder-blocks optimization pass doesn't take into account that available offset for legacy Jcc instructions (for example, JRCXZ - operand 8 bits) has to be less than 255 bytes. It's rare case and to exclude such functions with unsupported instructions from optimization passes added extra checking Alexey Moksyakov Advanced Software Technology Lab, Huawei (cherry picked from FBD28264117)
This commit is contained in:
committed by
Maksim Panchenko
parent
9a884543f1
commit
ce84e9607a
@@ -1246,6 +1246,13 @@ bool BinaryFunction::disassemble() {
|
||||
const bool IsCondBranch = MIB->isConditionalBranch(Instruction);
|
||||
MCSymbol *TargetSymbol = nullptr;
|
||||
|
||||
if (BC.MIB->isUnsupportedBranch(Instruction.getOpcode())) {
|
||||
setIgnored();
|
||||
if (BinaryFunction *TargetFunc =
|
||||
BC.getBinaryFunctionContainingAddress(TargetAddress))
|
||||
TargetFunc->setIgnored();
|
||||
}
|
||||
|
||||
if (IsCall && containsAddress(TargetAddress)) {
|
||||
if (TargetAddress == getAddress()) {
|
||||
// Recursive call.
|
||||
|
||||
640
bolt/test/X86/bug-reorder-bb-jrcxz.s
Normal file
640
bolt/test/X86/bug-reorder-bb-jrcxz.s
Normal file
@@ -0,0 +1,640 @@
|
||||
# Test performs a BB reordering with unsupported
|
||||
# instruction jrcxz. Reordering works correctly with the
|
||||
# follow options: None, Normal or Reverse. Other strategies
|
||||
# are completed with Assertion `isIntN(Size * 8 + 1, Value).
|
||||
# The cause is the distance between BB where one contains
|
||||
# jrcxz instruction.
|
||||
# Example: OpenSSL
|
||||
# https://github.com/openssl/openssl/blob/master/crypto/bn/asm/x86_64-mont5.pl#L3319
|
||||
|
||||
# REQUIRES: system-linux
|
||||
|
||||
# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown \
|
||||
# RUN: %s -o %t.o
|
||||
# RUN: link_fdata %s %t.o %t.fdata
|
||||
# RUN: %host_cc %t.o -falign-labels -march=native -o %t.exe -Wl,-q
|
||||
|
||||
# RUN: llvm-bolt %t.exe -o %t.bolted -data %t.fdata \
|
||||
# RUN: -reorder-blocks=cache+ -reorder-functions=hfsort \
|
||||
# RUN: -split-functions=2 -split-all-cold -split-eh -dyno-stats \
|
||||
# RUN: -print-finalized 2>&1 | FileCheck %s
|
||||
|
||||
# CHECK-NOT: value of -2105 is too large for field of 1 byte.
|
||||
|
||||
.text
|
||||
.section .text.startup,"ax",@progbits
|
||||
.p2align 5,,31
|
||||
.globl main
|
||||
.type main, @function
|
||||
main:
|
||||
jmp bn_sqrx8x_internal
|
||||
|
||||
.globl bn_sqrx8x_internal
|
||||
.hidden bn_sqrx8x_internal
|
||||
.type bn_sqrx8x_internal,@function
|
||||
.align 32
|
||||
bn_sqrx8x_internal:
|
||||
__bn_sqrx8x_internal:
|
||||
# FDATA: 1 bn_from_mont8x 160 1 bn_sqrx8x_internal 0 0 56
|
||||
# FDATA: 1 bn_sqrx8x_internal 13 1 bn_sqrx8x_internal 40 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 5f 1 bn_sqrx8x_internal 2c 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 2f1 1 bn_sqrx8x_internal 500 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 34a 1 bn_sqrx8x_internal 360 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 360 0 447888
|
||||
# FDATA: 1 bn_sqrx8x_internal 411 1 bn_sqrx8x_internal 417 0 63984
|
||||
# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 480 0 60972
|
||||
# FDATA: 1 bn_sqrx8x_internal 427 1 bn_sqrx8x_internal 429 0 3012
|
||||
# FDATA: 1 bn_sqrx8x_internal 467 1 bn_sqrx8x_internal 360 0 3012
|
||||
# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 80 0 58964
|
||||
# FDATA: 1 bn_sqrx8x_internal 4ba 1 bn_sqrx8x_internal 4c0 0 2008
|
||||
# FDATA: 1 bn_sqrx8x_internal 4fb 1 bn_sqrx8x_internal 80 0 2008
|
||||
# FDATA: 1 bn_sqrx8x_internal 5f0 1 bn_sqrx8x_internal 5f2 0 180908
|
||||
# FDATA: 1 bn_sqrx8x_internal 61b 1 bn_sqrx8x_internal 540 0 180908
|
||||
# FDATA: 1 bn_sqrx8x_internal 632 1 bn_sqrx8x_internal 637 0 59020
|
||||
# FDATA: 1 bn_sqrx8x_internal 657 1 bn_sqrx8x_internal 660 0 59020
|
||||
# FDATA: 1 bn_sqrx8x_internal 696 1 bn_sqrx8x_internal 6a0 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 6a0 0 840336
|
||||
# FDATA: 1 bn_sqrx8x_internal 75a 1 bn_sqrx8x_internal 760 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 768 1 bn_sqrx8x_internal 76e 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 7b2 1 bn_sqrx8x_internal 7c0 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 7c0 0 896560
|
||||
# FDATA: 1 bn_sqrx8x_internal 86e 1 bn_sqrx8x_internal 874 0 128080
|
||||
# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 8c0 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 879 1 bn_sqrx8x_internal 87b 0 8032
|
||||
# FDATA: 1 bn_sqrx8x_internal 8bb 1 bn_sqrx8x_internal 7c0 0 8032
|
||||
# FDATA: 1 bn_sqrx8x_internal 8e8 1 bn_sqrx8x_internal 8ed 0 120048
|
||||
# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 660 0 61028
|
||||
# FDATA: 1 bn_sqrx8x_internal 955 1 bn_sqrx8x_internal 95b 0 59020
|
||||
# FDATA: 0 [unknown] 0 1 bn_sqrx8x_internal 5f0 0 59020
|
||||
.cfi_startproc
|
||||
leaq 48+8(%rsp),%rdi
|
||||
leaq (%rsi,%r9,1),%rbp
|
||||
movq %r9,0+8(%rsp)
|
||||
movq %rbp,8+8(%rsp)
|
||||
jmp .Lsqr8x_zero_start
|
||||
|
||||
.align 32
|
||||
.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
|
||||
.Lsqrx8x_zero:
|
||||
.byte 0x3e
|
||||
movdqa %xmm0,0(%rdi)
|
||||
movdqa %xmm0,16(%rdi)
|
||||
movdqa %xmm0,32(%rdi)
|
||||
movdqa %xmm0,48(%rdi)
|
||||
.Lsqr8x_zero_start:
|
||||
movdqa %xmm0,64(%rdi)
|
||||
movdqa %xmm0,80(%rdi)
|
||||
movdqa %xmm0,96(%rdi)
|
||||
movdqa %xmm0,112(%rdi)
|
||||
leaq 128(%rdi),%rdi
|
||||
subq $64,%r9
|
||||
jnz .Lsqrx8x_zero
|
||||
|
||||
movq 0(%rsi),%rdx
|
||||
|
||||
xorq %r10,%r10
|
||||
xorq %r11,%r11
|
||||
xorq %r12,%r12
|
||||
xorq %r13,%r13
|
||||
xorq %r14,%r14
|
||||
xorq %r15,%r15
|
||||
leaq 48+8(%rsp),%rdi
|
||||
xorq %rbp,%rbp
|
||||
jmp .Lsqrx8x_outer_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_outer_loop:
|
||||
mulxq 8(%rsi),%r8,%rax
|
||||
adcxq %r9,%r8
|
||||
adoxq %rax,%r10
|
||||
mulxq 16(%rsi),%r9,%rax
|
||||
adcxq %r10,%r9
|
||||
adoxq %rax,%r11
|
||||
.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00
|
||||
adcxq %r11,%r10
|
||||
adoxq %rax,%r12
|
||||
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00
|
||||
adcxq %r12,%r11
|
||||
adoxq %rax,%r13
|
||||
mulxq 40(%rsi),%r12,%rax
|
||||
adcxq %r13,%r12
|
||||
adoxq %rax,%r14
|
||||
mulxq 48(%rsi),%r13,%rax
|
||||
adcxq %r14,%r13
|
||||
adoxq %r15,%rax
|
||||
mulxq 56(%rsi),%r14,%r15
|
||||
movq 8(%rsi),%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rbp,%r15
|
||||
adcq 64(%rdi),%r15
|
||||
movq %r8,8(%rdi)
|
||||
movq %r9,16(%rdi)
|
||||
sbbq %rcx,%rcx
|
||||
xorq %rbp,%rbp
|
||||
|
||||
mulxq 16(%rsi),%r8,%rbx
|
||||
mulxq 24(%rsi),%r9,%rax
|
||||
adcxq %r10,%r8
|
||||
adoxq %rbx,%r9
|
||||
mulxq 32(%rsi),%r10,%rbx
|
||||
adcxq %r11,%r9
|
||||
adoxq %rax,%r10
|
||||
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00
|
||||
adcxq %r12,%r10
|
||||
adoxq %rbx,%r11
|
||||
.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00
|
||||
adcxq %r13,%r11
|
||||
adoxq %r14,%r12
|
||||
.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00
|
||||
movq 16(%rsi),%rdx
|
||||
adcxq %rax,%r12
|
||||
adoxq %rbx,%r13
|
||||
adcxq %r15,%r13
|
||||
adoxq %rbp,%r14
|
||||
adcxq %rbp,%r14
|
||||
|
||||
movq %r8,24(%rdi)
|
||||
movq %r9,32(%rdi)
|
||||
|
||||
mulxq 24(%rsi),%r8,%rbx
|
||||
mulxq 32(%rsi),%r9,%rax
|
||||
adcxq %r10,%r8
|
||||
adoxq %rbx,%r9
|
||||
mulxq 40(%rsi),%r10,%rbx
|
||||
adcxq %r11,%r9
|
||||
adoxq %rax,%r10
|
||||
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00
|
||||
adcxq %r12,%r10
|
||||
adoxq %r13,%r11
|
||||
.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00
|
||||
.byte 0x3e
|
||||
movq 24(%rsi),%rdx
|
||||
adcxq %rbx,%r11
|
||||
adoxq %rax,%r12
|
||||
adcxq %r14,%r12
|
||||
movq %r8,40(%rdi)
|
||||
movq %r9,48(%rdi)
|
||||
mulxq 32(%rsi),%r8,%rax
|
||||
adoxq %rbp,%r13
|
||||
adcxq %rbp,%r13
|
||||
|
||||
mulxq 40(%rsi),%r9,%rbx
|
||||
adcxq %r10,%r8
|
||||
adoxq %rax,%r9
|
||||
mulxq 48(%rsi),%r10,%rax
|
||||
adcxq %r11,%r9
|
||||
adoxq %r12,%r10
|
||||
mulxq 56(%rsi),%r11,%r12
|
||||
movq 32(%rsi),%rdx
|
||||
movq 40(%rsi),%r14
|
||||
adcxq %rbx,%r10
|
||||
adoxq %rax,%r11
|
||||
movq 48(%rsi),%r15
|
||||
adcxq %r13,%r11
|
||||
adoxq %rbp,%r12
|
||||
adcxq %rbp,%r12
|
||||
|
||||
movq %r8,56(%rdi)
|
||||
movq %r9,64(%rdi)
|
||||
|
||||
mulxq %r14,%r9,%rax
|
||||
movq 56(%rsi),%r8
|
||||
adcxq %r10,%r9
|
||||
mulxq %r15,%r10,%rbx
|
||||
adoxq %rax,%r10
|
||||
adcxq %r11,%r10
|
||||
mulxq %r8,%r11,%rax
|
||||
movq %r14,%rdx
|
||||
adoxq %rbx,%r11
|
||||
adcxq %r12,%r11
|
||||
|
||||
adcxq %rbp,%rax
|
||||
|
||||
mulxq %r15,%r14,%rbx
|
||||
mulxq %r8,%r12,%r13
|
||||
movq %r15,%rdx
|
||||
leaq 64(%rsi),%rsi
|
||||
adcxq %r14,%r11
|
||||
adoxq %rbx,%r12
|
||||
adcxq %rax,%r12
|
||||
adoxq %rbp,%r13
|
||||
|
||||
.byte 0x67,0x67
|
||||
mulxq %r8,%r8,%r14
|
||||
adcxq %r8,%r13
|
||||
adcxq %rbp,%r14
|
||||
|
||||
cmpq 8+8(%rsp),%rsi
|
||||
je .Lsqrx8x_outer_break
|
||||
|
||||
negq %rcx
|
||||
movq $-8,%rcx
|
||||
movq %rbp,%r15
|
||||
movq 64(%rdi),%r8
|
||||
adcxq 72(%rdi),%r9
|
||||
adcxq 80(%rdi),%r10
|
||||
adcxq 88(%rdi),%r11
|
||||
adcq 96(%rdi),%r12
|
||||
adcq 104(%rdi),%r13
|
||||
adcq 112(%rdi),%r14
|
||||
adcq 120(%rdi),%r15
|
||||
leaq (%rsi),%rbp
|
||||
leaq 128(%rdi),%rdi
|
||||
sbbq %rax,%rax
|
||||
|
||||
movq -64(%rsi),%rdx
|
||||
movq %rax,16+8(%rsp)
|
||||
movq %rdi,24+8(%rsp)
|
||||
|
||||
|
||||
xorl %eax,%eax
|
||||
jmp .Lsqrx8x_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_loop:
|
||||
movq %r8,%rbx
|
||||
mulxq 0(%rbp),%rax,%r8
|
||||
adcxq %rax,%rbx
|
||||
adoxq %r9,%r8
|
||||
|
||||
mulxq 8(%rbp),%rax,%r9
|
||||
adcxq %rax,%r8
|
||||
adoxq %r10,%r9
|
||||
|
||||
mulxq 16(%rbp),%rax,%r10
|
||||
adcxq %rax,%r9
|
||||
adoxq %r11,%r10
|
||||
|
||||
mulxq 24(%rbp),%rax,%r11
|
||||
adcxq %rax,%r10
|
||||
adoxq %r12,%r11
|
||||
|
||||
.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
|
||||
adcxq %rax,%r11
|
||||
adoxq %r13,%r12
|
||||
|
||||
mulxq 40(%rbp),%rax,%r13
|
||||
adcxq %rax,%r12
|
||||
adoxq %r14,%r13
|
||||
|
||||
mulxq 48(%rbp),%rax,%r14
|
||||
movq %rbx,(%rdi,%rcx,8)
|
||||
movl $0,%ebx
|
||||
adcxq %rax,%r13
|
||||
adoxq %r15,%r14
|
||||
|
||||
.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00
|
||||
movq 8(%rsi,%rcx,8),%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rbx,%r15
|
||||
adcxq %rbx,%r15
|
||||
|
||||
.byte 0x67
|
||||
incq %rcx
|
||||
jnz .Lsqrx8x_loop
|
||||
|
||||
leaq 64(%rbp),%rbp
|
||||
movq $-8,%rcx
|
||||
cmpq 8+8(%rsp),%rbp
|
||||
je .Lsqrx8x_break
|
||||
|
||||
subq 16+8(%rsp),%rbx
|
||||
.byte 0x66
|
||||
movq -64(%rsi),%rdx
|
||||
adcxq 0(%rdi),%r8
|
||||
adcxq 8(%rdi),%r9
|
||||
adcq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
leaq 64(%rdi),%rdi
|
||||
.byte 0x67
|
||||
sbbq %rax,%rax
|
||||
xorl %ebx,%ebx
|
||||
movq %rax,16+8(%rsp)
|
||||
jmp .Lsqrx8x_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_break:
|
||||
xorq %rbp,%rbp
|
||||
subq 16+8(%rsp),%rbx
|
||||
adcxq %rbp,%r8
|
||||
movq 24+8(%rsp),%rcx
|
||||
adcxq %rbp,%r9
|
||||
movq 0(%rsi),%rdx
|
||||
adcq $0,%r10
|
||||
movq %r8,0(%rdi)
|
||||
adcq $0,%r11
|
||||
adcq $0,%r12
|
||||
adcq $0,%r13
|
||||
adcq $0,%r14
|
||||
adcq $0,%r15
|
||||
cmpq %rcx,%rdi
|
||||
je .Lsqrx8x_outer_loop
|
||||
|
||||
movq %r9,8(%rdi)
|
||||
movq 8(%rcx),%r9
|
||||
movq %r10,16(%rdi)
|
||||
movq 16(%rcx),%r10
|
||||
movq %r11,24(%rdi)
|
||||
movq 24(%rcx),%r11
|
||||
movq %r12,32(%rdi)
|
||||
movq 32(%rcx),%r12
|
||||
movq %r13,40(%rdi)
|
||||
movq 40(%rcx),%r13
|
||||
movq %r14,48(%rdi)
|
||||
movq 48(%rcx),%r14
|
||||
movq %r15,56(%rdi)
|
||||
movq 56(%rcx),%r15
|
||||
movq %rcx,%rdi
|
||||
jmp .Lsqrx8x_outer_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_outer_break:
|
||||
movq %r9,72(%rdi)
|
||||
.byte 102,72,15,126,217
|
||||
movq %r10,80(%rdi)
|
||||
movq %r11,88(%rdi)
|
||||
movq %r12,96(%rdi)
|
||||
movq %r13,104(%rdi)
|
||||
movq %r14,112(%rdi)
|
||||
leaq 48+8(%rsp),%rdi
|
||||
movq (%rsi,%rcx,1),%rdx
|
||||
|
||||
movq 8(%rdi),%r11
|
||||
xorq %r10,%r10
|
||||
movq 0+8(%rsp),%r9
|
||||
adoxq %r11,%r11
|
||||
movq 16(%rdi),%r12
|
||||
movq 24(%rdi),%r13
|
||||
|
||||
.align 32
|
||||
.Lsqrx4x_shift_n_add:
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r12,%r12
|
||||
adcxq %r10,%rax
|
||||
.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00
|
||||
.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00
|
||||
adoxq %r13,%r13
|
||||
adcxq %r11,%rbx
|
||||
movq 40(%rdi),%r11
|
||||
movq %rax,0(%rdi)
|
||||
movq %rbx,8(%rdi)
|
||||
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r10,%r10
|
||||
adcxq %r12,%rax
|
||||
movq 16(%rsi,%rcx,1),%rdx
|
||||
movq 48(%rdi),%r12
|
||||
adoxq %r11,%r11
|
||||
adcxq %r13,%rbx
|
||||
movq 56(%rdi),%r13
|
||||
movq %rax,16(%rdi)
|
||||
movq %rbx,24(%rdi)
|
||||
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r12,%r12
|
||||
adcxq %r10,%rax
|
||||
movq 24(%rsi,%rcx,1),%rdx
|
||||
leaq 32(%rcx),%rcx
|
||||
movq 64(%rdi),%r10
|
||||
adoxq %r13,%r13
|
||||
adcxq %r11,%rbx
|
||||
movq 72(%rdi),%r11
|
||||
movq %rax,32(%rdi)
|
||||
movq %rbx,40(%rdi)
|
||||
|
||||
mulxq %rdx,%rax,%rbx
|
||||
adoxq %r10,%r10
|
||||
adcxq %r12,%rax
|
||||
jrcxz .Lsqrx4x_shift_n_add_break
|
||||
.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00
|
||||
adoxq %r11,%r11
|
||||
adcxq %r13,%rbx
|
||||
movq 80(%rdi),%r12
|
||||
movq 88(%rdi),%r13
|
||||
movq %rax,48(%rdi)
|
||||
movq %rbx,56(%rdi)
|
||||
leaq 64(%rdi),%rdi
|
||||
nop
|
||||
jmp .Lsqrx4x_shift_n_add
|
||||
|
||||
.align 32
|
||||
.Lsqrx4x_shift_n_add_break:
|
||||
adcxq %r13,%rbx
|
||||
movq %rax,48(%rdi)
|
||||
movq %rbx,56(%rdi)
|
||||
leaq 64(%rdi),%rdi
|
||||
.byte 102,72,15,126,213
|
||||
__bn_sqrx8x_reduction:
|
||||
xorl %eax,%eax
|
||||
movq 32+8(%rsp),%rbx
|
||||
movq 48+8(%rsp),%rdx
|
||||
leaq -64(%rbp,%r9,1),%rcx
|
||||
|
||||
movq %rcx,0+8(%rsp)
|
||||
movq %rdi,8+8(%rsp)
|
||||
|
||||
leaq 48+8(%rsp),%rdi
|
||||
jmp .Lsqrx8x_reduction_loop
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_reduction_loop:
|
||||
movq 8(%rdi),%r9
|
||||
movq 16(%rdi),%r10
|
||||
movq 24(%rdi),%r11
|
||||
movq 32(%rdi),%r12
|
||||
movq %rdx,%r8
|
||||
imulq %rbx,%rdx
|
||||
movq 40(%rdi),%r13
|
||||
movq 48(%rdi),%r14
|
||||
movq 56(%rdi),%r15
|
||||
movq %rax,24+8(%rsp)
|
||||
|
||||
leaq 64(%rdi),%rdi
|
||||
xorq %rsi,%rsi
|
||||
movq $-8,%rcx
|
||||
jmp .Lsqrx8x_reduce
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_reduce:
|
||||
movq %r8,%rbx
|
||||
mulxq 0(%rbp),%rax,%r8
|
||||
adcxq %rbx,%rax
|
||||
adoxq %r9,%r8
|
||||
|
||||
mulxq 8(%rbp),%rbx,%r9
|
||||
adcxq %rbx,%r8
|
||||
adoxq %r10,%r9
|
||||
|
||||
mulxq 16(%rbp),%rbx,%r10
|
||||
adcxq %rbx,%r9
|
||||
adoxq %r11,%r10
|
||||
|
||||
mulxq 24(%rbp),%rbx,%r11
|
||||
adcxq %rbx,%r10
|
||||
adoxq %r12,%r11
|
||||
|
||||
.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
|
||||
movq %rdx,%rax
|
||||
movq %r8,%rdx
|
||||
adcxq %rbx,%r11
|
||||
adoxq %r13,%r12
|
||||
|
||||
mulxq 32+8(%rsp),%rbx,%rdx
|
||||
movq %rax,%rdx
|
||||
movq %rax,64+48+8(%rsp,%rcx,8)
|
||||
|
||||
mulxq 40(%rbp),%rax,%r13
|
||||
adcxq %rax,%r12
|
||||
adoxq %r14,%r13
|
||||
|
||||
mulxq 48(%rbp),%rax,%r14
|
||||
adcxq %rax,%r13
|
||||
adoxq %r15,%r14
|
||||
|
||||
mulxq 56(%rbp),%rax,%r15
|
||||
movq %rbx,%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rsi,%r15
|
||||
adcxq %rsi,%r15
|
||||
|
||||
.byte 0x67,0x67,0x67
|
||||
incq %rcx
|
||||
jnz .Lsqrx8x_reduce
|
||||
|
||||
movq %rsi,%rax
|
||||
cmpq 0+8(%rsp),%rbp
|
||||
jae .Lsqrx8x_no_tail
|
||||
|
||||
movq 48+8(%rsp),%rdx
|
||||
addq 0(%rdi),%r8
|
||||
leaq 64(%rbp),%rbp
|
||||
movq $-8,%rcx
|
||||
adcxq 8(%rdi),%r9
|
||||
adcxq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
leaq 64(%rdi),%rdi
|
||||
sbbq %rax,%rax
|
||||
|
||||
xorq %rsi,%rsi
|
||||
movq %rax,16+8(%rsp)
|
||||
jmp .Lsqrx8x_tail
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_tail:
|
||||
movq %r8,%rbx
|
||||
mulxq 0(%rbp),%rax,%r8
|
||||
adcxq %rax,%rbx
|
||||
adoxq %r9,%r8
|
||||
|
||||
mulxq 8(%rbp),%rax,%r9
|
||||
adcxq %rax,%r8
|
||||
adoxq %r10,%r9
|
||||
|
||||
mulxq 16(%rbp),%rax,%r10
|
||||
adcxq %rax,%r9
|
||||
adoxq %r11,%r10
|
||||
|
||||
mulxq 24(%rbp),%rax,%r11
|
||||
adcxq %rax,%r10
|
||||
adoxq %r12,%r11
|
||||
|
||||
.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00
|
||||
adcxq %rax,%r11
|
||||
adoxq %r13,%r12
|
||||
|
||||
mulxq 40(%rbp),%rax,%r13
|
||||
adcxq %rax,%r12
|
||||
adoxq %r14,%r13
|
||||
|
||||
mulxq 48(%rbp),%rax,%r14
|
||||
adcxq %rax,%r13
|
||||
adoxq %r15,%r14
|
||||
|
||||
mulxq 56(%rbp),%rax,%r15
|
||||
movq 72+48+8(%rsp,%rcx,8),%rdx
|
||||
adcxq %rax,%r14
|
||||
adoxq %rsi,%r15
|
||||
movq %rbx,(%rdi,%rcx,8)
|
||||
movq %r8,%rbx
|
||||
adcxq %rsi,%r15
|
||||
|
||||
incq %rcx
|
||||
jnz .Lsqrx8x_tail
|
||||
|
||||
cmpq 0+8(%rsp),%rbp
|
||||
jae .Lsqrx8x_tail_done
|
||||
|
||||
subq 16+8(%rsp),%rsi
|
||||
movq 48+8(%rsp),%rdx
|
||||
leaq 64(%rbp),%rbp
|
||||
adcq 0(%rdi),%r8
|
||||
adcq 8(%rdi),%r9
|
||||
adcq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
leaq 64(%rdi),%rdi
|
||||
sbbq %rax,%rax
|
||||
subq $8,%rcx
|
||||
|
||||
xorq %rsi,%rsi
|
||||
movq %rax,16+8(%rsp)
|
||||
jmp .Lsqrx8x_tail
|
||||
|
||||
.align 32
|
||||
.Lsqrx8x_tail_done:
|
||||
xorq %rax,%rax
|
||||
addq 24+8(%rsp),%r8
|
||||
adcq $0,%r9
|
||||
adcq $0,%r10
|
||||
adcq $0,%r11
|
||||
adcq $0,%r12
|
||||
adcq $0,%r13
|
||||
adcq $0,%r14
|
||||
adcq $0,%r15
|
||||
adcq $0,%rax
|
||||
|
||||
subq 16+8(%rsp),%rsi
|
||||
.Lsqrx8x_no_tail:
|
||||
adcq 0(%rdi),%r8
|
||||
.byte 102,72,15,126,217
|
||||
adcq 8(%rdi),%r9
|
||||
movq 56(%rbp),%rsi
|
||||
.byte 102,72,15,126,213
|
||||
adcq 16(%rdi),%r10
|
||||
adcq 24(%rdi),%r11
|
||||
adcq 32(%rdi),%r12
|
||||
adcq 40(%rdi),%r13
|
||||
adcq 48(%rdi),%r14
|
||||
adcq 56(%rdi),%r15
|
||||
adcq $0,%rax
|
||||
|
||||
movq 32+8(%rsp),%rbx
|
||||
movq 64(%rdi,%rcx,1),%rdx
|
||||
|
||||
movq %r8,0(%rdi)
|
||||
leaq 64(%rdi),%r8
|
||||
movq %r9,8(%rdi)
|
||||
movq %r10,16(%rdi)
|
||||
movq %r11,24(%rdi)
|
||||
movq %r12,32(%rdi)
|
||||
movq %r13,40(%rdi)
|
||||
movq %r14,48(%rdi)
|
||||
movq %r15,56(%rdi)
|
||||
|
||||
leaq 64(%rdi,%rcx,1),%rdi
|
||||
cmpq 8+8(%rsp),%r8
|
||||
jb .Lsqrx8x_reduction_loop
|
||||
.byte 0xf3,0xc3
|
||||
.cfi_endproc
|
||||
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
|
||||
@@ -14,11 +14,19 @@
|
||||
# CHECK: BOLT-INFO
|
||||
|
||||
.text
|
||||
.section .text.startup,"ax",@progbits
|
||||
.p2align 5,,31
|
||||
.globl main
|
||||
.type main, %function
|
||||
.size main, .Lend1-main
|
||||
main:
|
||||
# FDATA: 0 [unknown] 0 1 main 0 0 510
|
||||
jmp test_function
|
||||
|
||||
.globl test_function
|
||||
.hidden test_function
|
||||
.type test_function,@function
|
||||
.align 32
|
||||
test_function:
|
||||
# FDATA: 0 main 0 1 test_function 0 0 510
|
||||
xorq %rcx, %rcx
|
||||
andq $3, %rdi
|
||||
jmpq *jumptbl(,%rdi,8)
|
||||
@@ -33,8 +41,8 @@ main:
|
||||
movl $0x0, %eax
|
||||
.J1:
|
||||
jrcxz .BBend
|
||||
# FDATA: 1 main #.J1# 1 main #.BB2# 0 10
|
||||
# FDATA: 1 main #.J1# 1 main #.BBend# 0 500
|
||||
# FDATA: 1 test_function #.J1# 1 test_function #.BB2# 0 10
|
||||
# FDATA: 1 test_function #.J1# 1 test_function #.BBend# 0 500
|
||||
.BB2:
|
||||
movl $0x2, %eax
|
||||
jmp .BBend
|
||||
|
||||
Reference in New Issue
Block a user