Update matmul example to the latest polly version
Because the naming of the SCoPs has changed, Polly was no longer able to read the user-provided .jscop files. By renaming the provided files, Polly now finds them again and can use them to optimize the matmul function. We also update the generated files to reflect the very latest version of Polly. llvm-svn: 182265
@@ -110,7 +110,7 @@ view-scops-only:
|
||||
<pre>
|
||||
[...]
|
||||
Printing analysis 'Polly - Create polyhedral description of Scops' for region:
|
||||
'%1 => %17' in function 'init_array':
|
||||
'for.cond => for.end19' in function 'init_array':
|
||||
Context:
|
||||
{ [] }
|
||||
Statements {
|
||||
@@ -135,7 +135,7 @@ Printing analysis 'Polly - Create polyhedral description of Scops' for region:
|
||||
}
|
||||
[...]
|
||||
Printing analysis 'Polly - Create polyhedral description of Scops' for region:
|
||||
'%1 => %17' in function 'main':
|
||||
'for.cond => for.end30' in function 'main':
|
||||
Context:
|
||||
{ [] }
|
||||
Statements {
|
||||
@@ -178,7 +178,7 @@ Printing analysis 'Polly - Create polyhedral description of Scops' for region:
|
||||
<li><h4>Show the dependences for the SCoPs</h4>
|
||||
<pre class="code">opt -basicaa -polly-dependences -analyze matmul.preopt.ll</pre>
|
||||
<pre>Printing analysis 'Polly - Calculate dependences for SCoP' for region:
|
||||
'for.cond => for.end28' in function 'init_array':
|
||||
'for.cond => for.end19' in function 'init_array':
|
||||
Must dependences:
|
||||
{ }
|
||||
May dependences:
|
||||
@@ -188,7 +188,7 @@ Printing analysis 'Polly - Create polyhedral description of Scops' for region:
|
||||
May no source:
|
||||
{ }
|
||||
Printing analysis 'Polly - Calculate dependences for SCoP' for region:
|
||||
'for.cond => for.end48' in function 'main':
|
||||
'for.cond => for.end30' in function 'main':
|
||||
Must dependences:
|
||||
{ Stmt_4[i0, i1] -> Stmt_6[i0, i1, 0] :
|
||||
i0 >= 0 and i0 <= 1023 and i1 >= 0 and i1 <= 1023;
|
||||
@@ -221,8 +221,8 @@ Printing analysis 'Polly - Calculate dependences for SCoP' for region:
|
||||
Polly can export the polyhedral representation in so called jscop files. Jscop
|
||||
files contain the polyhedral representation stored in a JSON file.
|
||||
<pre class="code">opt -basicaa -polly-export-jscop matmul.preopt.ll</pre>
|
||||
<pre>Writing SCoP 'for.cond => for.end28' in function 'init_array' to './init_array___%for.cond---%for.end28.jscop'.
|
||||
Writing SCoP 'for.cond => for.end48' in function 'main' to './main___%for.cond---%for.end48.jscop'.
|
||||
<pre>Writing SCoP 'for.cond => for.end19' in function 'init_array' to './init_array___%for.cond---%for.end19.jscop'.
|
||||
Writing SCoP 'for.cond => for.end30' in function 'main' to './main___%for.cond---%for.end30.jscop'.
|
||||
</pre></li>
|
||||
|
||||
<li><h4>Import the changed jscop files and print the updated SCoP structure
|
||||
@@ -268,7 +268,7 @@ opt matmul.preopt.ll -basicaa \
|
||||
</pre>
|
||||
<pre>
|
||||
[...]
|
||||
Reading JScop '%1 => %17' in function 'main' from './main___%1---%17.jscop.interchanged'.
|
||||
Reading JScop 'for.cond => for.end30' in function 'main' from './main___%for.cond---%for.end30.jscop.interchanged'.
|
||||
[...]
|
||||
main():
|
||||
for (c2=0;c2<=1535;c2++) {
|
||||
@@ -295,7 +295,7 @@ opt matmul.preopt.ll -basicaa \
|
||||
</pre>
|
||||
<pre>
|
||||
[...]
|
||||
Reading JScop '%1 => %17' in function 'main' from './main___%1---%17.jscop.interchanged+tiled'.
|
||||
Reading JScop 'for.cond => for.end30' in function 'main' from './main___%for.cond---%for.end30.jscop.interchanged+tiled'.
|
||||
[...]
|
||||
main():
|
||||
for (c2=0;c2<=1535;c2++) {
|
||||
@@ -329,7 +329,7 @@ opt matmul.preopt.ll -basicaa \
|
||||
|
||||
<pre>
|
||||
[...]
|
||||
Reading JScop '%1 => %17' in function 'main' from './main___%1---%17.jscop.interchanged+tiled+vector'.
|
||||
Reading JScop 'for.cond => for.end30' in function 'main' from './main___%for.cond---%for.end30.jscop.interchanged+tiled+vector'.
|
||||
[...]
|
||||
main():
|
||||
for (c2=0;c2<=1535;c2++) {
|
||||
@@ -369,11 +369,11 @@ opt -basicaa \
|
||||
-polly-codegen matmul.preopt.ll \
|
||||
| opt -O3 > matmul.polly.interchanged.ll</pre>
|
||||
<pre>
|
||||
Reading JScop '%1 => %19' in function 'init_array' from
|
||||
'./init_array___%1---%19.jscop.interchanged'.
|
||||
Reading JScop 'for.cond => for.end19' in function 'init_array' from
|
||||
'./init_array___%for.cond---%for.end19.jscop.interchanged'.
|
||||
File could not be read: No such file or directory
|
||||
Reading JScop '%1 => %17' in function 'main' from
|
||||
'./main___%1---%17.jscop.interchanged'.
|
||||
Reading JScop 'for.cond => for.end30' in function 'main' from
|
||||
'./main___%for.cond---%for.end30.jscop.interchanged'.
|
||||
</pre>
|
||||
<pre class="code">
|
||||
opt -basicaa \
|
||||
@@ -381,11 +381,11 @@ opt -basicaa \
|
||||
-polly-codegen matmul.preopt.ll \
|
||||
| opt -O3 > matmul.polly.interchanged+tiled.ll</pre>
|
||||
<pre>
|
||||
Reading JScop '%1 => %19' in function 'init_array' from
|
||||
'./init_array___%1---%19.jscop.interchanged+tiled'.
|
||||
Reading JScop 'for.cond => for.end19' in function 'init_array' from
|
||||
'./init_array___%for.cond---%for.end19.jscop.interchanged+tiled'.
|
||||
File could not be read: No such file or directory
|
||||
Reading JScop '%1 => %17' in function 'main' from
|
||||
'./main___%1---%17.jscop.interchanged+tiled'.
|
||||
Reading JScop 'for.cond => for.end30' in function 'main' from
|
||||
'./main___%for.cond---%for.end30.jscop.interchanged+tiled'.
|
||||
</pre>
|
||||
<pre class="code">
|
||||
opt -basicaa \
|
||||
@@ -393,11 +393,11 @@ opt -basicaa \
|
||||
-polly-codegen -polly-vectorizer=polly matmul.preopt.ll \
|
||||
| opt -O3 > matmul.polly.interchanged+tiled+vector.ll</pre>
|
||||
<pre>
|
||||
Reading JScop '%1 => %19' in function 'init_array' from
|
||||
'./init_array___%1---%19.jscop.interchanged+tiled+vector'.
|
||||
Reading JScop 'for.cond => for.end19' in function 'init_array' from
|
||||
'./init_array___%for.cond---%for.end19.jscop.interchanged+tiled+vector'.
|
||||
File could not be read: No such file or directory
|
||||
Reading JScop '%1 => %17' in function 'main' from
|
||||
'./main___%1---%17.jscop.interchanged+tiled+vector'.
|
||||
Reading JScop 'for.cond => for.end30' in function 'main' from
|
||||
'./main___%for.cond---%for.end30.jscop.interchanged+tiled+vector'.
|
||||
</pre>
|
||||
<pre class="code">
|
||||
opt -basicaa \
|
||||
@@ -405,11 +405,11 @@ opt -basicaa \
|
||||
-polly-codegen -polly-vectorizer=polly -enable-polly-openmp matmul.preopt.ll \
|
||||
| opt -O3 > matmul.polly.interchanged+tiled+openmp.ll</pre>
|
||||
<pre>
|
||||
Reading JScop '%1 => %19' in function 'init_array' from
|
||||
'./init_array___%1---%19.jscop.interchanged+tiled+vector'.
|
||||
Reading JScop 'for.cond => for.end19' in function 'init_array' from
|
||||
'./init_array___%for.cond---%for.end19.jscop.interchanged+tiled+vector'.
|
||||
File could not be read: No such file or directory
|
||||
Reading JScop '%1 => %17' in function 'main' from
|
||||
'./main___%1---%17.jscop.interchanged+tiled+vector'.
|
||||
Reading JScop 'for.cond => for.end30' in function 'main' from
|
||||
'./main___%for.cond---%for.end30.jscop.interchanged+tiled+vector'.
|
||||
</pre>
|
||||
|
||||
<li><h4>Create the executables</h4>
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
{
|
||||
"context" : "{ [] }",
|
||||
"name" : "%1 => %19",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_5[i0, i1] -> MemRef_A[1536i0 + i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_5[i0, i1] -> MemRef_B[1536i0 + i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_5[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
|
||||
"name" : "Stmt_5",
|
||||
"schedule" : "{ Stmt_5[i0, i1] -> scattering[0, i0, 0, i1, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,21 @@
|
||||
{
|
||||
"context" : "{ : }",
|
||||
"name" : "for.cond => for.end19",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_A[1536i0 + i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_B[1536i0 + i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "{ Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
{
|
||||
"context" : "{ : }",
|
||||
"name" : "%1 => %17",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_4[i0, i1] -> MemRef_C[1536i0 + i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_4[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
|
||||
"name" : "Stmt_4",
|
||||
"schedule" : "{ Stmt_4[i0, i1] -> scattering[0, i0, 0, i1, 0, 0, 0] }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_A[1536i0 + i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_B[i1 + 1536i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_6[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_6[i0, i1, i2] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 and i2 >= 0 and i2 <= 1535 }",
|
||||
"name" : "Stmt_6",
|
||||
"schedule" : "{ Stmt_6[i0, i1, i2] -> scattering[0, i0, 0, i1, 1, i2, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"context" : "{ : }",
|
||||
"name" : "for.cond => for.end30",
|
||||
"statements" : [
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body3[i0, i1] -> MemRef_C[1536i0 + i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body3[i0, i1] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 }",
|
||||
"name" : "Stmt_for_body3",
|
||||
"schedule" : "{ Stmt_for_body3[i0, i1] -> scattering[0, i0, 0, i1, 0, 0, 0] }"
|
||||
},
|
||||
{
|
||||
"accesses" : [
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_A[1536i0 + i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "read",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_B[i1 + 1536i2] }"
|
||||
},
|
||||
{
|
||||
"kind" : "write",
|
||||
"relation" : "{ Stmt_for_body8[i0, i1, i2] -> MemRef_C[1536i0 + i1] }"
|
||||
}
|
||||
],
|
||||
"domain" : "{ Stmt_for_body8[i0, i1, i2] : i0 >= 0 and i0 <= 1535 and i1 >= 0 and i1 <= 1535 and i2 >= 0 and i2 <= 1535 }",
|
||||
"name" : "Stmt_for_body8",
|
||||
"schedule" : "{ Stmt_for_body8[i0, i1, i2] -> scattering[0, i0, 0, i1, 1, i2, 0] }"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -2,74 +2,112 @@
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
# BB#0:
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI0_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB0_1: # %.preheader
|
||||
.LBB0_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_2 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB0_2: # Parent Loop BB0_1 Depth=1
|
||||
.LBB0_2: # %for.body3
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB0_2
|
||||
# BB#3: # in Loop: Header=BB0_1 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
# BB#3: # %for.inc17
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4:
|
||||
# BB#4: # %for.end19
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp0:
|
||||
.size init_array, .Ltmp0-init_array
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
# BB#0:
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
pushq %rax
|
||||
movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
.LBB1_1: # %.preheader
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
xorl %r14d, %r14d
|
||||
movq stdout(%rip), %rdi
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.LBB1_2: # Parent Loop BB1_1 Depth=1
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movss C+9437184(%rbx,%r14,4), %xmm0
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movb $1, %al
|
||||
callq fprintf
|
||||
movslq %r14d, %rax
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
@@ -79,113 +117,146 @@ print_array: # @print_array
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
jne .LBB1_4
|
||||
# BB#3: # in Loop: Header=BB1_2 Depth=2
|
||||
# BB#3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
.LBB1_4: # in Loop: Header=BB1_2 Depth=2
|
||||
incq %r14
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
movq %rsi, %rdi
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # in Loop: Header=BB1_1 Depth=1
|
||||
# BB#5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %rbx # imm = 0x1800
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6:
|
||||
addq $8, %rsp
|
||||
# BB#6: # %for.end12
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp1:
|
||||
.size print_array, .Ltmp1-print_array
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
# BB#0:
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI2_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp19:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp20:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp21:
|
||||
.cfi_def_cfa_register %rbp
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %.preheader.i
|
||||
.LBB2_1: # %for.cond1.preheader.i
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
jne .LBB2_2
|
||||
# BB#3: # in Loop: Header=BB2_1 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
xorl %edx, %edx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
jne .LBB2_1
|
||||
.align 16, 0x90
|
||||
.LBB2_4: # %.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_5 Depth 2
|
||||
# Child Loop BB2_6 Depth 3
|
||||
xorl %eax, %eax
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # Parent Loop BB2_4 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_6 Depth 3
|
||||
movl $0, C(%rcx,%rdx)
|
||||
leaq B(%rcx), %rsi
|
||||
pxor %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
.LBB2_2: # %for.body3.i
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %for.inc17.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4:
|
||||
xorl %r8d, %r8d
|
||||
movl $A, %r9d
|
||||
.align 16, 0x90
|
||||
.LBB2_6: # Parent Loop BB2_4 Depth=1
|
||||
# Parent Loop BB2_5 Depth=2
|
||||
.LBB2_5: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_6 Depth 2
|
||||
# Child Loop BB2_7 Depth 3
|
||||
leaq (%r8,%r8,2), %rdx
|
||||
shlq $11, %rdx
|
||||
leaq C(%rdx), %rsi
|
||||
xorl %edi, %edi
|
||||
.align 16, 0x90
|
||||
.LBB2_6: # %for.body3
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_7 Depth 3
|
||||
movl $0, (%rsi)
|
||||
vxorps %xmm0, %xmm0, %xmm0
|
||||
movq $-9437184, %rax # imm = 0xFFFFFFFFFF700000
|
||||
movq %r9, %rcx
|
||||
.align 16, 0x90
|
||||
.LBB2_7: # %for.body8
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_6 Depth=2
|
||||
# => This Inner Loop Header: Depth=3
|
||||
movss A(%rdx,%rdi,4), %xmm1
|
||||
mulss (%rsi), %xmm1
|
||||
addss %xmm1, %xmm0
|
||||
addq $6144, %rsi # imm = 0x1800
|
||||
vmovss (%rcx), %xmm1
|
||||
vmulss B+9437184(%rax,%rdi,4), %xmm1, %xmm1
|
||||
vaddss %xmm1, %xmm0, %xmm0
|
||||
addq $4, %rcx
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
jne .LBB2_7
|
||||
# BB#8: # %for.inc25
|
||||
# in Loop: Header=BB2_6 Depth=2
|
||||
vmovss %xmm0, (%rsi)
|
||||
leaq C+4(%rdx,%rdi,4), %rsi
|
||||
incq %rdi
|
||||
cmpq $1536, %rdi # imm = 0x600
|
||||
jne .LBB2_6
|
||||
# BB#7: # in Loop: Header=BB2_5 Depth=2
|
||||
movss %xmm0, C(%rcx,%rdx)
|
||||
addq $4, %rcx
|
||||
cmpq $6144, %rcx # imm = 0x1800
|
||||
# BB#9: # %for.inc28
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
addq $6144, %r9 # imm = 0x1800
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB2_5
|
||||
# BB#8: # %init_array.exit
|
||||
# in Loop: Header=BB2_4 Depth=1
|
||||
addq $6144, %rdx # imm = 0x1800
|
||||
cmpq $9437184, %rdx # imm = 0x900000
|
||||
jne .LBB2_4
|
||||
# BB#9:
|
||||
# BB#10: # %for.end30
|
||||
xorl %eax, %eax
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp2:
|
||||
.size main, .Ltmp2-main
|
||||
.Ltmp22:
|
||||
.size main, .Ltmp22-main
|
||||
.cfi_endproc
|
||||
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
|
||||
@@ -2,76 +2,112 @@
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
# BB#0: # %pollyBB
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI0_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB0_2: # %polly.loop_header1.preheader
|
||||
.LBB0_1: # %polly.loop_preheader3
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_3 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
# Child Loop BB0_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB0_3: # %polly.loop_body2
|
||||
# Parent Loop BB0_2 Depth=1
|
||||
.LBB0_2: # %polly.loop_header2
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
jne .LBB0_3
|
||||
# BB#1: # %polly.loop_header.loopexit
|
||||
# in Loop: Header=BB0_2 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB0_2
|
||||
# BB#4: # %polly.after_loop
|
||||
# BB#3: # %polly.loop_exit4
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4: # %polly.loop_exit
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp0:
|
||||
.size init_array, .Ltmp0-init_array
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
# BB#0:
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
pushq %rax
|
||||
movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
.LBB1_1: # %.preheader
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
xorl %r14d, %r14d
|
||||
movq stdout(%rip), %rdi
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.LBB1_2: # Parent Loop BB1_1 Depth=1
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movss C+9437184(%rbx,%r14,4), %xmm0
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movb $1, %al
|
||||
callq fprintf
|
||||
movslq %r14d, %rax
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
@@ -81,217 +117,258 @@ print_array: # @print_array
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
jne .LBB1_4
|
||||
# BB#3: # in Loop: Header=BB1_2 Depth=2
|
||||
# BB#3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
.LBB1_4: # in Loop: Header=BB1_2 Depth=2
|
||||
incq %r14
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
movq %rsi, %rdi
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # in Loop: Header=BB1_1 Depth=1
|
||||
# BB#5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %rbx # imm = 0x1800
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6:
|
||||
addq $8, %rsp
|
||||
# BB#6: # %for.end12
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp1:
|
||||
.size print_array, .Ltmp1-print_array
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
# BB#0: # %pollyBB
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp20:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp21:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp22:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
subq $24, %rsp
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI2_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
subq $56, %rsp
|
||||
.Ltmp23:
|
||||
.cfi_offset %rbx, -56
|
||||
.Ltmp24:
|
||||
.cfi_offset %r12, -48
|
||||
.Ltmp25:
|
||||
.cfi_offset %r13, -40
|
||||
.Ltmp26:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp27:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %ebx, %ebx
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %polly.loop_header1.preheader.i
|
||||
.LBB2_1: # %polly.loop_preheader3.i
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # %polly.loop_body2.i
|
||||
.LBB2_2: # %polly.loop_header2.i
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
movl %ecx, %edx
|
||||
imull %ebx, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %rbx, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %polly.loop_header.loopexit.i
|
||||
# BB#3: # %polly.loop_exit4.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
incq %rbx
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4: # %polly.loop_header.preheader
|
||||
# BB#4: # %polly.loop_preheader3.preheader
|
||||
movl $C, %edi
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
callq memset
|
||||
xorl %eax, %eax
|
||||
movq %rax, 16(%rsp) # 8-byte Spill
|
||||
movq %rax, (%rsp) # 8-byte Spill
|
||||
jmp .LBB2_6
|
||||
xorl %esi, %esi
|
||||
movl $C+16, %eax
|
||||
movq %rax, -88(%rbp) # 8-byte Spill
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %polly.loop_header7.loopexit
|
||||
# in Loop: Header=BB2_6 Depth=1
|
||||
addq $393216, (%rsp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
movq 16(%rsp), %rax # 8-byte Reload
|
||||
addq $64, %rax
|
||||
movq %rax, 16(%rsp) # 8-byte Spill
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
je .LBB2_7
|
||||
.LBB2_6: # %polly.loop_header12.preheader
|
||||
.LBB2_5: # %polly.loop_preheader17
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_9 Depth 2
|
||||
# Child Loop BB2_11 Depth 3
|
||||
# Child Loop BB2_14 Depth 4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
movq 16(%rsp), %rax # 8-byte Reload
|
||||
leaq 63(%rax), %rax
|
||||
movq (%rsp), %rcx # 8-byte Reload
|
||||
leaq A(%rcx), %rdx
|
||||
movq %rdx, 8(%rsp) # 8-byte Spill
|
||||
# Child Loop BB2_15 Depth 2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rsi, -56(%rbp) # 8-byte Spill
|
||||
movq %rsi, %rax
|
||||
orq $63, %rax
|
||||
movq %rax, -72(%rbp) # 8-byte Spill
|
||||
leaq -1(%rax), %rax
|
||||
movq %rax, -48(%rbp) # 8-byte Spill
|
||||
xorl %edx, %edx
|
||||
jmp .LBB2_9
|
||||
.align 16, 0x90
|
||||
.LBB2_8: # %polly.loop_header12.loopexit
|
||||
# in Loop: Header=BB2_9 Depth=2
|
||||
addq $256, %rcx # imm = 0x100
|
||||
addq $64, %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
je .LBB2_5
|
||||
.LBB2_9: # %polly.loop_header17.preheader
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
.LBB2_15: # %polly.loop_preheader24
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_11 Depth 3
|
||||
# Child Loop BB2_14 Depth 4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
leaq 63(%rdx), %rsi
|
||||
xorl %edi, %edi
|
||||
movq 8(%rsp), %r8 # 8-byte Reload
|
||||
movq %rdx, %r9
|
||||
jmp .LBB2_11
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rdx, -80(%rbp) # 8-byte Spill
|
||||
leaq -4(%rdx), %rcx
|
||||
movq %rdx, %rax
|
||||
decq %rax
|
||||
cmovsq %rcx, %rax
|
||||
movq %rax, %r15
|
||||
sarq $63, %r15
|
||||
shrq $62, %r15
|
||||
addq %rax, %r15
|
||||
andq $-4, %r15
|
||||
movq %rdx, %r13
|
||||
orq $63, %r13
|
||||
leaq -4(%r13), %rdx
|
||||
xorl %r10d, %r10d
|
||||
movq -88(%rbp), %rax # 8-byte Reload
|
||||
leaq (%rax,%r15,4), %rax
|
||||
movq %rax, -64(%rbp) # 8-byte Spill
|
||||
leaq B+16(,%r15,4), %rbx
|
||||
leaq 4(%r15), %r12
|
||||
.align 16, 0x90
|
||||
.LBB2_10: # %polly.loop_header17.loopexit
|
||||
# in Loop: Header=BB2_11 Depth=3
|
||||
addq $256, %r8 # imm = 0x100
|
||||
addq $98304, %r9 # imm = 0x18000
|
||||
addq $64, %rdi
|
||||
cmpq $1536, %rdi # imm = 0x600
|
||||
je .LBB2_8
|
||||
.LBB2_11: # %polly.loop_body18
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
.LBB2_8: # %polly.loop_header23
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# => This Loop Header: Depth=3
|
||||
# Child Loop BB2_14 Depth 4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
cmpq %rax, 16(%rsp) # 8-byte Folded Reload
|
||||
jg .LBB2_10
|
||||
# BB#12: # %polly.loop_body23.lr.ph
|
||||
# in Loop: Header=BB2_11 Depth=3
|
||||
leaq 63(%rdi), %r10
|
||||
xorl %r11d, %r11d
|
||||
jmp .LBB2_14
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq -72(%rbp), %rsi # 8-byte Folded Reload
|
||||
jg .LBB2_13
|
||||
# BB#9: # %polly.loop_header30.preheader
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
movq %r10, %rax
|
||||
orq $63, %rax
|
||||
cmpq %rax, %r10
|
||||
jg .LBB2_13
|
||||
# BB#10: # in Loop: Header=BB2_8 Depth=3
|
||||
decq %rax
|
||||
movq -64(%rbp), %r14 # 8-byte Reload
|
||||
movq -56(%rbp), %r11 # 8-byte Reload
|
||||
.align 16, 0x90
|
||||
.LBB2_13: # %polly.loop_header22.loopexit
|
||||
# in Loop: Header=BB2_14 Depth=4
|
||||
addq $6144, %r11 # imm = 0x1800
|
||||
cmpq $393216, %r11 # imm = 0x60000
|
||||
je .LBB2_10
|
||||
.LBB2_14: # %polly.loop_body23
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
# Parent Loop BB2_11 Depth=3
|
||||
.LBB2_11: # %polly.loop_header37.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# => This Loop Header: Depth=4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
cmpq %r10, %rdi
|
||||
jg .LBB2_13
|
||||
# BB#15: # %polly.loop_body23
|
||||
# in Loop: Header=BB2_14 Depth=4
|
||||
cmpq %rsi, %rdx
|
||||
jg .LBB2_13
|
||||
# BB#16: # %polly.loop_body33.lr.ph.preheader
|
||||
# in Loop: Header=BB2_14 Depth=4
|
||||
leaq (%r8,%r11), %rbx
|
||||
xorl %r14d, %r14d
|
||||
movq %r9, %r15
|
||||
movq %r14, %r12
|
||||
jmp .LBB2_18
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq %r13, %r12
|
||||
movq %rbx, %r8
|
||||
movq %r10, %rsi
|
||||
jg .LBB2_12
|
||||
.align 16, 0x90
|
||||
.LBB2_17: # %polly.loop_header27.loopexit
|
||||
# in Loop: Header=BB2_18 Depth=5
|
||||
addq $1536, %r15 # imm = 0x600
|
||||
incq %r12
|
||||
cmpq $64, %r12
|
||||
je .LBB2_13
|
||||
.LBB2_18: # %polly.loop_body33.lr.ph
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
# Parent Loop BB2_11 Depth=3
|
||||
# Parent Loop BB2_14 Depth=4
|
||||
.LBB2_17: # %polly.loop_header46.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# => This Loop Header: Depth=5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
movss (%rbx,%r12,4), %xmm0
|
||||
pshufd $0, %xmm0, %xmm0 # xmm0 = xmm0[0,0,0,0]
|
||||
movq %r14, %r13
|
||||
.align 16, 0x90
|
||||
.LBB2_19: # %polly.loop_body33
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
# Parent Loop BB2_11 Depth=3
|
||||
# Parent Loop BB2_14 Depth=4
|
||||
# Parent Loop BB2_18 Depth=5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
leaq (%r11,%r11,2), %rcx
|
||||
shlq $11, %rcx
|
||||
vbroadcastss A(%rcx,%rsi,4), %xmm0
|
||||
movq %r14, %rdi
|
||||
movq %r8, %r9
|
||||
movq %r15, %rcx
|
||||
.LBB2_18: # %polly.loop_header46
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# Parent Loop BB2_17 Depth=5
|
||||
# => This Inner Loop Header: Depth=6
|
||||
movaps B(%r13,%r15,4), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
leaq (%r11,%r13), %rbp
|
||||
addps C(%rcx,%rbp), %xmm1
|
||||
movaps %xmm1, C(%rcx,%rbp)
|
||||
addq $16, %r13
|
||||
cmpq $256, %r13 # imm = 0x100
|
||||
jne .LBB2_19
|
||||
jmp .LBB2_17
|
||||
.LBB2_7: # %polly.after_loop9
|
||||
vmulps (%r9), %xmm0, %xmm1
|
||||
vaddps (%rdi), %xmm1, %xmm1
|
||||
vmovaps %xmm1, (%rdi)
|
||||
addq $16, %rdi
|
||||
addq $16, %r9
|
||||
addq $4, %rcx
|
||||
cmpq %rdx, %rcx
|
||||
jle .LBB2_18
|
||||
# BB#16: # %polly.loop_exit48
|
||||
# in Loop: Header=BB2_17 Depth=5
|
||||
addq $6144, %r8 # imm = 0x1800
|
||||
cmpq %rax, %rsi
|
||||
leaq 1(%rsi), %rsi
|
||||
jle .LBB2_17
|
||||
.align 16, 0x90
|
||||
.LBB2_12: # %polly.loop_exit39
|
||||
# in Loop: Header=BB2_11 Depth=4
|
||||
addq $6144, %r14 # imm = 0x1800
|
||||
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
|
||||
leaq 1(%r11), %r11
|
||||
jle .LBB2_11
|
||||
.align 16, 0x90
|
||||
.LBB2_13: # %polly.loop_exit32
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
addq $393216, %rbx # imm = 0x60000
|
||||
cmpq $1472, %r10 # imm = 0x5C0
|
||||
leaq 64(%r10), %r10
|
||||
movq -56(%rbp), %rsi # 8-byte Reload
|
||||
jl .LBB2_8
|
||||
# BB#14: # %polly.loop_exit25
|
||||
# in Loop: Header=BB2_15 Depth=2
|
||||
movq -80(%rbp), %rdx # 8-byte Reload
|
||||
cmpq $1472, %rdx # imm = 0x5C0
|
||||
leaq 64(%rdx), %rdx
|
||||
jl .LBB2_15
|
||||
# BB#6: # %polly.loop_exit18
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
addq $393216, -88(%rbp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
cmpq $1472, %rsi # imm = 0x5C0
|
||||
leaq 64(%rsi), %rsi
|
||||
jl .LBB2_5
|
||||
# BB#7: # %polly.loop_exit11
|
||||
xorl %eax, %eax
|
||||
addq $24, %rsp
|
||||
addq $56, %rsp
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
@@ -299,8 +376,9 @@ main: # @main
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp2:
|
||||
.size main, .Ltmp2-main
|
||||
.Ltmp28:
|
||||
.size main, .Ltmp28-main
|
||||
.cfi_endproc
|
||||
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
|
||||
@@ -2,76 +2,112 @@
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
# BB#0: # %pollyBB
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI0_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB0_2: # %polly.loop_header1.preheader
|
||||
.LBB0_1: # %polly.loop_preheader3
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_3 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
# Child Loop BB0_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB0_3: # %polly.loop_body2
|
||||
# Parent Loop BB0_2 Depth=1
|
||||
.LBB0_2: # %polly.loop_header2
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
jne .LBB0_3
|
||||
# BB#1: # %polly.loop_header.loopexit
|
||||
# in Loop: Header=BB0_2 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB0_2
|
||||
# BB#4: # %polly.after_loop
|
||||
# BB#3: # %polly.loop_exit4
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4: # %polly.loop_exit
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp0:
|
||||
.size init_array, .Ltmp0-init_array
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
# BB#0:
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
pushq %rax
|
||||
movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
.LBB1_1: # %.preheader
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
xorl %r14d, %r14d
|
||||
movq stdout(%rip), %rdi
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.LBB1_2: # Parent Loop BB1_1 Depth=1
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movss C+9437184(%rbx,%r14,4), %xmm0
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movb $1, %al
|
||||
callq fprintf
|
||||
movslq %r14d, %rax
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
@@ -81,222 +117,252 @@ print_array: # @print_array
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
jne .LBB1_4
|
||||
# BB#3: # in Loop: Header=BB1_2 Depth=2
|
||||
# BB#3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
.LBB1_4: # in Loop: Header=BB1_2 Depth=2
|
||||
incq %r14
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
movq %rsi, %rdi
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # in Loop: Header=BB1_1 Depth=1
|
||||
# BB#5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %rbx # imm = 0x1800
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6:
|
||||
addq $8, %rsp
|
||||
# BB#6: # %for.end12
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp1:
|
||||
.size print_array, .Ltmp1-print_array
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
# BB#0: # %pollyBB
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp20:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp21:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp22:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r13
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
subq $40, %rsp
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI2_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
subq $56, %rsp
|
||||
.Ltmp23:
|
||||
.cfi_offset %rbx, -56
|
||||
.Ltmp24:
|
||||
.cfi_offset %r12, -48
|
||||
.Ltmp25:
|
||||
.cfi_offset %r13, -40
|
||||
.Ltmp26:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp27:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %ebx, %ebx
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %polly.loop_header1.preheader.i
|
||||
.LBB2_1: # %polly.loop_preheader3.i
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # %polly.loop_body2.i
|
||||
.LBB2_2: # %polly.loop_header2.i
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
movl %ecx, %edx
|
||||
imull %ebx, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %rbx, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %polly.loop_header.loopexit.i
|
||||
# BB#3: # %polly.loop_exit4.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
incq %rbx
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4: # %polly.loop_header.preheader
|
||||
movl $C, %eax
|
||||
movq %rax, 8(%rsp) # 8-byte Spill
|
||||
# BB#4: # %polly.loop_preheader3.preheader
|
||||
movl $C, %ebx
|
||||
movl $C, %edi
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
movl $C, %edi
|
||||
callq memset
|
||||
movl $A, %eax
|
||||
movq %rax, 16(%rsp) # 8-byte Spill
|
||||
movq $0, 32(%rsp) # 8-byte Folded Spill
|
||||
jmp .LBB2_6
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %polly.loop_header7.loopexit
|
||||
# in Loop: Header=BB2_6 Depth=1
|
||||
addq $393216, 16(%rsp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
addq $393216, 8(%rsp) # 8-byte Folded Spill
|
||||
# imm = 0x60000
|
||||
movq 32(%rsp), %rax # 8-byte Reload
|
||||
addq $64, %rax
|
||||
movq %rax, 32(%rsp) # 8-byte Spill
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
je .LBB2_7
|
||||
.LBB2_6: # %polly.loop_header12.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_9 Depth 2
|
||||
# Child Loop BB2_11 Depth 3
|
||||
# Child Loop BB2_14 Depth 4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
movq 32(%rsp), %rax # 8-byte Reload
|
||||
leaq 63(%rax), %rax
|
||||
movl $B, %ecx
|
||||
movq %rcx, 24(%rsp) # 8-byte Spill
|
||||
xorl %ecx, %ecx
|
||||
movq 8(%rsp), %rdx # 8-byte Reload
|
||||
jmp .LBB2_9
|
||||
.align 16, 0x90
|
||||
.LBB2_8: # %polly.loop_header12.loopexit
|
||||
# in Loop: Header=BB2_9 Depth=2
|
||||
addq $256, %rdx # imm = 0x100
|
||||
addq $256, 24(%rsp) # 8-byte Folded Spill
|
||||
# imm = 0x100
|
||||
addq $64, %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
je .LBB2_5
|
||||
.LBB2_9: # %polly.loop_header17.preheader
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_11 Depth 3
|
||||
# Child Loop BB2_14 Depth 4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
leaq 63(%rcx), %rsi
|
||||
xorl %edi, %edi
|
||||
movq 16(%rsp), %r8 # 8-byte Reload
|
||||
movq 24(%rsp), %r9 # 8-byte Reload
|
||||
jmp .LBB2_11
|
||||
.align 16, 0x90
|
||||
.LBB2_10: # %polly.loop_header17.loopexit
|
||||
# in Loop: Header=BB2_11 Depth=3
|
||||
addq $256, %r8 # imm = 0x100
|
||||
addq $393216, %r9 # imm = 0x60000
|
||||
addq $64, %rdi
|
||||
cmpq $1536, %rdi # imm = 0x600
|
||||
je .LBB2_8
|
||||
.LBB2_11: # %polly.loop_body18
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
# => This Loop Header: Depth=3
|
||||
# Child Loop BB2_14 Depth 4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
cmpq %rax, 32(%rsp) # 8-byte Folded Reload
|
||||
jg .LBB2_10
|
||||
# BB#12: # %polly.loop_body23.lr.ph
|
||||
# in Loop: Header=BB2_11 Depth=3
|
||||
leaq 63(%rdi), %r10
|
||||
xorl %r11d, %r11d
|
||||
jmp .LBB2_14
|
||||
.align 16, 0x90
|
||||
.LBB2_13: # %polly.loop_header22.loopexit
|
||||
# in Loop: Header=BB2_14 Depth=4
|
||||
addq $6144, %r11 # imm = 0x1800
|
||||
cmpq $393216, %r11 # imm = 0x60000
|
||||
je .LBB2_10
|
||||
.LBB2_14: # %polly.loop_body23
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
# Parent Loop BB2_11 Depth=3
|
||||
# => This Loop Header: Depth=4
|
||||
# Child Loop BB2_18 Depth 5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
cmpq %r10, %rdi
|
||||
jg .LBB2_13
|
||||
# BB#15: # %polly.loop_body23
|
||||
# in Loop: Header=BB2_14 Depth=4
|
||||
cmpq %rsi, %rcx
|
||||
jg .LBB2_13
|
||||
# BB#16: # %polly.loop_body33.lr.ph.preheader
|
||||
# in Loop: Header=BB2_14 Depth=4
|
||||
leaq (%rdx,%r11), %rbx
|
||||
leaq (%r8,%r11), %r14
|
||||
xorl %r15d, %r15d
|
||||
movq %r9, %r12
|
||||
movq %r15, %r13
|
||||
jmp .LBB2_18
|
||||
.align 16, 0x90
|
||||
.LBB2_17: # %polly.loop_header27.loopexit
|
||||
# in Loop: Header=BB2_18 Depth=5
|
||||
addq $6144, %r12 # imm = 0x1800
|
||||
incq %r13
|
||||
cmpq $64, %r13
|
||||
je .LBB2_13
|
||||
.LBB2_18: # %polly.loop_body33.lr.ph
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
# Parent Loop BB2_11 Depth=3
|
||||
# Parent Loop BB2_14 Depth=4
|
||||
# => This Loop Header: Depth=5
|
||||
# Child Loop BB2_19 Depth 6
|
||||
movss (%r14,%r13,4), %xmm0
|
||||
movq %r15, %rbp
|
||||
.align 16, 0x90
|
||||
.LBB2_19: # %polly.loop_body33
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
# Parent Loop BB2_11 Depth=3
|
||||
# Parent Loop BB2_14 Depth=4
|
||||
# Parent Loop BB2_18 Depth=5
|
||||
# => This Inner Loop Header: Depth=6
|
||||
movss (%r12,%rbp,4), %xmm1
|
||||
mulss %xmm0, %xmm1
|
||||
addss (%rbx,%rbp,4), %xmm1
|
||||
movss %xmm1, (%rbx,%rbp,4)
|
||||
incq %rbp
|
||||
cmpq $64, %rbp
|
||||
jne .LBB2_19
|
||||
jmp .LBB2_17
|
||||
.LBB2_7: # %polly.after_loop9
|
||||
xorl %eax, %eax
|
||||
addq $40, %rsp
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %polly.loop_preheader17
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_15 Depth 2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rax, -56(%rbp) # 8-byte Spill
|
||||
movq %rbx, -88(%rbp) # 8-byte Spill
|
||||
movq %rax, %rcx
|
||||
orq $63, %rcx
|
||||
movq %rcx, -72(%rbp) # 8-byte Spill
|
||||
leaq -1(%rcx), %rcx
|
||||
movq %rcx, -48(%rbp) # 8-byte Spill
|
||||
movq $-1, %r15
|
||||
movl $B, %ecx
|
||||
movq %rbx, -64(%rbp) # 8-byte Spill
|
||||
xorl %r12d, %r12d
|
||||
.align 16, 0x90
|
||||
.LBB2_15: # %polly.loop_preheader24
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
movq %rcx, -80(%rbp) # 8-byte Spill
|
||||
movq %r12, %r13
|
||||
orq $63, %r13
|
||||
leaq -1(%r13), %rbx
|
||||
xorl %r9d, %r9d
|
||||
movq %rcx, %rdx
|
||||
.align 16, 0x90
|
||||
.LBB2_8: # %polly.loop_header23
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# => This Loop Header: Depth=3
|
||||
# Child Loop BB2_11 Depth 4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq -72(%rbp), %rax # 8-byte Folded Reload
|
||||
jg .LBB2_13
|
||||
# BB#9: # %polly.loop_header30.preheader
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
movq %r9, %rax
|
||||
orq $63, %rax
|
||||
cmpq %rax, %r9
|
||||
jg .LBB2_13
|
||||
# BB#10: # in Loop: Header=BB2_8 Depth=3
|
||||
decq %rax
|
||||
movq -64(%rbp), %r10 # 8-byte Reload
|
||||
movq -56(%rbp), %r11 # 8-byte Reload
|
||||
.align 16, 0x90
|
||||
.LBB2_11: # %polly.loop_header37.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# => This Loop Header: Depth=4
|
||||
# Child Loop BB2_17 Depth 5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
cmpq %r13, %r12
|
||||
movq %rdx, %r14
|
||||
movq %r9, %rcx
|
||||
jg .LBB2_12
|
||||
.align 16, 0x90
|
||||
.LBB2_17: # %polly.loop_header46.preheader
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# => This Loop Header: Depth=5
|
||||
# Child Loop BB2_18 Depth 6
|
||||
leaq (%r11,%r11,2), %rsi
|
||||
shlq $11, %rsi
|
||||
vmovss A(%rsi,%rcx,4), %xmm0
|
||||
movq %r10, %rdi
|
||||
movq %r14, %r8
|
||||
movq %r15, %rsi
|
||||
.LBB2_18: # %polly.loop_header46
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_15 Depth=2
|
||||
# Parent Loop BB2_8 Depth=3
|
||||
# Parent Loop BB2_11 Depth=4
|
||||
# Parent Loop BB2_17 Depth=5
|
||||
# => This Inner Loop Header: Depth=6
|
||||
vmulss (%r8), %xmm0, %xmm1
|
||||
vaddss (%rdi), %xmm1, %xmm1
|
||||
vmovss %xmm1, (%rdi)
|
||||
addq $4, %rdi
|
||||
addq $4, %r8
|
||||
incq %rsi
|
||||
cmpq %rbx, %rsi
|
||||
jle .LBB2_18
|
||||
# BB#16: # %polly.loop_exit48
|
||||
# in Loop: Header=BB2_17 Depth=5
|
||||
addq $6144, %r14 # imm = 0x1800
|
||||
cmpq %rax, %rcx
|
||||
leaq 1(%rcx), %rcx
|
||||
jle .LBB2_17
|
||||
.align 16, 0x90
|
||||
.LBB2_12: # %polly.loop_exit39
|
||||
# in Loop: Header=BB2_11 Depth=4
|
||||
addq $6144, %r10 # imm = 0x1800
|
||||
cmpq -48(%rbp), %r11 # 8-byte Folded Reload
|
||||
leaq 1(%r11), %r11
|
||||
jle .LBB2_11
|
||||
.align 16, 0x90
|
||||
.LBB2_13: # %polly.loop_exit32
|
||||
# in Loop: Header=BB2_8 Depth=3
|
||||
addq $393216, %rdx # imm = 0x60000
|
||||
cmpq $1472, %r9 # imm = 0x5C0
|
||||
leaq 64(%r9), %r9
|
||||
movq -56(%rbp), %rax # 8-byte Reload
|
||||
jl .LBB2_8
|
||||
# BB#14: # %polly.loop_exit25
|
||||
# in Loop: Header=BB2_15 Depth=2
|
||||
addq $256, -64(%rbp) # 8-byte Folded Spill
|
||||
# imm = 0x100
|
||||
movq -80(%rbp), %rcx # 8-byte Reload
|
||||
addq $256, %rcx # imm = 0x100
|
||||
addq $64, %r15
|
||||
cmpq $1472, %r12 # imm = 0x5C0
|
||||
leaq 64(%r12), %r12
|
||||
jl .LBB2_15
|
||||
# BB#6: # %polly.loop_exit18
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
movq -88(%rbp), %rbx # 8-byte Reload
|
||||
addq $393216, %rbx # imm = 0x60000
|
||||
cmpq $1472, %rax # imm = 0x5C0
|
||||
leaq 64(%rax), %rax
|
||||
jl .LBB2_5
|
||||
# BB#7: # %polly.loop_exit11
|
||||
xorl %eax, %eax
|
||||
addq $56, %rsp
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r13
|
||||
@@ -304,8 +370,9 @@ main: # @main
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp2:
|
||||
.size main, .Ltmp2-main
|
||||
.Ltmp28:
|
||||
.size main, .Ltmp28-main
|
||||
.cfi_endproc
|
||||
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
|
||||
@@ -2,76 +2,112 @@
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI0_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl init_array
|
||||
.align 16, 0x90
|
||||
.type init_array,@function
|
||||
init_array: # @init_array
|
||||
# BB#0: # %pollyBB
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI0_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp2:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp3:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp4:
|
||||
.cfi_def_cfa_register %rbp
|
||||
xorl %r8d, %r8d
|
||||
vmovsd .LCPI0_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB0_2: # %polly.loop_header1.preheader
|
||||
.LBB0_1: # %polly.loop_preheader3
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB0_3 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
# Child Loop BB0_2 Depth 2
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB0_3: # %polly.loop_body2
|
||||
# Parent Loop BB0_2 Depth=1
|
||||
.LBB0_2: # %polly.loop_header2
|
||||
# Parent Loop BB0_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
jne .LBB0_3
|
||||
# BB#1: # %polly.loop_header.loopexit
|
||||
# in Loop: Header=BB0_2 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
movl %ecx, %edx
|
||||
imull %r8d, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %r8, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB0_2
|
||||
# BB#4: # %polly.after_loop
|
||||
# BB#3: # %polly.loop_exit4
|
||||
# in Loop: Header=BB0_1 Depth=1
|
||||
incq %r8
|
||||
cmpq $1536, %r8 # imm = 0x600
|
||||
jne .LBB0_1
|
||||
# BB#4: # %polly.loop_exit
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp0:
|
||||
.size init_array, .Ltmp0-init_array
|
||||
.Ltmp5:
|
||||
.size init_array, .Ltmp5-init_array
|
||||
.cfi_endproc
|
||||
|
||||
.globl print_array
|
||||
.align 16, 0x90
|
||||
.type print_array,@function
|
||||
print_array: # @print_array
|
||||
# BB#0:
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp9:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp10:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp11:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r15
|
||||
pushq %r14
|
||||
pushq %r12
|
||||
pushq %rbx
|
||||
pushq %rax
|
||||
movq $-9437184, %rbx # imm = 0xFFFFFFFFFF700000
|
||||
.Ltmp12:
|
||||
.cfi_offset %rbx, -48
|
||||
.Ltmp13:
|
||||
.cfi_offset %r12, -40
|
||||
.Ltmp14:
|
||||
.cfi_offset %r14, -32
|
||||
.Ltmp15:
|
||||
.cfi_offset %r15, -24
|
||||
xorl %r14d, %r14d
|
||||
movl $C, %r15d
|
||||
.align 16, 0x90
|
||||
.LBB1_1: # %.preheader
|
||||
.LBB1_1: # %for.cond1.preheader
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB1_2 Depth 2
|
||||
xorl %r14d, %r14d
|
||||
movq stdout(%rip), %rdi
|
||||
movq stdout(%rip), %rax
|
||||
movq %r15, %r12
|
||||
xorl %ebx, %ebx
|
||||
.align 16, 0x90
|
||||
.LBB1_2: # Parent Loop BB1_1 Depth=1
|
||||
.LBB1_2: # %for.body3
|
||||
# Parent Loop BB1_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movss C+9437184(%rbx,%r14,4), %xmm0
|
||||
cvtss2sd %xmm0, %xmm0
|
||||
vmovss (%r12), %xmm0
|
||||
vcvtss2sd %xmm0, %xmm0, %xmm0
|
||||
movq %rax, %rdi
|
||||
movl $.L.str, %esi
|
||||
movb $1, %al
|
||||
callq fprintf
|
||||
movslq %r14d, %rax
|
||||
movslq %ebx, %rax
|
||||
imulq $1717986919, %rax, %rcx # imm = 0x66666667
|
||||
movq %rcx, %rdx
|
||||
shrq $63, %rdx
|
||||
@@ -81,125 +117,158 @@ print_array: # @print_array
|
||||
subl %ecx, %eax
|
||||
cmpl $79, %eax
|
||||
jne .LBB1_4
|
||||
# BB#3: # in Loop: Header=BB1_2 Depth=2
|
||||
# BB#3: # %if.then
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
movq stdout(%rip), %rsi
|
||||
movl $10, %edi
|
||||
callq fputc
|
||||
.LBB1_4: # in Loop: Header=BB1_2 Depth=2
|
||||
incq %r14
|
||||
movq stdout(%rip), %rsi
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
movq %rsi, %rdi
|
||||
.LBB1_4: # %for.inc
|
||||
# in Loop: Header=BB1_2 Depth=2
|
||||
addq $4, %r12
|
||||
incq %rbx
|
||||
movq stdout(%rip), %rax
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB1_2
|
||||
# BB#5: # in Loop: Header=BB1_1 Depth=1
|
||||
# BB#5: # %for.end
|
||||
# in Loop: Header=BB1_1 Depth=1
|
||||
movl $10, %edi
|
||||
movq %rax, %rsi
|
||||
callq fputc
|
||||
addq $6144, %rbx # imm = 0x1800
|
||||
addq $6144, %r15 # imm = 0x1800
|
||||
incq %r14
|
||||
cmpq $1536, %r14 # imm = 0x600
|
||||
jne .LBB1_1
|
||||
# BB#6:
|
||||
addq $8, %rsp
|
||||
# BB#6: # %for.end12
|
||||
popq %rbx
|
||||
popq %r12
|
||||
popq %r14
|
||||
popq %r15
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp1:
|
||||
.size print_array, .Ltmp1-print_array
|
||||
.Ltmp16:
|
||||
.size print_array, .Ltmp16-print_array
|
||||
.cfi_endproc
|
||||
|
||||
.section .rodata.cst8,"aM",@progbits,8
|
||||
.align 8
|
||||
.LCPI2_0:
|
||||
.quad 4602678819172646912 # double 5.000000e-01
|
||||
.quad 4602678819172646912 # double 0.5
|
||||
.text
|
||||
.globl main
|
||||
.align 16, 0x90
|
||||
.type main,@function
|
||||
main: # @main
|
||||
# BB#0: # %pollyBB
|
||||
pushq %rax
|
||||
xorl %eax, %eax
|
||||
movsd .LCPI2_0(%rip), %xmm0
|
||||
movq %rax, %rcx
|
||||
.cfi_startproc
|
||||
# BB#0: # %entry
|
||||
pushq %rbp
|
||||
.Ltmp20:
|
||||
.cfi_def_cfa_offset 16
|
||||
.Ltmp21:
|
||||
.cfi_offset %rbp, -16
|
||||
movq %rsp, %rbp
|
||||
.Ltmp22:
|
||||
.cfi_def_cfa_register %rbp
|
||||
pushq %r14
|
||||
pushq %rbx
|
||||
.Ltmp23:
|
||||
.cfi_offset %rbx, -32
|
||||
.Ltmp24:
|
||||
.cfi_offset %r14, -24
|
||||
xorl %ebx, %ebx
|
||||
vmovsd .LCPI2_0(%rip), %xmm0
|
||||
.align 16, 0x90
|
||||
.LBB2_1: # %polly.loop_header1.preheader.i
|
||||
.LBB2_1: # %polly.loop_preheader3.i
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_2 Depth 2
|
||||
movq $-1536, %rdx # imm = 0xFFFFFFFFFFFFFA00
|
||||
xorl %esi, %esi
|
||||
xorl %ecx, %ecx
|
||||
.align 16, 0x90
|
||||
.LBB2_2: # %polly.loop_body2.i
|
||||
.LBB2_2: # %polly.loop_header2.i
|
||||
# Parent Loop BB2_1 Depth=1
|
||||
# => This Inner Loop Header: Depth=2
|
||||
movl %esi, %edi
|
||||
sarl $31, %edi
|
||||
shrl $22, %edi
|
||||
addl %esi, %edi
|
||||
andl $-1024, %edi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %edi
|
||||
leal 1(%rsi,%rdi), %edi
|
||||
cvtsi2sd %edi, %xmm1
|
||||
mulsd %xmm0, %xmm1
|
||||
cvtsd2ss %xmm1, %xmm1
|
||||
movss %xmm1, A+6144(%rax,%rdx,4)
|
||||
movss %xmm1, B+6144(%rax,%rdx,4)
|
||||
addl %ecx, %esi
|
||||
incq %rdx
|
||||
movl %ecx, %edx
|
||||
imull %ebx, %edx
|
||||
movl %edx, %esi
|
||||
sarl $31, %esi
|
||||
shrl $22, %esi
|
||||
addl %edx, %esi
|
||||
andl $-1024, %esi # imm = 0xFFFFFFFFFFFFFC00
|
||||
negl %esi
|
||||
movq %rbx, %rax
|
||||
shlq $11, %rax
|
||||
leal 1(%rdx,%rsi), %edi
|
||||
leaq (%rax,%rax,2), %rsi
|
||||
leaq 1(%rcx), %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
vcvtsi2sdl %edi, %xmm0, %xmm1
|
||||
vmulsd %xmm0, %xmm1, %xmm1
|
||||
vcvtsd2ss %xmm1, %xmm1, %xmm1
|
||||
vmovss %xmm1, A(%rsi,%rcx,4)
|
||||
vmovss %xmm1, B(%rsi,%rcx,4)
|
||||
movq %rdx, %rcx
|
||||
jne .LBB2_2
|
||||
# BB#3: # %polly.loop_header.loopexit.i
|
||||
# BB#3: # %polly.loop_exit4.i
|
||||
# in Loop: Header=BB2_1 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
incq %rcx
|
||||
cmpq $1536, %rcx # imm = 0x600
|
||||
incq %rbx
|
||||
cmpq $1536, %rbx # imm = 0x600
|
||||
jne .LBB2_1
|
||||
# BB#4: # %polly.loop_header.preheader
|
||||
# BB#4: # %polly.loop_preheader3.preheader
|
||||
movl $C, %r14d
|
||||
movl $C, %edi
|
||||
xorl %esi, %esi
|
||||
movl $9437184, %edx # imm = 0x900000
|
||||
callq memset
|
||||
xorl %eax, %eax
|
||||
jmp .LBB2_6
|
||||
.align 16, 0x90
|
||||
.LBB2_5: # %polly.loop_header7.loopexit
|
||||
# in Loop: Header=BB2_6 Depth=1
|
||||
addq $6144, %rax # imm = 0x1800
|
||||
cmpq $9437184, %rax # imm = 0x900000
|
||||
je .LBB2_7
|
||||
.LBB2_6: # %polly.loop_header12.preheader
|
||||
.LBB2_5: # %polly.loop_preheader17
|
||||
# =>This Loop Header: Depth=1
|
||||
# Child Loop BB2_9 Depth 2
|
||||
# Child Loop BB2_10 Depth 3
|
||||
leaq A(%rax), %rcx
|
||||
movq $-9437184, %rdx # imm = 0xFFFFFFFFFF700000
|
||||
jmp .LBB2_9
|
||||
# Child Loop BB2_10 Depth 2
|
||||
# Child Loop BB2_8 Depth 3
|
||||
movl $B, %ebx
|
||||
xorl %edx, %edx
|
||||
.align 16, 0x90
|
||||
.LBB2_8: # %polly.loop_header12.loopexit
|
||||
# in Loop: Header=BB2_9 Depth=2
|
||||
addq $4, %rcx
|
||||
addq $6144, %rdx # imm = 0x1800
|
||||
je .LBB2_5
|
||||
.LBB2_9: # %polly.loop_header17.preheader
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
.LBB2_10: # %polly.loop_preheader24
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# => This Loop Header: Depth=2
|
||||
# Child Loop BB2_10 Depth 3
|
||||
movss (%rcx), %xmm0
|
||||
xorl %esi, %esi
|
||||
# Child Loop BB2_8 Depth 3
|
||||
leaq (%rax,%rax,2), %rcx
|
||||
shlq $11, %rcx
|
||||
vmovss A(%rcx,%rdx,4), %xmm0
|
||||
movl $1536, %esi # imm = 0x600
|
||||
movq %r14, %rdi
|
||||
movq %rbx, %rcx
|
||||
.align 16, 0x90
|
||||
.LBB2_10: # %polly.loop_body18
|
||||
# Parent Loop BB2_6 Depth=1
|
||||
# Parent Loop BB2_9 Depth=2
|
||||
.LBB2_8: # %polly.loop_header23
|
||||
# Parent Loop BB2_5 Depth=1
|
||||
# Parent Loop BB2_10 Depth=2
|
||||
# => This Inner Loop Header: Depth=3
|
||||
movss B+9437184(%rdx,%rsi,4), %xmm1
|
||||
mulss %xmm0, %xmm1
|
||||
addss C(%rax,%rsi,4), %xmm1
|
||||
movss %xmm1, C(%rax,%rsi,4)
|
||||
incq %rsi
|
||||
cmpq $1536, %rsi # imm = 0x600
|
||||
vmulss (%rcx), %xmm0, %xmm1
|
||||
vaddss (%rdi), %xmm1, %xmm1
|
||||
vmovss %xmm1, (%rdi)
|
||||
addq $4, %rdi
|
||||
addq $4, %rcx
|
||||
decq %rsi
|
||||
jne .LBB2_8
|
||||
# BB#9: # %polly.loop_exit25
|
||||
# in Loop: Header=BB2_10 Depth=2
|
||||
addq $6144, %rbx # imm = 0x1800
|
||||
incq %rdx
|
||||
cmpq $1536, %rdx # imm = 0x600
|
||||
jne .LBB2_10
|
||||
jmp .LBB2_8
|
||||
.LBB2_7: # %polly.after_loop9
|
||||
# BB#6: # %polly.loop_exit18
|
||||
# in Loop: Header=BB2_5 Depth=1
|
||||
addq $6144, %r14 # imm = 0x1800
|
||||
incq %rax
|
||||
cmpq $1536, %rax # imm = 0x600
|
||||
jne .LBB2_5
|
||||
# BB#7: # %polly.loop_exit11
|
||||
xorl %eax, %eax
|
||||
popq %rdx
|
||||
popq %rbx
|
||||
popq %r14
|
||||
popq %rbp
|
||||
ret
|
||||
.Ltmp2:
|
||||
.size main, .Ltmp2-main
|
||||
.Ltmp25:
|
||||
.size main, .Ltmp25-main
|
||||
.cfi_endproc
|
||||
|
||||
.type A,@object # @A
|
||||
.comm A,9437184,16
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
; ModuleID = 'matmul.s'
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||||
@@ -8,173 +8,179 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@stdout = external global %struct._IO_FILE*
|
||||
@.str = private unnamed_addr constant [5 x i8] c"%lf \00"
|
||||
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
|
||||
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00"
|
||||
@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
|
||||
|
||||
define void @init_array() nounwind {
|
||||
; <label>:0
|
||||
br label %1
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @init_array() #0 {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; <label>:1 ; preds = %18, %0
|
||||
%2 = phi i64 [ %indvar.next2, %18 ], [ 0, %0 ]
|
||||
%exitcond5 = icmp ne i64 %2, 1536
|
||||
br i1 %exitcond5, label %3, label %19
|
||||
for.cond: ; preds = %for.inc17, %entry
|
||||
%0 = phi i64 [ %indvar.next2, %for.inc17 ], [ 0, %entry ]
|
||||
%exitcond3 = icmp ne i64 %0, 1536
|
||||
br i1 %exitcond3, label %for.body, label %for.end19
|
||||
|
||||
; <label>:3 ; preds = %1
|
||||
br label %4
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:4 ; preds = %16, %3
|
||||
%indvar = phi i64 [ %indvar.next, %16 ], [ 0, %3 ]
|
||||
%scevgep4 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %2, i64 %indvar
|
||||
%scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %2, i64 %indvar
|
||||
%tmp = mul i64 %2, %indvar
|
||||
%tmp3 = trunc i64 %tmp to i32
|
||||
for.cond1: ; preds = %for.inc, %for.body
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]
|
||||
%arrayidx6 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %0, i64 %indvar
|
||||
%arrayidx16 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %0, i64 %indvar
|
||||
%1 = mul i64 %0, %indvar
|
||||
%mul = trunc i64 %1 to i32
|
||||
%exitcond = icmp ne i64 %indvar, 1536
|
||||
br i1 %exitcond, label %5, label %17
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
; <label>:5 ; preds = %4
|
||||
%6 = srem i32 %tmp3, 1024
|
||||
%7 = add nsw i32 1, %6
|
||||
%8 = sitofp i32 %7 to double
|
||||
%9 = fdiv double %8, 2.000000e+00
|
||||
%10 = fptrunc double %9 to float
|
||||
store float %10, float* %scevgep4
|
||||
%11 = srem i32 %tmp3, 1024
|
||||
%12 = add nsw i32 1, %11
|
||||
%13 = sitofp i32 %12 to double
|
||||
%14 = fdiv double %13, 2.000000e+00
|
||||
%15 = fptrunc double %14 to float
|
||||
store float %15, float* %scevgep
|
||||
br label %16
|
||||
for.body3: ; preds = %for.cond1
|
||||
%rem = srem i32 %mul, 1024
|
||||
%add = add nsw i32 1, %rem
|
||||
%conv = sitofp i32 %add to double
|
||||
%div = fdiv double %conv, 2.000000e+00
|
||||
%conv4 = fptrunc double %div to float
|
||||
store float %conv4, float* %arrayidx6, align 4
|
||||
%rem8 = srem i32 %mul, 1024
|
||||
%add9 = add nsw i32 1, %rem8
|
||||
%conv10 = sitofp i32 %add9 to double
|
||||
%div11 = fdiv double %conv10, 2.000000e+00
|
||||
%conv12 = fptrunc double %div11 to float
|
||||
store float %conv12, float* %arrayidx16, align 4
|
||||
br label %for.inc
|
||||
|
||||
; <label>:16 ; preds = %5
|
||||
for.inc: ; preds = %for.body3
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %4
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:17 ; preds = %4
|
||||
br label %18
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc17
|
||||
|
||||
; <label>:18 ; preds = %17
|
||||
%indvar.next2 = add i64 %2, 1
|
||||
br label %1
|
||||
for.inc17: ; preds = %for.end
|
||||
%indvar.next2 = add i64 %0, 1
|
||||
br label %for.cond
|
||||
|
||||
; <label>:19 ; preds = %1
|
||||
for.end19: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @print_array() nounwind {
|
||||
; <label>:0
|
||||
br label %1
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @print_array() #0 {
|
||||
entry:
|
||||
br label %for.cond
|
||||
|
||||
; <label>:1 ; preds = %19, %0
|
||||
%indvar1 = phi i64 [ %indvar.next2, %19 ], [ 0, %0 ]
|
||||
for.cond: ; preds = %for.inc10, %entry
|
||||
%indvar1 = phi i64 [ %indvar.next2, %for.inc10 ], [ 0, %entry ]
|
||||
%exitcond3 = icmp ne i64 %indvar1, 1536
|
||||
br i1 %exitcond3, label %2, label %20
|
||||
br i1 %exitcond3, label %for.body, label %for.end12
|
||||
|
||||
; <label>:2 ; preds = %1
|
||||
br label %3
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:3 ; preds = %15, %2
|
||||
%indvar = phi i64 [ %indvar.next, %15 ], [ 0, %2 ]
|
||||
%scevgep = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar
|
||||
for.cond1: ; preds = %for.inc, %for.body
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]
|
||||
%arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar
|
||||
%j.0 = trunc i64 %indvar to i32
|
||||
%exitcond = icmp ne i64 %indvar, 1536
|
||||
br i1 %exitcond, label %4, label %16
|
||||
br i1 %exitcond, label %for.body3, label %for.end
|
||||
|
||||
; <label>:4 ; preds = %3
|
||||
%5 = load %struct._IO_FILE** @stdout, align 8
|
||||
%6 = load float* %scevgep
|
||||
%7 = fpext float %6 to double
|
||||
%8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %7)
|
||||
%9 = srem i32 %j.0, 80
|
||||
%10 = icmp eq i32 %9, 79
|
||||
br i1 %10, label %11, label %14
|
||||
for.body3: ; preds = %for.cond1
|
||||
%0 = load %struct._IO_FILE** @stdout, align 8
|
||||
%1 = load float* %arrayidx5, align 4
|
||||
%conv = fpext float %1 to double
|
||||
%call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %conv)
|
||||
%rem = srem i32 %j.0, 80
|
||||
%cmp6 = icmp eq i32 %rem, 79
|
||||
br i1 %cmp6, label %if.then, label %if.end
|
||||
|
||||
; <label>:11 ; preds = %4
|
||||
%12 = load %struct._IO_FILE** @stdout, align 8
|
||||
%13 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %14
|
||||
if.then: ; preds = %for.body3
|
||||
%2 = load %struct._IO_FILE** @stdout, align 8
|
||||
%call8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %if.end
|
||||
|
||||
; <label>:14 ; preds = %11, %4
|
||||
br label %15
|
||||
if.end: ; preds = %if.then, %for.body3
|
||||
br label %for.inc
|
||||
|
||||
; <label>:15 ; preds = %14
|
||||
for.inc: ; preds = %if.end
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %3
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:16 ; preds = %3
|
||||
%17 = load %struct._IO_FILE** @stdout, align 8
|
||||
%18 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %17, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %19
|
||||
for.end: ; preds = %for.cond1
|
||||
%3 = load %struct._IO_FILE** @stdout, align 8
|
||||
%call9 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %for.inc10
|
||||
|
||||
; <label>:19 ; preds = %16
|
||||
for.inc10: ; preds = %for.end
|
||||
%indvar.next2 = add i64 %indvar1, 1
|
||||
br label %1
|
||||
br label %for.cond
|
||||
|
||||
; <label>:20 ; preds = %1
|
||||
for.end12: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
|
||||
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
|
||||
|
||||
define i32 @main() nounwind {
|
||||
; <label>:0
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
call void @init_array()
|
||||
br label %1
|
||||
br label %for.cond
|
||||
|
||||
; <label>:1 ; preds = %16, %0
|
||||
%indvar3 = phi i64 [ %indvar.next4, %16 ], [ 0, %0 ]
|
||||
%exitcond9 = icmp ne i64 %indvar3, 1536
|
||||
br i1 %exitcond9, label %2, label %17
|
||||
for.cond: ; preds = %for.inc28, %entry
|
||||
%indvar3 = phi i64 [ %indvar.next4, %for.inc28 ], [ 0, %entry ]
|
||||
%exitcond6 = icmp ne i64 %indvar3, 1536
|
||||
br i1 %exitcond6, label %for.body, label %for.end30
|
||||
|
||||
; <label>:2 ; preds = %1
|
||||
br label %3
|
||||
for.body: ; preds = %for.cond
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:3 ; preds = %14, %2
|
||||
%indvar1 = phi i64 [ %indvar.next2, %14 ], [ 0, %2 ]
|
||||
%scevgep8 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1
|
||||
%exitcond6 = icmp ne i64 %indvar1, 1536
|
||||
br i1 %exitcond6, label %4, label %15
|
||||
for.cond1: ; preds = %for.inc25, %for.body
|
||||
%indvar1 = phi i64 [ %indvar.next2, %for.inc25 ], [ 0, %for.body ]
|
||||
%arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1
|
||||
%exitcond5 = icmp ne i64 %indvar1, 1536
|
||||
br i1 %exitcond5, label %for.body3, label %for.end27
|
||||
|
||||
; <label>:4 ; preds = %3
|
||||
store float 0.000000e+00, float* %scevgep8
|
||||
br label %5
|
||||
for.body3: ; preds = %for.cond1
|
||||
store float 0.000000e+00, float* %arrayidx5, align 4
|
||||
br label %for.cond6
|
||||
|
||||
; <label>:5 ; preds = %12, %4
|
||||
%indvar = phi i64 [ %indvar.next, %12 ], [ 0, %4 ]
|
||||
%scevgep5 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar
|
||||
%scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1
|
||||
for.cond6: ; preds = %for.inc, %for.body3
|
||||
%indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body3 ]
|
||||
%arrayidx16 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar
|
||||
%arrayidx20 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1
|
||||
%exitcond = icmp ne i64 %indvar, 1536
|
||||
br i1 %exitcond, label %6, label %13
|
||||
br i1 %exitcond, label %for.body8, label %for.end
|
||||
|
||||
; <label>:6 ; preds = %5
|
||||
%7 = load float* %scevgep8
|
||||
%8 = load float* %scevgep5
|
||||
%9 = load float* %scevgep
|
||||
%10 = fmul float %8, %9
|
||||
%11 = fadd float %7, %10
|
||||
store float %11, float* %scevgep8
|
||||
br label %12
|
||||
for.body8: ; preds = %for.cond6
|
||||
%0 = load float* %arrayidx5, align 4
|
||||
%1 = load float* %arrayidx16, align 4
|
||||
%2 = load float* %arrayidx20, align 4
|
||||
%mul = fmul float %1, %2
|
||||
%add = fadd float %0, %mul
|
||||
store float %add, float* %arrayidx5, align 4
|
||||
br label %for.inc
|
||||
|
||||
; <label>:12 ; preds = %6
|
||||
for.inc: ; preds = %for.body8
|
||||
%indvar.next = add i64 %indvar, 1
|
||||
br label %5
|
||||
br label %for.cond6
|
||||
|
||||
; <label>:13 ; preds = %5
|
||||
br label %14
|
||||
for.end: ; preds = %for.cond6
|
||||
br label %for.inc25
|
||||
|
||||
; <label>:14 ; preds = %13
|
||||
for.inc25: ; preds = %for.end
|
||||
%indvar.next2 = add i64 %indvar1, 1
|
||||
br label %3
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:15 ; preds = %3
|
||||
br label %16
|
||||
for.end27: ; preds = %for.cond1
|
||||
br label %for.inc28
|
||||
|
||||
; <label>:16 ; preds = %15
|
||||
for.inc28: ; preds = %for.end27
|
||||
%indvar.next4 = add i64 %indvar3, 1
|
||||
br label %1
|
||||
br label %for.cond
|
||||
|
||||
; <label>:17 ; preds = %1
|
||||
for.end30: ; preds = %for.cond
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
; ModuleID = 'matmul.c'
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-unknown-linux-gnu"
|
||||
|
||||
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||||
@@ -8,248 +8,257 @@ target triple = "x86_64-unknown-linux-gnu"
|
||||
@A = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@B = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@stdout = external global %struct._IO_FILE*
|
||||
@.str = private unnamed_addr constant [5 x i8] c"%lf \00"
|
||||
@.str = private unnamed_addr constant [5 x i8] c"%lf \00", align 1
|
||||
@C = common global [1536 x [1536 x float]] zeroinitializer, align 16
|
||||
@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00"
|
||||
@.str1 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
|
||||
|
||||
define void @init_array() nounwind {
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @init_array() #0 {
|
||||
entry:
|
||||
%i = alloca i32, align 4
|
||||
%j = alloca i32, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %1
|
||||
br label %for.cond
|
||||
|
||||
; <label>:1 ; preds = %41, %0
|
||||
%2 = load i32* %i, align 4
|
||||
%3 = icmp slt i32 %2, 1536
|
||||
br i1 %3, label %4, label %44
|
||||
for.cond: ; preds = %for.inc17, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 1536
|
||||
br i1 %cmp, label %for.body, label %for.end19
|
||||
|
||||
; <label>:4 ; preds = %1
|
||||
for.body: ; preds = %for.cond
|
||||
store i32 0, i32* %j, align 4
|
||||
br label %5
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:5 ; preds = %37, %4
|
||||
%6 = load i32* %j, align 4
|
||||
%7 = icmp slt i32 %6, 1536
|
||||
br i1 %7, label %8, label %40
|
||||
for.cond1: ; preds = %for.inc, %for.body
|
||||
%1 = load i32* %j, align 4
|
||||
%cmp2 = icmp slt i32 %1, 1536
|
||||
br i1 %cmp2, label %for.body3, label %for.end
|
||||
|
||||
; <label>:8 ; preds = %5
|
||||
for.body3: ; preds = %for.cond1
|
||||
%2 = load i32* %i, align 4
|
||||
%3 = load i32* %j, align 4
|
||||
%mul = mul nsw i32 %2, %3
|
||||
%rem = srem i32 %mul, 1024
|
||||
%add = add nsw i32 1, %rem
|
||||
%conv = sitofp i32 %add to double
|
||||
%div = fdiv double %conv, 2.000000e+00
|
||||
%conv4 = fptrunc double %div to float
|
||||
%4 = load i32* %j, align 4
|
||||
%idxprom = sext i32 %4 to i64
|
||||
%5 = load i32* %i, align 4
|
||||
%idxprom5 = sext i32 %5 to i64
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %idxprom5
|
||||
%arrayidx6 = getelementptr inbounds [1536 x float]* %arrayidx, i32 0, i64 %idxprom
|
||||
store float %conv4, float* %arrayidx6, align 4
|
||||
%6 = load i32* %i, align 4
|
||||
%7 = load i32* %j, align 4
|
||||
%mul7 = mul nsw i32 %6, %7
|
||||
%rem8 = srem i32 %mul7, 1024
|
||||
%add9 = add nsw i32 1, %rem8
|
||||
%conv10 = sitofp i32 %add9 to double
|
||||
%div11 = fdiv double %conv10, 2.000000e+00
|
||||
%conv12 = fptrunc double %div11 to float
|
||||
%8 = load i32* %j, align 4
|
||||
%idxprom13 = sext i32 %8 to i64
|
||||
%9 = load i32* %i, align 4
|
||||
%idxprom14 = sext i32 %9 to i64
|
||||
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %idxprom14
|
||||
%arrayidx16 = getelementptr inbounds [1536 x float]* %arrayidx15, i32 0, i64 %idxprom13
|
||||
store float %conv12, float* %arrayidx16, align 4
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body3
|
||||
%10 = load i32* %j, align 4
|
||||
%11 = mul nsw i32 %9, %10
|
||||
%12 = srem i32 %11, 1024
|
||||
%13 = add nsw i32 1, %12
|
||||
%14 = sitofp i32 %13 to double
|
||||
%15 = fdiv double %14, 2.000000e+00
|
||||
%16 = fptrunc double %15 to float
|
||||
%17 = load i32* %j, align 4
|
||||
%18 = sext i32 %17 to i64
|
||||
%19 = load i32* %i, align 4
|
||||
%20 = sext i32 %19 to i64
|
||||
%21 = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %20
|
||||
%22 = getelementptr inbounds [1536 x float]* %21, i32 0, i64 %18
|
||||
store float %16, float* %22
|
||||
%23 = load i32* %i, align 4
|
||||
%24 = load i32* %j, align 4
|
||||
%25 = mul nsw i32 %23, %24
|
||||
%26 = srem i32 %25, 1024
|
||||
%27 = add nsw i32 1, %26
|
||||
%28 = sitofp i32 %27 to double
|
||||
%29 = fdiv double %28, 2.000000e+00
|
||||
%30 = fptrunc double %29 to float
|
||||
%31 = load i32* %j, align 4
|
||||
%32 = sext i32 %31 to i64
|
||||
%33 = load i32* %i, align 4
|
||||
%34 = sext i32 %33 to i64
|
||||
%35 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %34
|
||||
%36 = getelementptr inbounds [1536 x float]* %35, i32 0, i64 %32
|
||||
store float %30, float* %36
|
||||
br label %37
|
||||
%inc = add nsw i32 %10, 1
|
||||
store i32 %inc, i32* %j, align 4
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:37 ; preds = %8
|
||||
%38 = load i32* %j, align 4
|
||||
%39 = add nsw i32 %38, 1
|
||||
store i32 %39, i32* %j, align 4
|
||||
br label %5
|
||||
for.end: ; preds = %for.cond1
|
||||
br label %for.inc17
|
||||
|
||||
; <label>:40 ; preds = %5
|
||||
br label %41
|
||||
for.inc17: ; preds = %for.end
|
||||
%11 = load i32* %i, align 4
|
||||
%inc18 = add nsw i32 %11, 1
|
||||
store i32 %inc18, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
; <label>:41 ; preds = %40
|
||||
%42 = load i32* %i, align 4
|
||||
%43 = add nsw i32 %42, 1
|
||||
store i32 %43, i32* %i, align 4
|
||||
br label %1
|
||||
|
||||
; <label>:44 ; preds = %1
|
||||
for.end19: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @print_array() nounwind {
|
||||
; Function Attrs: nounwind uwtable
|
||||
define void @print_array() #0 {
|
||||
entry:
|
||||
%i = alloca i32, align 4
|
||||
%j = alloca i32, align 4
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %1
|
||||
br label %for.cond
|
||||
|
||||
; <label>:1 ; preds = %32, %0
|
||||
%2 = load i32* %i, align 4
|
||||
%3 = icmp slt i32 %2, 1536
|
||||
br i1 %3, label %4, label %35
|
||||
for.cond: ; preds = %for.inc10, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 1536
|
||||
br i1 %cmp, label %for.body, label %for.end12
|
||||
|
||||
; <label>:4 ; preds = %1
|
||||
for.body: ; preds = %for.cond
|
||||
store i32 0, i32* %j, align 4
|
||||
br label %5
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:5 ; preds = %26, %4
|
||||
for.cond1: ; preds = %for.inc, %for.body
|
||||
%1 = load i32* %j, align 4
|
||||
%cmp2 = icmp slt i32 %1, 1536
|
||||
br i1 %cmp2, label %for.body3, label %for.end
|
||||
|
||||
for.body3: ; preds = %for.cond1
|
||||
%2 = load %struct._IO_FILE** @stdout, align 8
|
||||
%3 = load i32* %j, align 4
|
||||
%idxprom = sext i32 %3 to i64
|
||||
%4 = load i32* %i, align 4
|
||||
%idxprom4 = sext i32 %4 to i64
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom4
|
||||
%arrayidx5 = getelementptr inbounds [1536 x float]* %arrayidx, i32 0, i64 %idxprom
|
||||
%5 = load float* %arrayidx5, align 4
|
||||
%conv = fpext float %5 to double
|
||||
%call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %conv)
|
||||
%6 = load i32* %j, align 4
|
||||
%7 = icmp slt i32 %6, 1536
|
||||
br i1 %7, label %8, label %29
|
||||
%rem = srem i32 %6, 80
|
||||
%cmp6 = icmp eq i32 %rem, 79
|
||||
br i1 %cmp6, label %if.then, label %if.end
|
||||
|
||||
; <label>:8 ; preds = %5
|
||||
if.then: ; preds = %for.body3
|
||||
%7 = load %struct._IO_FILE** @stdout, align 8
|
||||
%call8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %if.then, %for.body3
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %if.end
|
||||
%8 = load i32* %j, align 4
|
||||
%inc = add nsw i32 %8, 1
|
||||
store i32 %inc, i32* %j, align 4
|
||||
br label %for.cond1
|
||||
|
||||
for.end: ; preds = %for.cond1
|
||||
%9 = load %struct._IO_FILE** @stdout, align 8
|
||||
%10 = load i32* %j, align 4
|
||||
%11 = sext i32 %10 to i64
|
||||
%12 = load i32* %i, align 4
|
||||
%13 = sext i32 %12 to i64
|
||||
%14 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %13
|
||||
%15 = getelementptr inbounds [1536 x float]* %14, i32 0, i64 %11
|
||||
%16 = load float* %15
|
||||
%17 = fpext float %16 to double
|
||||
%18 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %17)
|
||||
%19 = load i32* %j, align 4
|
||||
%20 = srem i32 %19, 80
|
||||
%21 = icmp eq i32 %20, 79
|
||||
br i1 %21, label %22, label %25
|
||||
%call9 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %for.inc10
|
||||
|
||||
; <label>:22 ; preds = %8
|
||||
%23 = load %struct._IO_FILE** @stdout, align 8
|
||||
%24 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %23, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %25
|
||||
for.inc10: ; preds = %for.end
|
||||
%10 = load i32* %i, align 4
|
||||
%inc11 = add nsw i32 %10, 1
|
||||
store i32 %inc11, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
; <label>:25 ; preds = %22, %8
|
||||
br label %26
|
||||
|
||||
; <label>:26 ; preds = %25
|
||||
%27 = load i32* %j, align 4
|
||||
%28 = add nsw i32 %27, 1
|
||||
store i32 %28, i32* %j, align 4
|
||||
br label %5
|
||||
|
||||
; <label>:29 ; preds = %5
|
||||
%30 = load %struct._IO_FILE** @stdout, align 8
|
||||
%31 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %30, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))
|
||||
br label %32
|
||||
|
||||
; <label>:32 ; preds = %29
|
||||
%33 = load i32* %i, align 4
|
||||
%34 = add nsw i32 %33, 1
|
||||
store i32 %34, i32* %i, align 4
|
||||
br label %1
|
||||
|
||||
; <label>:35 ; preds = %1
|
||||
for.end12: ; preds = %for.cond
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...)
|
||||
declare i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
|
||||
|
||||
define i32 @main() nounwind {
|
||||
%1 = alloca i32, align 4
|
||||
; Function Attrs: nounwind uwtable
|
||||
define i32 @main() #0 {
|
||||
entry:
|
||||
%retval = alloca i32, align 4
|
||||
%i = alloca i32, align 4
|
||||
%j = alloca i32, align 4
|
||||
%k = alloca i32, align 4
|
||||
%t_start = alloca double, align 8
|
||||
%t_end = alloca double, align 8
|
||||
store i32 0, i32* %1
|
||||
store i32 0, i32* %retval
|
||||
call void @init_array()
|
||||
store i32 0, i32* %i, align 4
|
||||
br label %2
|
||||
br label %for.cond
|
||||
|
||||
; <label>:2 ; preds = %57, %0
|
||||
%3 = load i32* %i, align 4
|
||||
%4 = icmp slt i32 %3, 1536
|
||||
br i1 %4, label %5, label %60
|
||||
for.cond: ; preds = %for.inc28, %entry
|
||||
%0 = load i32* %i, align 4
|
||||
%cmp = icmp slt i32 %0, 1536
|
||||
br i1 %cmp, label %for.body, label %for.end30
|
||||
|
||||
; <label>:5 ; preds = %2
|
||||
for.body: ; preds = %for.cond
|
||||
store i32 0, i32* %j, align 4
|
||||
br label %6
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:6 ; preds = %53, %5
|
||||
%7 = load i32* %j, align 4
|
||||
%8 = icmp slt i32 %7, 1536
|
||||
br i1 %8, label %9, label %56
|
||||
for.cond1: ; preds = %for.inc25, %for.body
|
||||
%1 = load i32* %j, align 4
|
||||
%cmp2 = icmp slt i32 %1, 1536
|
||||
br i1 %cmp2, label %for.body3, label %for.end27
|
||||
|
||||
; <label>:9 ; preds = %6
|
||||
%10 = load i32* %j, align 4
|
||||
%11 = sext i32 %10 to i64
|
||||
%12 = load i32* %i, align 4
|
||||
%13 = sext i32 %12 to i64
|
||||
%14 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %13
|
||||
%15 = getelementptr inbounds [1536 x float]* %14, i32 0, i64 %11
|
||||
store float 0.000000e+00, float* %15
|
||||
for.body3: ; preds = %for.cond1
|
||||
%2 = load i32* %j, align 4
|
||||
%idxprom = sext i32 %2 to i64
|
||||
%3 = load i32* %i, align 4
|
||||
%idxprom4 = sext i32 %3 to i64
|
||||
%arrayidx = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom4
|
||||
%arrayidx5 = getelementptr inbounds [1536 x float]* %arrayidx, i32 0, i64 %idxprom
|
||||
store float 0.000000e+00, float* %arrayidx5, align 4
|
||||
store i32 0, i32* %k, align 4
|
||||
br label %16
|
||||
br label %for.cond6
|
||||
|
||||
; <label>:16 ; preds = %49, %9
|
||||
%17 = load i32* %k, align 4
|
||||
%18 = icmp slt i32 %17, 1536
|
||||
br i1 %18, label %19, label %52
|
||||
for.cond6: ; preds = %for.inc, %for.body3
|
||||
%4 = load i32* %k, align 4
|
||||
%cmp7 = icmp slt i32 %4, 1536
|
||||
br i1 %cmp7, label %for.body8, label %for.end
|
||||
|
||||
; <label>:19 ; preds = %16
|
||||
%20 = load i32* %j, align 4
|
||||
%21 = sext i32 %20 to i64
|
||||
%22 = load i32* %i, align 4
|
||||
%23 = sext i32 %22 to i64
|
||||
%24 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %23
|
||||
%25 = getelementptr inbounds [1536 x float]* %24, i32 0, i64 %21
|
||||
%26 = load float* %25
|
||||
%27 = load i32* %k, align 4
|
||||
%28 = sext i32 %27 to i64
|
||||
%29 = load i32* %i, align 4
|
||||
%30 = sext i32 %29 to i64
|
||||
%31 = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %30
|
||||
%32 = getelementptr inbounds [1536 x float]* %31, i32 0, i64 %28
|
||||
%33 = load float* %32
|
||||
%34 = load i32* %j, align 4
|
||||
%35 = sext i32 %34 to i64
|
||||
%36 = load i32* %k, align 4
|
||||
%37 = sext i32 %36 to i64
|
||||
%38 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %37
|
||||
%39 = getelementptr inbounds [1536 x float]* %38, i32 0, i64 %35
|
||||
%40 = load float* %39
|
||||
%41 = fmul float %33, %40
|
||||
%42 = fadd float %26, %41
|
||||
%43 = load i32* %j, align 4
|
||||
%44 = sext i32 %43 to i64
|
||||
%45 = load i32* %i, align 4
|
||||
%46 = sext i32 %45 to i64
|
||||
%47 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %46
|
||||
%48 = getelementptr inbounds [1536 x float]* %47, i32 0, i64 %44
|
||||
store float %42, float* %48
|
||||
br label %49
|
||||
for.body8: ; preds = %for.cond6
|
||||
%5 = load i32* %j, align 4
|
||||
%idxprom9 = sext i32 %5 to i64
|
||||
%6 = load i32* %i, align 4
|
||||
%idxprom10 = sext i32 %6 to i64
|
||||
%arrayidx11 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom10
|
||||
%arrayidx12 = getelementptr inbounds [1536 x float]* %arrayidx11, i32 0, i64 %idxprom9
|
||||
%7 = load float* %arrayidx12, align 4
|
||||
%8 = load i32* %k, align 4
|
||||
%idxprom13 = sext i32 %8 to i64
|
||||
%9 = load i32* %i, align 4
|
||||
%idxprom14 = sext i32 %9 to i64
|
||||
%arrayidx15 = getelementptr inbounds [1536 x [1536 x float]]* @A, i32 0, i64 %idxprom14
|
||||
%arrayidx16 = getelementptr inbounds [1536 x float]* %arrayidx15, i32 0, i64 %idxprom13
|
||||
%10 = load float* %arrayidx16, align 4
|
||||
%11 = load i32* %j, align 4
|
||||
%idxprom17 = sext i32 %11 to i64
|
||||
%12 = load i32* %k, align 4
|
||||
%idxprom18 = sext i32 %12 to i64
|
||||
%arrayidx19 = getelementptr inbounds [1536 x [1536 x float]]* @B, i32 0, i64 %idxprom18
|
||||
%arrayidx20 = getelementptr inbounds [1536 x float]* %arrayidx19, i32 0, i64 %idxprom17
|
||||
%13 = load float* %arrayidx20, align 4
|
||||
%mul = fmul float %10, %13
|
||||
%add = fadd float %7, %mul
|
||||
%14 = load i32* %j, align 4
|
||||
%idxprom21 = sext i32 %14 to i64
|
||||
%15 = load i32* %i, align 4
|
||||
%idxprom22 = sext i32 %15 to i64
|
||||
%arrayidx23 = getelementptr inbounds [1536 x [1536 x float]]* @C, i32 0, i64 %idxprom22
|
||||
%arrayidx24 = getelementptr inbounds [1536 x float]* %arrayidx23, i32 0, i64 %idxprom21
|
||||
store float %add, float* %arrayidx24, align 4
|
||||
br label %for.inc
|
||||
|
||||
; <label>:49 ; preds = %19
|
||||
%50 = load i32* %k, align 4
|
||||
%51 = add nsw i32 %50, 1
|
||||
store i32 %51, i32* %k, align 4
|
||||
br label %16
|
||||
for.inc: ; preds = %for.body8
|
||||
%16 = load i32* %k, align 4
|
||||
%inc = add nsw i32 %16, 1
|
||||
store i32 %inc, i32* %k, align 4
|
||||
br label %for.cond6
|
||||
|
||||
; <label>:52 ; preds = %16
|
||||
br label %53
|
||||
for.end: ; preds = %for.cond6
|
||||
br label %for.inc25
|
||||
|
||||
; <label>:53 ; preds = %52
|
||||
%54 = load i32* %j, align 4
|
||||
%55 = add nsw i32 %54, 1
|
||||
store i32 %55, i32* %j, align 4
|
||||
br label %6
|
||||
for.inc25: ; preds = %for.end
|
||||
%17 = load i32* %j, align 4
|
||||
%inc26 = add nsw i32 %17, 1
|
||||
store i32 %inc26, i32* %j, align 4
|
||||
br label %for.cond1
|
||||
|
||||
; <label>:56 ; preds = %6
|
||||
br label %57
|
||||
for.end27: ; preds = %for.cond1
|
||||
br label %for.inc28
|
||||
|
||||
; <label>:57 ; preds = %56
|
||||
%58 = load i32* %i, align 4
|
||||
%59 = add nsw i32 %58, 1
|
||||
store i32 %59, i32* %i, align 4
|
||||
br label %2
|
||||
for.inc28: ; preds = %for.end27
|
||||
%18 = load i32* %i, align 4
|
||||
%inc29 = add nsw i32 %18, 1
|
||||
store i32 %inc29, i32* %i, align 4
|
||||
br label %for.cond
|
||||
|
||||
; <label>:60 ; preds = %2
|
||||
for.end30: ; preds = %for.cond
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
|
||||
@@ -1,47 +1,47 @@
|
||||
digraph "Scop Graph for 'init_array' function" {
|
||||
label="Scop Graph for 'init_array' function";
|
||||
|
||||
Node0x26ade30 [shape=record,label="{%0:\l\l br label %1\l}"];
|
||||
Node0x26ade30 -> Node0x26acdd0;
|
||||
Node0x26acdd0 [shape=record,label="{%1:\l\l %2 = phi i64 [ %indvar.next2, %18 ], [ 0, %0 ]\l %exitcond5 = icmp ne i64 %2, 1536\l br i1 %exitcond5, label %3, label %19\l}"];
|
||||
Node0x26acdd0 -> Node0x26acdf0;
|
||||
Node0x26acdd0 -> Node0x26adce0;
|
||||
Node0x26acdf0 [shape=record,label="{%3:\l\l br label %4\l}"];
|
||||
Node0x26acdf0 -> Node0x26addc0;
|
||||
Node0x26addc0 [shape=record,label="{%4:\l\l %indvar = phi i64 [ %indvar.next, %16 ], [ 0, %3 ]\l %scevgep4 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %2, i64 %indvar\l %scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %2, i64 %indvar\l %tmp = mul i64 %2, %indvar\l %tmp3 = trunc i64 %tmp to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %5, label %17\l}"];
|
||||
Node0x26addc0 -> Node0x26ace70;
|
||||
Node0x26addc0 -> Node0x26ad010;
|
||||
Node0x26ace70 [shape=record,label="{%5:\l\l %6 = srem i32 %tmp3, 1024\l %7 = add nsw i32 1, %6\l %8 = sitofp i32 %7 to double\l %9 = fdiv double %8, 2.000000e+00\l %10 = fptrunc double %9 to float\l store float %10, float* %scevgep4\l %11 = srem i32 %tmp3, 1024\l %12 = add nsw i32 1, %11\l %13 = sitofp i32 %12 to double\l %14 = fdiv double %13, 2.000000e+00\l %15 = fptrunc double %14 to float\l store float %15, float* %scevgep\l br label %16\l}"];
|
||||
Node0x26ace70 -> Node0x26ace90;
|
||||
Node0x26ace90 [shape=record,label="{%16:\l\l %indvar.next = add i64 %indvar, 1\l br label %4\l}"];
|
||||
Node0x26ace90 -> Node0x26addc0[constraint=false];
|
||||
Node0x26ad010 [shape=record,label="{%17:\l\l br label %18\l}"];
|
||||
Node0x26ad010 -> Node0x26ad6c0;
|
||||
Node0x26ad6c0 [shape=record,label="{%18:\l\l %indvar.next2 = add i64 %2, 1\l br label %1\l}"];
|
||||
Node0x26ad6c0 -> Node0x26acdd0[constraint=false];
|
||||
Node0x26adce0 [shape=record,label="{%19:\l\l ret void\l}"];
|
||||
Node0x17d4370 [shape=record,label="{entry:\l br label %for.cond\l}"];
|
||||
Node0x17d4370 -> Node0x17da5d0;
|
||||
Node0x17da5d0 [shape=record,label="{for.cond: \l %0 = phi i64 [ %indvar.next2, %for.inc17 ], [ 0, %entry ]\l %exitcond3 = icmp ne i64 %0, 1536\l br i1 %exitcond3, label %for.body, label %for.end19\l}"];
|
||||
Node0x17da5d0 -> Node0x17da5f0;
|
||||
Node0x17da5d0 -> Node0x17da650;
|
||||
Node0x17da5f0 [shape=record,label="{for.body: \l br label %for.cond1\l}"];
|
||||
Node0x17da5f0 -> Node0x17da900;
|
||||
Node0x17da900 [shape=record,label="{for.cond1: \l %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]\l %arrayidx6 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %0, i64 %indvar\l %arrayidx16 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %0, i64 %indvar\l %1 = mul i64 %0, %indvar\l %mul = trunc i64 %1 to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
|
||||
Node0x17da900 -> Node0x17da670;
|
||||
Node0x17da900 -> Node0x17da9a0;
|
||||
Node0x17da670 [shape=record,label="{for.body3: \l %rem = srem i32 %mul, 1024\l %add = add nsw i32 1, %rem\l %conv = sitofp i32 %add to double\l %div = fdiv double %conv, 2.000000e+00\l %conv4 = fptrunc double %div to float\l store float %conv4, float* %arrayidx6, align 4\l %rem8 = srem i32 %mul, 1024\l %add9 = add nsw i32 1, %rem8\l %conv10 = sitofp i32 %add9 to double\l %div11 = fdiv double %conv10, 2.000000e+00\l %conv12 = fptrunc double %div11 to float\l store float %conv12, float* %arrayidx16, align 4\l br label %for.inc\l}"];
|
||||
Node0x17da670 -> Node0x17da8e0;
|
||||
Node0x17da8e0 [shape=record,label="{for.inc: \l %indvar.next = add i64 %indvar, 1\l br label %for.cond1\l}"];
|
||||
Node0x17da8e0 -> Node0x17da900[constraint=false];
|
||||
Node0x17da9a0 [shape=record,label="{for.end: \l br label %for.inc17\l}"];
|
||||
Node0x17da9a0 -> Node0x17d9e70;
|
||||
Node0x17d9e70 [shape=record,label="{for.inc17: \l %indvar.next2 = add i64 %0, 1\l br label %for.cond\l}"];
|
||||
Node0x17d9e70 -> Node0x17da5d0[constraint=false];
|
||||
Node0x17da650 [shape=record,label="{for.end19: \l ret void\l}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x26a94c0 {
|
||||
subgraph cluster_0x17d3a30 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x26aa4e0 {
|
||||
subgraph cluster_0x17d4ec0 {
|
||||
label = "";
|
||||
style = filled;
|
||||
color = 3 subgraph cluster_0x26a9780 {
|
||||
color = 3 subgraph cluster_0x17d4180 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
Node0x26addc0;
|
||||
Node0x26ace70;
|
||||
Node0x26ace90;
|
||||
Node0x17da900;
|
||||
Node0x17da670;
|
||||
Node0x17da8e0;
|
||||
}
|
||||
Node0x26acdd0;
|
||||
Node0x26acdf0;
|
||||
Node0x26ad010;
|
||||
Node0x26ad6c0;
|
||||
Node0x17da5d0;
|
||||
Node0x17da5f0;
|
||||
Node0x17da9a0;
|
||||
Node0x17d9e70;
|
||||
}
|
||||
Node0x26ade30;
|
||||
Node0x26adce0;
|
||||
Node0x17d4370;
|
||||
Node0x17da650;
|
||||
}
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 141 KiB After Width: | Height: | Size: 151 KiB |
@@ -1,65 +1,65 @@
|
||||
digraph "Scop Graph for 'main' function" {
|
||||
label="Scop Graph for 'main' function";
|
||||
|
||||
Node0x26ace10 [shape=record,label="{%0:\l\l call void @init_array()\l br label %1\l}"];
|
||||
Node0x26ace10 -> Node0x26acd60;
|
||||
Node0x26acd60 [shape=record,label="{%1:\l\l %indvar3 = phi i64 [ %indvar.next4, %16 ], [ 0, %0 ]\l %exitcond9 = icmp ne i64 %indvar3, 1536\l br i1 %exitcond9, label %2, label %17\l}"];
|
||||
Node0x26acd60 -> Node0x26acd80;
|
||||
Node0x26acd60 -> Node0x26af2e0;
|
||||
Node0x26acd80 [shape=record,label="{%2:\l\l br label %3\l}"];
|
||||
Node0x26acd80 -> Node0x26aee80;
|
||||
Node0x26aee80 [shape=record,label="{%3:\l\l %indvar1 = phi i64 [ %indvar.next2, %14 ], [ 0, %2 ]\l %scevgep8 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1\l %exitcond6 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond6, label %4, label %15\l}"];
|
||||
Node0x26aee80 -> Node0x26aeea0;
|
||||
Node0x26aee80 -> Node0x26aeec0;
|
||||
Node0x26aeea0 [shape=record,label="{%4:\l\l store float 0.000000e+00, float* %scevgep8\l br label %5\l}"];
|
||||
Node0x26aeea0 -> Node0x26aced0;
|
||||
Node0x26aced0 [shape=record,label="{%5:\l\l %indvar = phi i64 [ %indvar.next, %12 ], [ 0, %4 ]\l %scevgep5 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar\l %scevgep = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %6, label %13\l}"];
|
||||
Node0x26aced0 -> Node0x26ace60;
|
||||
Node0x26aced0 -> Node0x26af5e0;
|
||||
Node0x26ace60 [shape=record,label="{%6:\l\l %7 = load float* %scevgep8\l %8 = load float* %scevgep5\l %9 = load float* %scevgep\l %10 = fmul float %8, %9\l %11 = fadd float %7, %10\l store float %11, float* %scevgep8\l br label %12\l}"];
|
||||
Node0x26ace60 -> Node0x26af640;
|
||||
Node0x26af640 [shape=record,label="{%12:\l\l %indvar.next = add i64 %indvar, 1\l br label %5\l}"];
|
||||
Node0x26af640 -> Node0x26aced0[constraint=false];
|
||||
Node0x26af5e0 [shape=record,label="{%13:\l\l br label %14\l}"];
|
||||
Node0x26af5e0 -> Node0x26af6e0;
|
||||
Node0x26af6e0 [shape=record,label="{%14:\l\l %indvar.next2 = add i64 %indvar1, 1\l br label %3\l}"];
|
||||
Node0x26af6e0 -> Node0x26aee80[constraint=false];
|
||||
Node0x26aeec0 [shape=record,label="{%15:\l\l br label %16\l}"];
|
||||
Node0x26aeec0 -> Node0x26af740;
|
||||
Node0x26af740 [shape=record,label="{%16:\l\l %indvar.next4 = add i64 %indvar3, 1\l br label %1\l}"];
|
||||
Node0x26af740 -> Node0x26acd60[constraint=false];
|
||||
Node0x26af2e0 [shape=record,label="{%17:\l\l ret i32 0\l}"];
|
||||
Node0x17d21a0 [shape=record,label="{entry:\l call void @init_array()\l br label %for.cond\l}"];
|
||||
Node0x17d21a0 -> Node0x17d2020;
|
||||
Node0x17d2020 [shape=record,label="{for.cond: \l %indvar3 = phi i64 [ %indvar.next4, %for.inc28 ], [ 0, %entry ]\l %exitcond6 = icmp ne i64 %indvar3, 1536\l br i1 %exitcond6, label %for.body, label %for.end30\l}"];
|
||||
Node0x17d2020 -> Node0x17d3950;
|
||||
Node0x17d2020 -> Node0x17da500;
|
||||
Node0x17d3950 [shape=record,label="{for.body: \l br label %for.cond1\l}"];
|
||||
Node0x17d3950 -> Node0x17da760;
|
||||
Node0x17da760 [shape=record,label="{for.cond1: \l %indvar1 = phi i64 [ %indvar.next2, %for.inc25 ], [ 0, %for.body ]\l %arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar3, i64 %indvar1\l %exitcond5 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond5, label %for.body3, label %for.end27\l}"];
|
||||
Node0x17da760 -> Node0x17db1e0;
|
||||
Node0x17da760 -> Node0x17db250;
|
||||
Node0x17db1e0 [shape=record,label="{for.body3: \l store float 0.000000e+00, float* %arrayidx5, align 4\l br label %for.cond6\l}"];
|
||||
Node0x17db1e0 -> Node0x17da740;
|
||||
Node0x17da740 [shape=record,label="{for.cond6: \l %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body3 ]\l %arrayidx16 = getelementptr [1536 x [1536 x float]]* @A, i64 0, i64 %indvar3, i64 %indvar\l %arrayidx20 = getelementptr [1536 x [1536 x float]]* @B, i64 0, i64 %indvar, i64 %indvar1\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %for.body8, label %for.end\l}"];
|
||||
Node0x17da740 -> Node0x17da5a0;
|
||||
Node0x17da740 -> Node0x17da800;
|
||||
Node0x17da5a0 [shape=record,label="{for.body8: \l %0 = load float* %arrayidx5, align 4\l %1 = load float* %arrayidx16, align 4\l %2 = load float* %arrayidx20, align 4\l %mul = fmul float %1, %2\l %add = fadd float %0, %mul\l store float %add, float* %arrayidx5, align 4\l br label %for.inc\l}"];
|
||||
Node0x17da5a0 -> Node0x17da5c0;
|
||||
Node0x17da5c0 [shape=record,label="{for.inc: \l %indvar.next = add i64 %indvar, 1\l br label %for.cond6\l}"];
|
||||
Node0x17da5c0 -> Node0x17da740[constraint=false];
|
||||
Node0x17da800 [shape=record,label="{for.end: \l br label %for.inc25\l}"];
|
||||
Node0x17da800 -> Node0x17dae20;
|
||||
Node0x17dae20 [shape=record,label="{for.inc25: \l %indvar.next2 = add i64 %indvar1, 1\l br label %for.cond1\l}"];
|
||||
Node0x17dae20 -> Node0x17da760[constraint=false];
|
||||
Node0x17db250 [shape=record,label="{for.end27: \l br label %for.inc28\l}"];
|
||||
Node0x17db250 -> Node0x17dae80;
|
||||
Node0x17dae80 [shape=record,label="{for.inc28: \l %indvar.next4 = add i64 %indvar3, 1\l br label %for.cond\l}"];
|
||||
Node0x17dae80 -> Node0x17d2020[constraint=false];
|
||||
Node0x17da500 [shape=record,label="{for.end30: \l ret i32 0\l}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x26a8b20 {
|
||||
subgraph cluster_0x17d3f30 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x26a9220 {
|
||||
subgraph cluster_0x17d38d0 {
|
||||
label = "";
|
||||
style = filled;
|
||||
color = 3 subgraph cluster_0x26ad500 {
|
||||
color = 3 subgraph cluster_0x17d3850 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x26ad480 {
|
||||
subgraph cluster_0x17d37d0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x26aced0;
|
||||
Node0x26ace60;
|
||||
Node0x26af640;
|
||||
Node0x17da740;
|
||||
Node0x17da5a0;
|
||||
Node0x17da5c0;
|
||||
}
|
||||
Node0x26aee80;
|
||||
Node0x26aeea0;
|
||||
Node0x26af5e0;
|
||||
Node0x26af6e0;
|
||||
Node0x17da760;
|
||||
Node0x17db1e0;
|
||||
Node0x17da800;
|
||||
Node0x17dae20;
|
||||
}
|
||||
Node0x26acd60;
|
||||
Node0x26acd80;
|
||||
Node0x26aeec0;
|
||||
Node0x26af740;
|
||||
Node0x17d2020;
|
||||
Node0x17d3950;
|
||||
Node0x17db250;
|
||||
Node0x17dae80;
|
||||
}
|
||||
Node0x26ace10;
|
||||
Node0x26af2e0;
|
||||
Node0x17d21a0;
|
||||
Node0x17da500;
|
||||
}
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 171 KiB After Width: | Height: | Size: 186 KiB |
@@ -1,60 +1,60 @@
|
||||
digraph "Scop Graph for 'print_array' function" {
|
||||
label="Scop Graph for 'print_array' function";
|
||||
|
||||
Node0x26ac9a0 [shape=record,label="{%0:\l\l br label %1\l}"];
|
||||
Node0x26ac9a0 -> Node0x26acd00;
|
||||
Node0x26acd00 [shape=record,label="{%1:\l\l %indvar1 = phi i64 [ %indvar.next2, %19 ], [ 0, %0 ]\l %exitcond3 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond3, label %2, label %20\l}"];
|
||||
Node0x26acd00 -> Node0x26a8ac0;
|
||||
Node0x26acd00 -> Node0x26ac9c0;
|
||||
Node0x26a8ac0 [shape=record,label="{%2:\l\l br label %3\l}"];
|
||||
Node0x26a8ac0 -> Node0x26ad940;
|
||||
Node0x26ad940 [shape=record,label="{%3:\l\l %indvar = phi i64 [ %indvar.next, %15 ], [ 0, %2 ]\l %scevgep = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar\l %j.0 = trunc i64 %indvar to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %4, label %16\l}"];
|
||||
Node0x26ad940 -> Node0x26acde0;
|
||||
Node0x26ad940 -> Node0x26ad9e0;
|
||||
Node0x26acde0 [shape=record,label="{%4:\l\l %5 = load %struct._IO_FILE** @stdout, align 8\l %6 = load float* %scevgep\l %7 = fpext float %6 to double\l %8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %7)\l %9 = srem i32 %j.0, 80\l %10 = icmp eq i32 %9, 79\l br i1 %10, label %11, label %14\l}"];
|
||||
Node0x26acde0 -> Node0x26ada40;
|
||||
Node0x26acde0 -> Node0x26acfa0;
|
||||
Node0x26ada40 [shape=record,label="{%11:\l\l %12 = load %struct._IO_FILE** @stdout, align 8\l %13 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %14\l}"];
|
||||
Node0x26ada40 -> Node0x26acfa0;
|
||||
Node0x26acfa0 [shape=record,label="{%14:\l\l br label %15\l}"];
|
||||
Node0x26acfa0 -> Node0x26ad6c0;
|
||||
Node0x26ad6c0 [shape=record,label="{%15:\l\l %indvar.next = add i64 %indvar, 1\l br label %3\l}"];
|
||||
Node0x26ad6c0 -> Node0x26ad940[constraint=false];
|
||||
Node0x26ad9e0 [shape=record,label="{%16:\l\l %17 = load %struct._IO_FILE** @stdout, align 8\l %18 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %17, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %19\l}"];
|
||||
Node0x26ad9e0 -> Node0x26ace00;
|
||||
Node0x26ace00 [shape=record,label="{%19:\l\l %indvar.next2 = add i64 %indvar1, 1\l br label %1\l}"];
|
||||
Node0x26ace00 -> Node0x26acd00[constraint=false];
|
||||
Node0x26ac9c0 [shape=record,label="{%20:\l\l ret void\l}"];
|
||||
Node0x17d2200 [shape=record,label="{entry:\l br label %for.cond\l}"];
|
||||
Node0x17d2200 -> Node0x17d4f20;
|
||||
Node0x17d4f20 [shape=record,label="{for.cond: \l %indvar1 = phi i64 [ %indvar.next2, %for.inc10 ], [ 0, %entry ]\l %exitcond3 = icmp ne i64 %indvar1, 1536\l br i1 %exitcond3, label %for.body, label %for.end12\l}"];
|
||||
Node0x17d4f20 -> Node0x17d3680;
|
||||
Node0x17d4f20 -> Node0x17d9fc0;
|
||||
Node0x17d3680 [shape=record,label="{for.body: \l br label %for.cond1\l}"];
|
||||
Node0x17d3680 -> Node0x17da220;
|
||||
Node0x17da220 [shape=record,label="{for.cond1: \l %indvar = phi i64 [ %indvar.next, %for.inc ], [ 0, %for.body ]\l %arrayidx5 = getelementptr [1536 x [1536 x float]]* @C, i64 0, i64 %indvar1, i64 %indvar\l %j.0 = trunc i64 %indvar to i32\l %exitcond = icmp ne i64 %indvar, 1536\l br i1 %exitcond, label %for.body3, label %for.end\l}"];
|
||||
Node0x17da220 -> Node0x17d9ea0;
|
||||
Node0x17da220 -> Node0x17da0f0;
|
||||
Node0x17d9ea0 [shape=record,label="{for.body3: \l %0 = load %struct._IO_FILE** @stdout, align 8\l %1 = load float* %arrayidx5, align 4\l %conv = fpext float %1 to double\l %call = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([5 x i8]* @.str, i32 0, i32 0), double %conv)\l %rem = srem i32 %j.0, 80\l %cmp6 = icmp eq i32 %rem, 79\l br i1 %cmp6, label %if.then, label %if.end\l}"];
|
||||
Node0x17d9ea0 -> Node0x17d9ec0;
|
||||
Node0x17d9ea0 -> Node0x17da060;
|
||||
Node0x17d9ec0 [shape=record,label="{if.then: \l %2 = load %struct._IO_FILE** @stdout, align 8\l %call8 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %if.end\l}"];
|
||||
Node0x17d9ec0 -> Node0x17da060;
|
||||
Node0x17da060 [shape=record,label="{if.end: \l br label %for.inc\l}"];
|
||||
Node0x17da060 -> Node0x17da200;
|
||||
Node0x17da200 [shape=record,label="{for.inc: \l %indvar.next = add i64 %indvar, 1\l br label %for.cond1\l}"];
|
||||
Node0x17da200 -> Node0x17da220[constraint=false];
|
||||
Node0x17da0f0 [shape=record,label="{for.end: \l %3 = load %struct._IO_FILE** @stdout, align 8\l %call9 = call i32 (%struct._IO_FILE*, i8*, ...)* @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0))\l br label %for.inc10\l}"];
|
||||
Node0x17da0f0 -> Node0x17da080;
|
||||
Node0x17da080 [shape=record,label="{for.inc10: \l %indvar.next2 = add i64 %indvar1, 1\l br label %for.cond\l}"];
|
||||
Node0x17da080 -> Node0x17d4f20[constraint=false];
|
||||
Node0x17d9fc0 [shape=record,label="{for.end12: \l ret void\l}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x26adae0 {
|
||||
subgraph cluster_0x17d38f0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x26aa030 {
|
||||
label = "";
|
||||
subgraph cluster_0x17d4030 {
|
||||
label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
|
||||
style = solid;
|
||||
color = 6
|
||||
subgraph cluster_0x26a9fb0 {
|
||||
label = "";
|
||||
subgraph cluster_0x17d3fb0 {
|
||||
label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x26adb60 {
|
||||
label = "";
|
||||
subgraph cluster_0x17d3f30 {
|
||||
label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x26acde0;
|
||||
Node0x26ada40;
|
||||
Node0x17d9ea0;
|
||||
Node0x17d9ec0;
|
||||
}
|
||||
Node0x26ad940;
|
||||
Node0x26acfa0;
|
||||
Node0x26ad6c0;
|
||||
Node0x17da220;
|
||||
Node0x17da060;
|
||||
Node0x17da200;
|
||||
}
|
||||
Node0x26acd00;
|
||||
Node0x26a8ac0;
|
||||
Node0x26ad9e0;
|
||||
Node0x26ace00;
|
||||
Node0x17d4f20;
|
||||
Node0x17d3680;
|
||||
Node0x17da0f0;
|
||||
Node0x17da080;
|
||||
}
|
||||
Node0x26ac9a0;
|
||||
Node0x26ac9c0;
|
||||
Node0x17d2200;
|
||||
Node0x17d9fc0;
|
||||
}
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 177 KiB After Width: | Height: | Size: 196 KiB |
@@ -1,47 +1,47 @@
|
||||
digraph "Scop Graph for 'init_array' function" {
|
||||
label="Scop Graph for 'init_array' function";
|
||||
|
||||
Node0x24dfca0 [shape=record,label="{%0}"];
|
||||
Node0x24dfca0 -> Node0x24dfdf0;
|
||||
Node0x24dfdf0 [shape=record,label="{%1}"];
|
||||
Node0x24dfdf0 -> Node0x24dee50;
|
||||
Node0x24dfdf0 -> Node0x24def50;
|
||||
Node0x24dee50 [shape=record,label="{%3}"];
|
||||
Node0x24dee50 -> Node0x24deec0;
|
||||
Node0x24deec0 [shape=record,label="{%4}"];
|
||||
Node0x24deec0 -> Node0x24dfdc0;
|
||||
Node0x24deec0 -> Node0x24df0c0;
|
||||
Node0x24dfdc0 [shape=record,label="{%5}"];
|
||||
Node0x24dfdc0 -> Node0x24defb0;
|
||||
Node0x24defb0 [shape=record,label="{%16}"];
|
||||
Node0x24defb0 -> Node0x24deec0[constraint=false];
|
||||
Node0x24df0c0 [shape=record,label="{%17}"];
|
||||
Node0x24df0c0 -> Node0x24deee0;
|
||||
Node0x24deee0 [shape=record,label="{%18}"];
|
||||
Node0x24deee0 -> Node0x24dfdf0[constraint=false];
|
||||
Node0x24def50 [shape=record,label="{%19}"];
|
||||
Node0x17d4370 [shape=record,label="{entry}"];
|
||||
Node0x17d4370 -> Node0x17d9de0;
|
||||
Node0x17d9de0 [shape=record,label="{for.cond}"];
|
||||
Node0x17d9de0 -> Node0x17d9e40;
|
||||
Node0x17d9de0 -> Node0x17d9ea0;
|
||||
Node0x17d9e40 [shape=record,label="{for.body}"];
|
||||
Node0x17d9e40 -> Node0x17d9f90;
|
||||
Node0x17d9f90 [shape=record,label="{for.cond1}"];
|
||||
Node0x17d9f90 -> Node0x17d9ff0;
|
||||
Node0x17d9f90 -> Node0x17da050;
|
||||
Node0x17d9ff0 [shape=record,label="{for.body3}"];
|
||||
Node0x17d9ff0 -> Node0x17d9f00;
|
||||
Node0x17d9f00 [shape=record,label="{for.inc}"];
|
||||
Node0x17d9f00 -> Node0x17d9f90[constraint=false];
|
||||
Node0x17da050 [shape=record,label="{for.end}"];
|
||||
Node0x17da050 -> Node0x17da200;
|
||||
Node0x17da200 [shape=record,label="{for.inc17}"];
|
||||
Node0x17da200 -> Node0x17d9de0[constraint=false];
|
||||
Node0x17d9ea0 [shape=record,label="{for.end19}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x24db4c0 {
|
||||
subgraph cluster_0x17d3a30 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x24dc4e0 {
|
||||
subgraph cluster_0x17d4ec0 {
|
||||
label = "";
|
||||
style = filled;
|
||||
color = 3 subgraph cluster_0x24db780 {
|
||||
color = 3 subgraph cluster_0x17d4180 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
Node0x24deec0;
|
||||
Node0x24dfdc0;
|
||||
Node0x24defb0;
|
||||
Node0x17d9f90;
|
||||
Node0x17d9ff0;
|
||||
Node0x17d9f00;
|
||||
}
|
||||
Node0x24dfdf0;
|
||||
Node0x24dee50;
|
||||
Node0x24df0c0;
|
||||
Node0x24deee0;
|
||||
Node0x17d9de0;
|
||||
Node0x17d9e40;
|
||||
Node0x17da050;
|
||||
Node0x17da200;
|
||||
}
|
||||
Node0x24dfca0;
|
||||
Node0x24def50;
|
||||
Node0x17d4370;
|
||||
Node0x17d9ea0;
|
||||
}
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 28 KiB |
@@ -1,65 +1,65 @@
|
||||
digraph "Scop Graph for 'main' function" {
|
||||
label="Scop Graph for 'main' function";
|
||||
|
||||
Node0x24deb60 [shape=record,label="{%0}"];
|
||||
Node0x24deb60 -> Node0x24deaa0;
|
||||
Node0x24deaa0 [shape=record,label="{%1}"];
|
||||
Node0x24deaa0 -> Node0x24e12a0;
|
||||
Node0x24deaa0 -> Node0x24e0e30;
|
||||
Node0x24e12a0 [shape=record,label="{%2}"];
|
||||
Node0x24e12a0 -> Node0x24e0e00;
|
||||
Node0x24e0e00 [shape=record,label="{%3}"];
|
||||
Node0x24e0e00 -> Node0x24e1410;
|
||||
Node0x24e0e00 -> Node0x24e1470;
|
||||
Node0x24e1410 [shape=record,label="{%4}"];
|
||||
Node0x24e1410 -> Node0x24e1380;
|
||||
Node0x24e1380 [shape=record,label="{%5}"];
|
||||
Node0x24e1380 -> Node0x24deaf0;
|
||||
Node0x24e1380 -> Node0x24e1620;
|
||||
Node0x24deaf0 [shape=record,label="{%6}"];
|
||||
Node0x24deaf0 -> Node0x24e1680;
|
||||
Node0x24e1680 [shape=record,label="{%12}"];
|
||||
Node0x24e1680 -> Node0x24e1380[constraint=false];
|
||||
Node0x24e1620 [shape=record,label="{%13}"];
|
||||
Node0x24e1620 -> Node0x24e16e0;
|
||||
Node0x24e16e0 [shape=record,label="{%14}"];
|
||||
Node0x24e16e0 -> Node0x24e0e00[constraint=false];
|
||||
Node0x24e1470 [shape=record,label="{%15}"];
|
||||
Node0x24e1470 -> Node0x24e01a0;
|
||||
Node0x24e01a0 [shape=record,label="{%16}"];
|
||||
Node0x24e01a0 -> Node0x24deaa0[constraint=false];
|
||||
Node0x24e0e30 [shape=record,label="{%17}"];
|
||||
Node0x17d3950 [shape=record,label="{entry}"];
|
||||
Node0x17d3950 -> Node0x17d21a0;
|
||||
Node0x17d21a0 [shape=record,label="{for.cond}"];
|
||||
Node0x17d21a0 -> Node0x17db9a0;
|
||||
Node0x17d21a0 -> Node0x17da4f0;
|
||||
Node0x17db9a0 [shape=record,label="{for.body}"];
|
||||
Node0x17db9a0 -> Node0x17da5e0;
|
||||
Node0x17da5e0 [shape=record,label="{for.cond1}"];
|
||||
Node0x17da5e0 -> Node0x17da640;
|
||||
Node0x17da5e0 -> Node0x17da6a0;
|
||||
Node0x17da640 [shape=record,label="{for.body3}"];
|
||||
Node0x17da640 -> Node0x17da550;
|
||||
Node0x17da550 [shape=record,label="{for.cond6}"];
|
||||
Node0x17da550 -> Node0x17da5b0;
|
||||
Node0x17da550 -> Node0x17da850;
|
||||
Node0x17da5b0 [shape=record,label="{for.body8}"];
|
||||
Node0x17da5b0 -> Node0x17da8b0;
|
||||
Node0x17da8b0 [shape=record,label="{for.inc}"];
|
||||
Node0x17da8b0 -> Node0x17da550[constraint=false];
|
||||
Node0x17da850 [shape=record,label="{for.end}"];
|
||||
Node0x17da850 -> Node0x17db930;
|
||||
Node0x17db930 [shape=record,label="{for.inc25}"];
|
||||
Node0x17db930 -> Node0x17da5e0[constraint=false];
|
||||
Node0x17da6a0 [shape=record,label="{for.end27}"];
|
||||
Node0x17da6a0 -> Node0x17dada0;
|
||||
Node0x17dada0 [shape=record,label="{for.inc28}"];
|
||||
Node0x17dada0 -> Node0x17d21a0[constraint=false];
|
||||
Node0x17da4f0 [shape=record,label="{for.end30}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x24dfc10 {
|
||||
subgraph cluster_0x17d3f30 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x24de570 {
|
||||
subgraph cluster_0x17d38d0 {
|
||||
label = "";
|
||||
style = filled;
|
||||
color = 3 subgraph cluster_0x24de7a0 {
|
||||
color = 3 subgraph cluster_0x17d3850 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x24de720 {
|
||||
subgraph cluster_0x17d37d0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x24e1380;
|
||||
Node0x24deaf0;
|
||||
Node0x24e1680;
|
||||
Node0x17da550;
|
||||
Node0x17da5b0;
|
||||
Node0x17da8b0;
|
||||
}
|
||||
Node0x24e0e00;
|
||||
Node0x24e1410;
|
||||
Node0x24e1620;
|
||||
Node0x24e16e0;
|
||||
Node0x17da5e0;
|
||||
Node0x17da640;
|
||||
Node0x17da850;
|
||||
Node0x17db930;
|
||||
}
|
||||
Node0x24deaa0;
|
||||
Node0x24e12a0;
|
||||
Node0x24e1470;
|
||||
Node0x24e01a0;
|
||||
Node0x17d21a0;
|
||||
Node0x17db9a0;
|
||||
Node0x17da6a0;
|
||||
Node0x17dada0;
|
||||
}
|
||||
Node0x24deb60;
|
||||
Node0x24e0e30;
|
||||
Node0x17d3950;
|
||||
Node0x17da4f0;
|
||||
}
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 35 KiB After Width: | Height: | Size: 42 KiB |
@@ -1,60 +1,60 @@
|
||||
digraph "Scop Graph for 'print_array' function" {
|
||||
label="Scop Graph for 'print_array' function";
|
||||
|
||||
Node0x24df2c0 [shape=record,label="{%0}"];
|
||||
Node0x24df2c0 -> Node0x24df2a0;
|
||||
Node0x24df2a0 [shape=record,label="{%1}"];
|
||||
Node0x24df2a0 -> Node0x24dee90;
|
||||
Node0x24df2a0 -> Node0x24dee20;
|
||||
Node0x24dee90 [shape=record,label="{%2}"];
|
||||
Node0x24dee90 -> Node0x24debd0;
|
||||
Node0x24debd0 [shape=record,label="{%3}"];
|
||||
Node0x24debd0 -> Node0x24df150;
|
||||
Node0x24debd0 -> Node0x24de990;
|
||||
Node0x24df150 [shape=record,label="{%4}"];
|
||||
Node0x24df150 -> Node0x24df3a0;
|
||||
Node0x24df150 -> Node0x24defb0;
|
||||
Node0x24df3a0 [shape=record,label="{%11}"];
|
||||
Node0x24df3a0 -> Node0x24defb0;
|
||||
Node0x24defb0 [shape=record,label="{%14}"];
|
||||
Node0x24defb0 -> Node0x24df530;
|
||||
Node0x24df530 [shape=record,label="{%15}"];
|
||||
Node0x24df530 -> Node0x24debd0[constraint=false];
|
||||
Node0x24de990 [shape=record,label="{%16}"];
|
||||
Node0x24de990 -> Node0x24df9a0;
|
||||
Node0x24df9a0 [shape=record,label="{%19}"];
|
||||
Node0x24df9a0 -> Node0x24df2a0[constraint=false];
|
||||
Node0x24dee20 [shape=record,label="{%20}"];
|
||||
Node0x17d2200 [shape=record,label="{entry}"];
|
||||
Node0x17d2200 -> Node0x17d4f20;
|
||||
Node0x17d4f20 [shape=record,label="{for.cond}"];
|
||||
Node0x17d4f20 -> Node0x17d9fd0;
|
||||
Node0x17d4f20 -> Node0x17da030;
|
||||
Node0x17d9fd0 [shape=record,label="{for.body}"];
|
||||
Node0x17d9fd0 -> Node0x17da120;
|
||||
Node0x17da120 [shape=record,label="{for.cond1}"];
|
||||
Node0x17da120 -> Node0x17da180;
|
||||
Node0x17da120 -> Node0x17da1e0;
|
||||
Node0x17da180 [shape=record,label="{for.body3}"];
|
||||
Node0x17da180 -> Node0x17da090;
|
||||
Node0x17da180 -> Node0x17da0f0;
|
||||
Node0x17da090 [shape=record,label="{if.then}"];
|
||||
Node0x17da090 -> Node0x17da0f0;
|
||||
Node0x17da0f0 [shape=record,label="{if.end}"];
|
||||
Node0x17da0f0 -> Node0x17da390;
|
||||
Node0x17da390 [shape=record,label="{for.inc}"];
|
||||
Node0x17da390 -> Node0x17da120[constraint=false];
|
||||
Node0x17da1e0 [shape=record,label="{for.end}"];
|
||||
Node0x17da1e0 -> Node0x17d9e40;
|
||||
Node0x17d9e40 [shape=record,label="{for.inc10}"];
|
||||
Node0x17d9e40 -> Node0x17d4f20[constraint=false];
|
||||
Node0x17da030 [shape=record,label="{for.end12}"];
|
||||
colorscheme = "paired12"
|
||||
subgraph cluster_0x24dbe40 {
|
||||
subgraph cluster_0x17d38f0 {
|
||||
label = "";
|
||||
style = solid;
|
||||
color = 1
|
||||
subgraph cluster_0x24db6e0 {
|
||||
label = "";
|
||||
subgraph cluster_0x17d4030 {
|
||||
label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
|
||||
style = solid;
|
||||
color = 6
|
||||
subgraph cluster_0x24db660 {
|
||||
label = "";
|
||||
subgraph cluster_0x17d3fb0 {
|
||||
label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
|
||||
style = solid;
|
||||
color = 5
|
||||
subgraph cluster_0x24db5e0 {
|
||||
label = "";
|
||||
subgraph cluster_0x17d3f30 {
|
||||
label = "Non affine branch in BB 'for.body3' with LHS: %rem and RHS: 79";
|
||||
style = solid;
|
||||
color = 7
|
||||
Node0x24df150;
|
||||
Node0x24df3a0;
|
||||
Node0x17da180;
|
||||
Node0x17da090;
|
||||
}
|
||||
Node0x24debd0;
|
||||
Node0x24defb0;
|
||||
Node0x24df530;
|
||||
Node0x17da120;
|
||||
Node0x17da0f0;
|
||||
Node0x17da390;
|
||||
}
|
||||
Node0x24df2a0;
|
||||
Node0x24dee90;
|
||||
Node0x24de990;
|
||||
Node0x24df9a0;
|
||||
Node0x17d4f20;
|
||||
Node0x17d9fd0;
|
||||
Node0x17da1e0;
|
||||
Node0x17d9e40;
|
||||
}
|
||||
Node0x24df2c0;
|
||||
Node0x24dee20;
|
||||
Node0x17d2200;
|
||||
Node0x17da030;
|
||||
}
|
||||
}
|
||||
|
||||
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 51 KiB |