diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index d3fb25dc7e57..ea4dda7ccbfe 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -663,9 +663,6 @@ def: InstRW<[BWWriteResGroup10], (instregex "FBSTPm", "MMX_MOVD64mr", "MMX_MOVNTQmr", "MMX_MOVQ64mr", - "MOV(16|32|64)mr", - "MOV8mi", - "MOV8mr", "MOVNTI_64mr", "MOVNTImr", "ST_FP32m", diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 8007f0c1e78d..2858209f2b8a 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -633,9 +633,6 @@ def: InstRW<[HWWriteResGroup1], (instregex "FBSTPm", "MMX_MOVD64mr", "MMX_MOVNTQmr", "MMX_MOVQ64mr", - "MOV(16|32|64)mr", - "MOV8mi", - "MOV8mr", "MOVNTI_64mr", "MOVNTImr", "ST_FP32m", diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 125283558fe6..0adf9ea3b7c9 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -925,30 +925,8 @@ def SBWriteResGroup33 : SchedWriteRes<[SBPort4,SBPort23]> { let NumMicroOps = 2; let ResourceCycles = [1,1]; } -def: InstRW<[SBWriteResGroup33], (instregex "MOV(8|16|32|64)mr", - "MOVNTI_64mr", - "MOVNTImr", - "PUSH64i8", - "PUSH(16|32|64)r", - "VEXTRACTF128mr", - "(V?)MOVAPD(Y?)mr", - "(V?)MOVAPS(Y?)mr", - "(V?)MOVDQA(Y?)mr", - "(V?)MOVDQU(Y?)mr", - "(V?)MOVHPDmr", - "(V?)MOVHPSmr", - "(V?)MOVLPDmr", - "(V?)MOVLPSmr", - "(V?)MOVNTDQ(Y?)mr", - "(V?)MOVNTPD(Y?)mr", - "(V?)MOVNTPS(Y?)mr", - "(V?)MOVPDI2DImr", - "(V?)MOVPQI2QImr", - "(V?)MOVPQIto64mr", - "(V?)MOVSDmr", - "(V?)MOVSSmr", - "(V?)MOVUPD(Y?)mr", - "(V?)MOVUPS(Y?)mr")>; +def: InstRW<[SBWriteResGroup33], (instregex "PUSH64i8", + "PUSH(16|32|64)r")>; def SBWriteResGroup34 : SchedWriteRes<[SBPort0,SBPort15]> { let Latency = 7; @@ -998,7 +976,7 @@ def: InstRW<[SBWriteResGroup37], (instregex "VMASKMOVPD(Y?)mr", "VMASKMOVPS(Y?)mr")>; def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { - let Latency = 5; + let Latency = 2; let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } @@ -1018,8 +996,7 @@ def SBWriteResGroup40 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { let NumMicroOps = 3; let ResourceCycles = [1,1,1]; } -def: InstRW<[SBWriteResGroup40], (instregex "MOV8mi", - "STOSB", +def: InstRW<[SBWriteResGroup40], (instregex "STOSB", "STOSL", "STOSQ", "STOSW")>; @@ -1039,7 +1016,7 @@ def SBWriteResGroup42 : SchedWriteRes<[SBPort05,SBPort015]> { def: InstRW<[SBWriteResGroup42], (instregex "CMPXCHG(8|16|32|64)rr")>; def SBWriteResGroup43 : SchedWriteRes<[SBPort4,SBPort23,SBPort05]> { - let Latency = 5; + let Latency = 3; let NumMicroOps = 4; let ResourceCycles = [1,1,2]; } diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 407b1cc27b67..3c268585c479 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -667,8 +667,6 @@ def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm", "MMX_MOVD64mr", "MMX_MOVNTQmr", "MMX_MOVQ64mr", - "MOV(8|16|32|64)mr", - "MOV8mi", "MOVNTI_64mr", "MOVNTImr", "ST_FP32m", diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index aab1b390e3c3..9d8a4c5434e8 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -1331,9 +1331,6 @@ def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm", "MMX_MOVD64mr", "MMX_MOVNTQmr", "MMX_MOVQ64mr", - "MOV(16|32|64)mr", - "MOV8mi", - "MOV8mr", "MOVAPDmr", "MOVAPSmr", "MOVDQAmr", diff --git a/llvm/test/CodeGen/X86/avx-schedule.ll b/llvm/test/CodeGen/X86/avx-schedule.ll index b11a9bb6d12d..eff1d6cfd7e9 100644 --- a/llvm/test/CodeGen/X86/avx-schedule.ll +++ b/llvm/test/CodeGen/X86/avx-schedule.ll @@ -1662,14 +1662,14 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa ; GENERIC-LABEL: test_extractf128: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_extractf128: ; SANDY: # %bb.0: ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: vzeroupper # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2526,14 +2526,14 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movapd: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movapd: @@ -2588,14 +2588,14 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movaps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movaps: @@ -2816,7 +2816,7 @@ define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) { ; GENERIC-LABEL: test_movntdq: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: vzeroupper # sched: [100:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -2824,7 +2824,7 @@ define void @test_movntdq(<4 x i64> %a0, <4 x i64> *%a1) { ; SANDY-LABEL: test_movntdq: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmovntdq %ymm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: vzeroupper # sched: [100:0.33] ; SANDY-NEXT: retq # sched: [1:1.00] @@ -2883,13 +2883,13 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; GENERIC-LABEL: test_movntpd: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movntpd: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntpd: @@ -2936,13 +2936,13 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; GENERIC-LABEL: test_movntps: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movntps: ; SANDY: # %bb.0: ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movntps: @@ -3116,7 +3116,7 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movupd: @@ -3124,8 +3124,8 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movupd: @@ -3180,7 +3180,7 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] ; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movups: @@ -3188,8 +3188,8 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movups: diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index e4363d273f95..2f5111f9ab30 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -1937,7 +1937,7 @@ define void @f32tof64_loadstore() { ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] ; GENERIC-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f32tof64_loadstore: @@ -1960,7 +1960,7 @@ define void @f64tof32_loadstore() nounwind uwtable { ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] ; GENERIC-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] -; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f64tof32_loadstore: @@ -5934,7 +5934,7 @@ define <4 x i32> @mov_test4(i32* %x) { define void @mov_test5(float %x, float* %y) { ; GENERIC-LABEL: mov_test5: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vmovss %xmm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test5: @@ -5948,7 +5948,7 @@ define void @mov_test5(float %x, float* %y) { define void @mov_test6(double %x, double* %y) { ; GENERIC-LABEL: mov_test6: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vmovsd %xmm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test6: @@ -6943,8 +6943,8 @@ define i8 @zext_test3(<16 x i32> %a, <16 x i32> %b) { define i8 @conv1(<8 x i1>* %R) { ; GENERIC-LABEL: conv1: ; GENERIC: # %bb.0: # %entry -; GENERIC-NEXT: movb $-1, (%rdi) # sched: [5:1.00] -; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: movb $-1, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: movb $-2, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; GENERIC-NEXT: movb $-2, %al # sched: [1:0.33] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7512,7 +7512,7 @@ define void @f1(i32 %c) { ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: movzbl {{.*}}(%rip), %edi # sched: [5:0.50] ; GENERIC-NEXT: xorl $1, %edi # sched: [1:0.33] -; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [5:1.00] +; GENERIC-NEXT: movb %dil, {{.*}}(%rip) # sched: [1:1.00] ; GENERIC-NEXT: jmp f2 # TAILCALL ; ; SKX-LABEL: f1: @@ -7536,7 +7536,7 @@ define void @store_i16_i1(i16 %x, i1 *%y) { ; GENERIC-LABEL: store_i16_i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] -; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: store_i16_i1: @@ -7553,7 +7553,7 @@ define void @store_i8_i1(i8 %x, i1 *%y) { ; GENERIC-LABEL: store_i8_i1: ; GENERIC: # %bb.0: ; GENERIC-NEXT: andl $1, %edi # sched: [1:0.33] -; GENERIC-NEXT: movb %dil, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: movb %dil, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: store_i8_i1: @@ -8698,7 +8698,7 @@ define <16 x float> @broadcast_ss_spill(float %x) { ; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] ; GENERIC-NEXT: .cfi_def_cfa_offset 32 ; GENERIC-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00] +; GENERIC-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; GENERIC-NEXT: callq func_f32 ; GENERIC-NEXT: vbroadcastss (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00] ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] @@ -8728,7 +8728,7 @@ define <8 x double> @broadcast_sd_spill(double %x) { ; GENERIC-NEXT: subq $24, %rsp # sched: [1:0.33] ; GENERIC-NEXT: .cfi_def_cfa_offset 32 ; GENERIC-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [5:1.00] +; GENERIC-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill sched: [1:1.00] ; GENERIC-NEXT: callq func_f64 ; GENERIC-NEXT: vbroadcastsd (%rsp), %zmm0 # 16-byte Folded Reload sched: [5:1.00] ; GENERIC-NEXT: addq $24, %rsp # sched: [1:0.33] diff --git a/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll b/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll index a2aa23bbb916..3752ebdf24ae 100644 --- a/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll +++ b/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll @@ -15,23 +15,22 @@ define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* noca ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: paddd (%ecx), %xmm0 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movdqa %xmm0, (%ecx) -; CHECK-NEXT: movl (%ecx), %esi -; CHECK-NEXT: movl 4(%ecx), %edi -; CHECK-NEXT: shll $4, %edx -; CHECK-NEXT: movl 8(%ecx), %ebx -; CHECK-NEXT: movl 12(%ecx), %ecx -; CHECK-NEXT: movl %esi, 12(%eax,%edx) -; CHECK-NEXT: movl %edi, (%eax,%edx) -; CHECK-NEXT: movl %ebx, 8(%eax,%edx) -; CHECK-NEXT: movl %ecx, 4(%eax,%edx) +; CHECK-NEXT: paddd (%edx), %xmm0 +; CHECK-NEXT: movdqa %xmm0, (%edx) +; CHECK-NEXT: movl (%edx), %esi +; CHECK-NEXT: movl 4(%edx), %edi +; CHECK-NEXT: shll $4, %ecx +; CHECK-NEXT: movl 8(%edx), %ebx +; CHECK-NEXT: movl 12(%edx), %edx +; CHECK-NEXT: movl %esi, 12(%eax,%ecx) +; CHECK-NEXT: movl %edi, (%eax,%ecx) +; CHECK-NEXT: movl %ebx, 8(%eax,%ecx) +; CHECK-NEXT: movl %edx, 4(%eax,%ecx) ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %ebx ; CHECK-NEXT: retl -; CHECK-NEXT: ## -- End function entry: %0 = bitcast i32* %y to <4 x i32>* %1 = load <4 x i32>, <4 x i32>* %0, align 16 diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll index f61173755ce5..5c2853581954 100644 --- a/llvm/test/CodeGen/X86/fp128-i128.ll +++ b/llvm/test/CodeGen/X86/fp128-i128.ll @@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 { ; CHECK-NEXT: andq %rdi, %rcx ; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 ; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: jmp foo # TAILCALL @@ -105,11 +105,11 @@ define fp128 @TestI128_1(fp128 %x) #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: andq {{[0-9]+}}(%rsp), %rcx -; CHECK-NEXT: movq %rcx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rax, (%rsp) +; CHECK-NEXT: movabsq $9223372036854775807, %rax # imm = 0x7FFFFFFFFFFFFFFF +; CHECK-NEXT: andq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rcx +; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %rcx, (%rsp) ; CHECK-NEXT: movaps (%rsp), %xmm0 ; CHECK-NEXT: movaps {{.*}}(%rip), %xmm1 ; CHECK-NEXT: callq __lttf2 @@ -336,11 +336,11 @@ define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 -; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill ; CHECK-NEXT: callq __gttf2 ; CHECK-NEXT: movl %eax, %ebp -; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload +; CHECK-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload ; CHECK-NEXT: movaps %xmm0, %xmm1 ; CHECK-NEXT: callq __subtf3 ; CHECK-NEXT: testl %ebp, %ebp @@ -355,8 +355,8 @@ define void @TestCopySign({ fp128, fp128 }* noalias nocapture sret %agg.result, ; CHECK-NEXT: movaps (%rsp), %xmm2 # 16-byte Reload ; CHECK-NEXT: .LBB10_3: # %cleanup ; CHECK-NEXT: movaps {{.*}}(%rip), %xmm1 -; CHECK-NEXT: andps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: andps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload ; CHECK-NEXT: orps %xmm1, %xmm0 ; CHECK-NEXT: movaps %xmm2, (%rbx) ; CHECK-NEXT: movaps %xmm0, 16(%rbx) diff --git a/llvm/test/CodeGen/X86/memcpy-2.ll b/llvm/test/CodeGen/X86/memcpy-2.ll index 6deeaa698536..546a78f49466 100644 --- a/llvm/test/CodeGen/X86/memcpy-2.ll +++ b/llvm/test/CodeGen/X86/memcpy-2.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Darwin ; RUN: llc < %s -mattr=+sse2 -mtriple=i686-pc-mingw32 -mcpu=core2 | FileCheck %s -check-prefix=SSE2-Mingw32 ; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1 @@ -10,42 +11,62 @@ @.str2 = internal constant [30 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00", align 4 define void @t1(i32 %argc, i8** %argv) nounwind { -entry: ; SSE2-Darwin-LABEL: t1: -; SSE2-Darwin: movsd _.str+16, %xmm0 -; SSE2-Darwin: movsd %xmm0, 16(%esp) -; SSE2-Darwin: movaps _.str, %xmm0 -; SSE2-Darwin: movaps %xmm0 -; SSE2-Darwin: movb $0, 24(%esp) - +; SSE2-Darwin: ## %bb.0: ## %entry +; SSE2-Darwin-NEXT: subl $28, %esp +; SSE2-Darwin-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-Darwin-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; SSE2-Darwin-NEXT: movaps _.str, %xmm0 +; SSE2-Darwin-NEXT: movaps %xmm0, (%esp) +; SSE2-Darwin-NEXT: movb $0, {{[0-9]+}}(%esp) +; ; SSE2-Mingw32-LABEL: t1: -; SSE2-Mingw32: movsd _.str+16, %xmm0 -; SSE2-Mingw32: movsd %xmm0, 16(%esp) -; SSE2-Mingw32: movaps _.str, %xmm0 -; SSE2-Mingw32: movups %xmm0 -; SSE2-Mingw32: movb $0, 24(%esp) - +; SSE2-Mingw32: # %bb.0: # %entry +; SSE2-Mingw32-NEXT: subl $28, %esp +; SSE2-Mingw32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-Mingw32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) +; SSE2-Mingw32-NEXT: movaps _.str, %xmm0 +; SSE2-Mingw32-NEXT: movups %xmm0, (%esp) +; SSE2-Mingw32-NEXT: movb $0, {{[0-9]+}}(%esp) +; ; SSE1-LABEL: t1: -; SSE1: movaps _.str, %xmm0 -; SSE1: movb $0, 24(%esp) -; SSE1: movaps %xmm0 -; SSE1: movl $0, 20(%esp) -; SSE1: movl $0, 16(%esp) - +; SSE1: ## %bb.0: ## %entry +; SSE1-NEXT: subl $28, %esp +; SSE1-NEXT: movaps _.str, %xmm0 +; SSE1-NEXT: movaps %xmm0, (%esp) +; SSE1-NEXT: movb $0, {{[0-9]+}}(%esp) +; SSE1-NEXT: movl $0, {{[0-9]+}}(%esp) +; SSE1-NEXT: movl $0, {{[0-9]+}}(%esp) +; ; NOSSE-LABEL: t1: -; NOSSE: movb $0 -; NOSSE: movl $0 -; NOSSE: movl $0 -; NOSSE: movl $0 -; NOSSE: movl $0 -; NOSSE: movl $101 -; NOSSE: movl $1734438249 - +; NOSSE: ## %bb.0: ## %entry +; NOSSE-NEXT: subl $28, %esp +; NOSSE-NEXT: movb $0, {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl $0, {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl $101, {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl $1734438249, (%esp) ## imm = 0x67616D69 +; ; X86-64-LABEL: t1: -; X86-64: movaps _.str(%rip), %xmm0 -; X86-64: movaps %xmm0 -; X86-64: movb $0 -; X86-64: movq $0 +; X86-64: ## %bb.0: ## %entry +; X86-64-NEXT: movaps {{.*}}(%rip), %xmm0 +; X86-64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +; X86-64-NEXT: movb $0, -{{[0-9]+}}(%rsp) +; X86-64-NEXT: movq $0, -{{[0-9]+}}(%rsp) +; +; NHM_64-LABEL: t1: +; NHM_64: ## %bb.0: ## %entry +; NHM_64-NEXT: movups _.str+{{.*}}(%rip), %xmm0 +; NHM_64-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; NHM_64-NEXT: movaps {{.*}}(%rip), %xmm0 +; NHM_64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +entry: + + + + %tmp1 = alloca [25 x i8] %tmp2 = bitcast [25 x i8]* %tmp1 to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 %tmp2, i8* align 1 getelementptr inbounds ([25 x i8], [25 x i8]* @.str, i32 0, i32 0), i32 25, i1 false) @@ -56,34 +77,60 @@ entry: %struct.s0 = type { [2 x double] } define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { -entry: ; SSE2-Darwin-LABEL: t2: -; SSE2-Darwin: movaps (%ecx), %xmm0 -; SSE2-Darwin: movaps %xmm0, (%eax) - +; SSE2-Darwin: ## %bb.0: ## %entry +; SSE2-Darwin-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2-Darwin-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SSE2-Darwin-NEXT: movaps (%ecx), %xmm0 +; SSE2-Darwin-NEXT: movaps %xmm0, (%eax) +; SSE2-Darwin-NEXT: retl +; ; SSE2-Mingw32-LABEL: t2: -; SSE2-Mingw32: movaps (%ecx), %xmm0 -; SSE2-Mingw32: movaps %xmm0, (%eax) - +; SSE2-Mingw32: # %bb.0: # %entry +; SSE2-Mingw32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2-Mingw32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SSE2-Mingw32-NEXT: movaps (%ecx), %xmm0 +; SSE2-Mingw32-NEXT: movaps %xmm0, (%eax) +; SSE2-Mingw32-NEXT: retl +; ; SSE1-LABEL: t2: -; SSE1: movaps (%ecx), %xmm0 -; SSE1: movaps %xmm0, (%eax) - +; SSE1: ## %bb.0: ## %entry +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SSE1-NEXT: movaps (%ecx), %xmm0 +; SSE1-NEXT: movaps %xmm0, (%eax) +; SSE1-NEXT: retl +; ; NOSSE-LABEL: t2: -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl - +; NOSSE: ## %bb.0: ## %entry +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; NOSSE-NEXT: movl 12(%ecx), %edx +; NOSSE-NEXT: movl %edx, 12(%eax) +; NOSSE-NEXT: movl 8(%ecx), %edx +; NOSSE-NEXT: movl %edx, 8(%eax) +; NOSSE-NEXT: movl (%ecx), %edx +; NOSSE-NEXT: movl 4(%ecx), %ecx +; NOSSE-NEXT: movl %ecx, 4(%eax) +; NOSSE-NEXT: movl %edx, (%eax) +; NOSSE-NEXT: retl +; ; X86-64-LABEL: t2: -; X86-64: movaps (%rsi), %xmm0 -; X86-64: movaps %xmm0, (%rdi) +; X86-64: ## %bb.0: ## %entry +; X86-64-NEXT: movaps (%rsi), %xmm0 +; X86-64-NEXT: movaps %xmm0, (%rdi) +; X86-64-NEXT: retq +; +; NHM_64-LABEL: t2: +; NHM_64: ## %bb.0: ## %entry +; NHM_64-NEXT: movaps (%rsi), %xmm0 +; NHM_64-NEXT: movaps %xmm0, (%rdi) +; NHM_64-NEXT: retq +entry: + + + + %tmp2 = bitcast %struct.s0* %a to i8* ; [#uses=1] %tmp3 = bitcast %struct.s0* %b to i8* ; [#uses=1] tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 %tmp2, i8* align 16 %tmp3, i32 16, i1 false) @@ -91,48 +138,72 @@ entry: } define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { -entry: ; SSE2-Darwin-LABEL: t3: -; SSE2-Darwin: movsd (%ecx), %xmm0 -; SSE2-Darwin: movsd 8(%ecx), %xmm1 -; SSE2-Darwin: movsd %xmm1, 8(%eax) -; SSE2-Darwin: movsd %xmm0, (%eax) - +; SSE2-Darwin: ## %bb.0: ## %entry +; SSE2-Darwin-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2-Darwin-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SSE2-Darwin-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-Darwin-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-Darwin-NEXT: movsd %xmm1, 8(%eax) +; SSE2-Darwin-NEXT: movsd %xmm0, (%eax) +; SSE2-Darwin-NEXT: retl +; ; SSE2-Mingw32-LABEL: t3: -; SSE2-Mingw32: movsd (%ecx), %xmm0 -; SSE2-Mingw32: movsd 8(%ecx), %xmm1 -; SSE2-Mingw32: movsd %xmm1, 8(%eax) -; SSE2-Mingw32: movsd %xmm0, (%eax) - +; SSE2-Mingw32: # %bb.0: # %entry +; SSE2-Mingw32-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE2-Mingw32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SSE2-Mingw32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-Mingw32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE2-Mingw32-NEXT: movsd %xmm1, 8(%eax) +; SSE2-Mingw32-NEXT: movsd %xmm0, (%eax) +; SSE2-Mingw32-NEXT: retl +; ; SSE1-LABEL: t3: -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl - +; SSE1: ## %bb.0: ## %entry +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE1-NEXT: movl {{[0-9]+}}(%esp), %ecx +; SSE1-NEXT: movl 12(%ecx), %edx +; SSE1-NEXT: movl %edx, 12(%eax) +; SSE1-NEXT: movl 8(%ecx), %edx +; SSE1-NEXT: movl %edx, 8(%eax) +; SSE1-NEXT: movl (%ecx), %edx +; SSE1-NEXT: movl 4(%ecx), %ecx +; SSE1-NEXT: movl %ecx, 4(%eax) +; SSE1-NEXT: movl %edx, (%eax) +; SSE1-NEXT: retl +; ; NOSSE-LABEL: t3: -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl -; NOSSE: movl - +; NOSSE: ## %bb.0: ## %entry +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; NOSSE-NEXT: movl 12(%ecx), %edx +; NOSSE-NEXT: movl %edx, 12(%eax) +; NOSSE-NEXT: movl 8(%ecx), %edx +; NOSSE-NEXT: movl %edx, 8(%eax) +; NOSSE-NEXT: movl (%ecx), %edx +; NOSSE-NEXT: movl 4(%ecx), %ecx +; NOSSE-NEXT: movl %ecx, 4(%eax) +; NOSSE-NEXT: movl %edx, (%eax) +; NOSSE-NEXT: retl +; ; X86-64-LABEL: t3: -; X86-64: movq (%rsi), %rax -; X86-64: movq 8(%rsi), %rcx -; X86-64: movq %rcx, 8(%rdi) -; X86-64: movq %rax, (%rdi) +; X86-64: ## %bb.0: ## %entry +; X86-64-NEXT: movq (%rsi), %rax +; X86-64-NEXT: movq 8(%rsi), %rcx +; X86-64-NEXT: movq %rcx, 8(%rdi) +; X86-64-NEXT: movq %rax, (%rdi) +; X86-64-NEXT: retq +; +; NHM_64-LABEL: t3: +; NHM_64: ## %bb.0: ## %entry +; NHM_64-NEXT: movups (%rsi), %xmm0 +; NHM_64-NEXT: movups %xmm0, (%rdi) +; NHM_64-NEXT: retq +entry: + + + + %tmp2 = bitcast %struct.s0* %a to i8* ; [#uses=1] %tmp3 = bitcast %struct.s0* %b to i8* ; [#uses=1] tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 %tmp2, i8* align 8 %tmp3, i32 16, i1 false) @@ -140,65 +211,80 @@ entry: } define void @t4() nounwind { -entry: ; SSE2-Darwin-LABEL: t4: -; SSE2-Darwin: movw $120 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 -; SSE2-Darwin: movl $2021161080 - +; SSE2-Darwin: ## %bb.0: ## %entry +; SSE2-Darwin-NEXT: subl $32, %esp +; SSE2-Darwin-NEXT: movw $120, {{[0-9]+}}(%esp) +; SSE2-Darwin-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE2-Darwin-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE2-Darwin-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE2-Darwin-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE2-Darwin-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE2-Darwin-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE2-Darwin-NEXT: movl $2021161080, (%esp) ## imm = 0x78787878 +; ; SSE2-Mingw32-LABEL: t4: -; SSE2-Mingw32: movw $120 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 -; SSE2-Mingw32: movl $2021161080 - +; SSE2-Mingw32: # %bb.0: # %entry +; SSE2-Mingw32-NEXT: subl $32, %esp +; SSE2-Mingw32-NEXT: movw $120, {{[0-9]+}}(%esp) +; SSE2-Mingw32-NEXT: movl $2021161080, {{[0-9]+}}(%esp) # imm = 0x78787878 +; SSE2-Mingw32-NEXT: movl $2021161080, {{[0-9]+}}(%esp) # imm = 0x78787878 +; SSE2-Mingw32-NEXT: movl $2021161080, {{[0-9]+}}(%esp) # imm = 0x78787878 +; SSE2-Mingw32-NEXT: movl $2021161080, {{[0-9]+}}(%esp) # imm = 0x78787878 +; SSE2-Mingw32-NEXT: movl $2021161080, {{[0-9]+}}(%esp) # imm = 0x78787878 +; SSE2-Mingw32-NEXT: movl $2021161080, {{[0-9]+}}(%esp) # imm = 0x78787878 +; SSE2-Mingw32-NEXT: movl $2021161080, (%esp) # imm = 0x78787878 +; ; SSE1-LABEL: t4: -; SSE1: movw $120 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 - +; SSE1: ## %bb.0: ## %entry +; SSE1-NEXT: subl $32, %esp +; SSE1-NEXT: movw $120, {{[0-9]+}}(%esp) +; SSE1-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE1-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE1-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE1-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE1-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE1-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; SSE1-NEXT: movl $2021161080, (%esp) ## imm = 0x78787878 +; ; NOSSE-LABEL: t4: -; NOSSE: movw $120 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 -; NOSSE: movl $2021161080 +; NOSSE: ## %bb.0: ## %entry +; NOSSE-NEXT: subl $32, %esp +; NOSSE-NEXT: movw $120, {{[0-9]+}}(%esp) +; NOSSE-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; NOSSE-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; NOSSE-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; NOSSE-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; NOSSE-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; NOSSE-NEXT: movl $2021161080, {{[0-9]+}}(%esp) ## imm = 0x78787878 +; NOSSE-NEXT: movl $2021161080, (%esp) ## imm = 0x78787878 +; +; X86-64-LABEL: t4: +; X86-64: ## %bb.0: ## %entry +; X86-64-NEXT: movabsq $33909456017848440, %rax ## imm = 0x78787878787878 +; X86-64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X86-64-NEXT: movabsq $8680820740569200760, %rax ## imm = 0x7878787878787878 +; X86-64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X86-64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; X86-64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) +; +; NHM_64-LABEL: t4: +; NHM_64: ## %bb.0: ## %entry +; NHM_64-NEXT: movups _.str2+{{.*}}(%rip), %xmm0 +; NHM_64-NEXT: movups %xmm0, -{{[0-9]+}}(%rsp) +; NHM_64-NEXT: movups {{.*}}(%rip), %xmm0 +; NHM_64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) +entry: + + + ;;; TODO: (1) Some of the loads and stores are certainly unaligned and (2) the first load and first ;;; store overlap with the second load and second store respectively. ;;; ;;; Is either of the sequences ideal? -; X86-64-LABEL: t4: -; X86-64: movabsq $33909456017848440, %rax ## imm = 0x78787878787878 -; X86-64: movq %rax, -10(%rsp) -; X86-64: movabsq $8680820740569200760, %rax ## imm = 0x7878787878787878 -; X86-64: movq %rax, -16(%rsp) -; X86-64: movq %rax, -24(%rsp) -; X86-64: movq %rax, -32(%rsp) -; NHM_64-LABEL: t4: -; NHM_64: movups _.str2+14(%rip), %xmm0 -; NHM_64: movups %xmm0, -26(%rsp) -; NHM_64: movups _.str2(%rip), %xmm0 -; NHM_64: movaps %xmm0, -40(%rsp) %tmp1 = alloca [30 x i8] %tmp2 = bitcast [30 x i8]* %tmp1 to i8* diff --git a/llvm/test/CodeGen/X86/misched-matrix.ll b/llvm/test/CodeGen/X86/misched-matrix.ll index 495ca711e989..cd0105501669 100644 --- a/llvm/test/CodeGen/X86/misched-matrix.ll +++ b/llvm/test/CodeGen/X86/misched-matrix.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-- -mcpu=core2 -pre-RA-sched=source -enable-misched \ ; RUN: -misched-topdown -verify-machineinstrs \ ; RUN: | FileCheck %s -check-prefix=TOPDOWN @@ -15,78 +16,232 @@ ; been reordered with the stores. This tests the scheduler's cheap ; alias analysis ability (that doesn't require any AliasAnalysis pass). ; -; TOPDOWN-LABEL: %for.body -; TOPDOWN: movl %{{.*}}, ( -; TOPDOWN-NOT: imull {{[0-9]*}}( -; TOPDOWN: movl %{{.*}}, 4( -; TOPDOWN-NOT: imull {{[0-9]*}}( -; TOPDOWN: movl %{{.*}}, 8( -; TOPDOWN: movl %{{.*}}, 12( -; TOPDOWN-LABEL: %for.end -; ; For -misched=ilpmin, verify that each expression subtree is ; scheduled independently, and that the imull/adds are interleaved. ; -; ILPMIN-LABEL: %for.body -; ILPMIN: movl %{{.*}}, ( -; ILPMIN: imull -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: movl %{{.*}}, 4( -; ILPMIN: imull -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: movl %{{.*}}, 8( -; ILPMIN: imull -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: imull -; ILPMIN: addl -; ILPMIN: movl %{{.*}}, 12( -; ILPMIN-LABEL: %for.end -; ; For -misched=ilpmax, verify that each expression subtree is ; scheduled independently, and that the imull/adds are clustered. ; -; ILPMAX-LABEL: %for.body -; ILPMAX: movl %{{.*}}, ( -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: movl %{{.*}}, 4( -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: movl %{{.*}}, 8( -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: imull -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: addl -; ILPMAX: movl %{{.*}}, 12( -; ILPMAX-LABEL: %for.end - define void @mmult([4 x i32]* noalias nocapture %m1, [4 x i32]* noalias nocapture %m2, +; TOPDOWN-LABEL: mmult: +; TOPDOWN: # %bb.0: # %entry +; TOPDOWN-NEXT: pushq %rbp +; TOPDOWN-NEXT: .cfi_def_cfa_offset 16 +; TOPDOWN-NEXT: pushq %r14 +; TOPDOWN-NEXT: .cfi_def_cfa_offset 24 +; TOPDOWN-NEXT: pushq %rbx +; TOPDOWN-NEXT: .cfi_def_cfa_offset 32 +; TOPDOWN-NEXT: .cfi_offset %rbx, -32 +; TOPDOWN-NEXT: .cfi_offset %r14, -24 +; TOPDOWN-NEXT: .cfi_offset %rbp, -16 +; TOPDOWN-NEXT: xorl %eax, %eax +; TOPDOWN-NEXT: .p2align 4, 0x90 +; TOPDOWN-NEXT: .LBB0_1: # %for.body +; TOPDOWN-NEXT: # =>This Inner Loop Header: Depth=1 +; TOPDOWN-NEXT: movl (%rdi,%rax), %r10d +; TOPDOWN-NEXT: movl 4(%rdi,%rax), %r14d +; TOPDOWN-NEXT: movl 8(%rdi,%rax), %r9d +; TOPDOWN-NEXT: movl 12(%rdi,%rax), %r8d +; TOPDOWN-NEXT: movl (%rsi), %ecx +; TOPDOWN-NEXT: imull %r10d, %ecx +; TOPDOWN-NEXT: movl 16(%rsi), %ebx +; TOPDOWN-NEXT: imull %r14d, %ebx +; TOPDOWN-NEXT: addl %ecx, %ebx +; TOPDOWN-NEXT: movl 32(%rsi), %ecx +; TOPDOWN-NEXT: imull %r9d, %ecx +; TOPDOWN-NEXT: addl %ebx, %ecx +; TOPDOWN-NEXT: movl 48(%rsi), %r11d +; TOPDOWN-NEXT: imull %r8d, %r11d +; TOPDOWN-NEXT: addl %ecx, %r11d +; TOPDOWN-NEXT: movl 4(%rsi), %ecx +; TOPDOWN-NEXT: imull %r10d, %ecx +; TOPDOWN-NEXT: movl 20(%rsi), %ebx +; TOPDOWN-NEXT: imull %r14d, %ebx +; TOPDOWN-NEXT: addl %ecx, %ebx +; TOPDOWN-NEXT: movl 36(%rsi), %ecx +; TOPDOWN-NEXT: imull %r9d, %ecx +; TOPDOWN-NEXT: addl %ebx, %ecx +; TOPDOWN-NEXT: movl 52(%rsi), %ebx +; TOPDOWN-NEXT: imull %r8d, %ebx +; TOPDOWN-NEXT: addl %ecx, %ebx +; TOPDOWN-NEXT: movl 8(%rsi), %ecx +; TOPDOWN-NEXT: imull %r10d, %ecx +; TOPDOWN-NEXT: movl 24(%rsi), %ebp +; TOPDOWN-NEXT: imull %r14d, %ebp +; TOPDOWN-NEXT: addl %ecx, %ebp +; TOPDOWN-NEXT: movl 40(%rsi), %ecx +; TOPDOWN-NEXT: imull %r9d, %ecx +; TOPDOWN-NEXT: addl %ebp, %ecx +; TOPDOWN-NEXT: movl 56(%rsi), %ebp +; TOPDOWN-NEXT: imull %r8d, %ebp +; TOPDOWN-NEXT: addl %ecx, %ebp +; TOPDOWN-NEXT: imull 12(%rsi), %r10d +; TOPDOWN-NEXT: movl %r11d, (%rdx,%rax) +; TOPDOWN-NEXT: imull 28(%rsi), %r14d +; TOPDOWN-NEXT: addl %r10d, %r14d +; TOPDOWN-NEXT: movl %ebx, 4(%rdx,%rax) +; TOPDOWN-NEXT: imull 44(%rsi), %r9d +; TOPDOWN-NEXT: addl %r14d, %r9d +; TOPDOWN-NEXT: movl %ebp, 8(%rdx,%rax) +; TOPDOWN-NEXT: imull 60(%rsi), %r8d +; TOPDOWN-NEXT: addl %r9d, %r8d +; TOPDOWN-NEXT: movl %r8d, 12(%rdx,%rax) +; TOPDOWN-NEXT: addq $16, %rax +; TOPDOWN-NEXT: cmpl $64, %eax +; TOPDOWN-NEXT: jne .LBB0_1 +; TOPDOWN-NEXT: # %bb.2: # %for.end +; TOPDOWN-NEXT: popq %rbx +; TOPDOWN-NEXT: popq %r14 +; TOPDOWN-NEXT: popq %rbp +; TOPDOWN-NEXT: retq +; +; ILPMIN-LABEL: mmult: +; ILPMIN: # %bb.0: # %entry +; ILPMIN-NEXT: pushq %rbp +; ILPMIN-NEXT: .cfi_def_cfa_offset 16 +; ILPMIN-NEXT: pushq %r14 +; ILPMIN-NEXT: .cfi_def_cfa_offset 24 +; ILPMIN-NEXT: pushq %rbx +; ILPMIN-NEXT: .cfi_def_cfa_offset 32 +; ILPMIN-NEXT: .cfi_offset %rbx, -32 +; ILPMIN-NEXT: .cfi_offset %r14, -24 +; ILPMIN-NEXT: .cfi_offset %rbp, -16 +; ILPMIN-NEXT: xorl %r14d, %r14d +; ILPMIN-NEXT: .p2align 4, 0x90 +; ILPMIN-NEXT: .LBB0_1: # %for.body +; ILPMIN-NEXT: # =>This Inner Loop Header: Depth=1 +; ILPMIN-NEXT: movl (%rdi,%r14), %r8d +; ILPMIN-NEXT: movl 8(%rdi,%r14), %r9d +; ILPMIN-NEXT: movl 4(%rdi,%r14), %r11d +; ILPMIN-NEXT: movl 12(%rdi,%r14), %r10d +; ILPMIN-NEXT: movl (%rsi), %ecx +; ILPMIN-NEXT: movl 16(%rsi), %ebx +; ILPMIN-NEXT: movl 32(%rsi), %ebp +; ILPMIN-NEXT: imull %r8d, %ecx +; ILPMIN-NEXT: imull %r11d, %ebx +; ILPMIN-NEXT: addl %ecx, %ebx +; ILPMIN-NEXT: imull %r9d, %ebp +; ILPMIN-NEXT: addl %ebx, %ebp +; ILPMIN-NEXT: movl 48(%rsi), %ecx +; ILPMIN-NEXT: imull %r10d, %ecx +; ILPMIN-NEXT: addl %ebp, %ecx +; ILPMIN-NEXT: movl %ecx, (%rdx,%r14) +; ILPMIN-NEXT: movl 52(%rsi), %ecx +; ILPMIN-NEXT: movl 4(%rsi), %ebx +; ILPMIN-NEXT: movl 20(%rsi), %ebp +; ILPMIN-NEXT: movl 36(%rsi), %eax +; ILPMIN-NEXT: imull %r8d, %ebx +; ILPMIN-NEXT: imull %r11d, %ebp +; ILPMIN-NEXT: addl %ebx, %ebp +; ILPMIN-NEXT: imull %r9d, %eax +; ILPMIN-NEXT: addl %ebp, %eax +; ILPMIN-NEXT: imull %r10d, %ecx +; ILPMIN-NEXT: addl %eax, %ecx +; ILPMIN-NEXT: movl %ecx, 4(%rdx,%r14) +; ILPMIN-NEXT: movl 56(%rsi), %eax +; ILPMIN-NEXT: movl 8(%rsi), %ecx +; ILPMIN-NEXT: movl 24(%rsi), %ebx +; ILPMIN-NEXT: movl 40(%rsi), %ebp +; ILPMIN-NEXT: imull %r8d, %ecx +; ILPMIN-NEXT: imull %r11d, %ebx +; ILPMIN-NEXT: addl %ecx, %ebx +; ILPMIN-NEXT: imull %r9d, %ebp +; ILPMIN-NEXT: addl %ebx, %ebp +; ILPMIN-NEXT: imull %r10d, %eax +; ILPMIN-NEXT: addl %ebp, %eax +; ILPMIN-NEXT: movl %eax, 8(%rdx,%r14) +; ILPMIN-NEXT: imull 12(%rsi), %r8d +; ILPMIN-NEXT: imull 28(%rsi), %r11d +; ILPMIN-NEXT: addl %r8d, %r11d +; ILPMIN-NEXT: imull 44(%rsi), %r9d +; ILPMIN-NEXT: addl %r11d, %r9d +; ILPMIN-NEXT: imull 60(%rsi), %r10d +; ILPMIN-NEXT: addl %r9d, %r10d +; ILPMIN-NEXT: movl %r10d, 12(%rdx,%r14) +; ILPMIN-NEXT: addq $16, %r14 +; ILPMIN-NEXT: cmpl $64, %r14d +; ILPMIN-NEXT: jne .LBB0_1 +; ILPMIN-NEXT: # %bb.2: # %for.end +; ILPMIN-NEXT: popq %rbx +; ILPMIN-NEXT: popq %r14 +; ILPMIN-NEXT: popq %rbp +; ILPMIN-NEXT: retq +; +; ILPMAX-LABEL: mmult: +; ILPMAX: # %bb.0: # %entry +; ILPMAX-NEXT: pushq %rbp +; ILPMAX-NEXT: .cfi_def_cfa_offset 16 +; ILPMAX-NEXT: pushq %r15 +; ILPMAX-NEXT: .cfi_def_cfa_offset 24 +; ILPMAX-NEXT: pushq %r14 +; ILPMAX-NEXT: .cfi_def_cfa_offset 32 +; ILPMAX-NEXT: pushq %rbx +; ILPMAX-NEXT: .cfi_def_cfa_offset 40 +; ILPMAX-NEXT: .cfi_offset %rbx, -40 +; ILPMAX-NEXT: .cfi_offset %r14, -32 +; ILPMAX-NEXT: .cfi_offset %r15, -24 +; ILPMAX-NEXT: .cfi_offset %rbp, -16 +; ILPMAX-NEXT: xorl %r15d, %r15d +; ILPMAX-NEXT: .p2align 4, 0x90 +; ILPMAX-NEXT: .LBB0_1: # %for.body +; ILPMAX-NEXT: # =>This Inner Loop Header: Depth=1 +; ILPMAX-NEXT: movl (%rdi,%r15), %r8d +; ILPMAX-NEXT: movl 8(%rdi,%r15), %r9d +; ILPMAX-NEXT: movl 4(%rdi,%r15), %r14d +; ILPMAX-NEXT: movl 12(%rdi,%r15), %r10d +; ILPMAX-NEXT: movl 16(%rsi), %ebx +; ILPMAX-NEXT: imull %r14d, %ebx +; ILPMAX-NEXT: movl (%rsi), %ebp +; ILPMAX-NEXT: imull %r8d, %ebp +; ILPMAX-NEXT: movl 32(%rsi), %ecx +; ILPMAX-NEXT: imull %r9d, %ecx +; ILPMAX-NEXT: movl 48(%rsi), %r11d +; ILPMAX-NEXT: imull %r10d, %r11d +; ILPMAX-NEXT: addl %ebp, %ebx +; ILPMAX-NEXT: addl %ebx, %ecx +; ILPMAX-NEXT: addl %ecx, %r11d +; ILPMAX-NEXT: movl %r11d, (%rdx,%r15) +; ILPMAX-NEXT: movl 52(%rsi), %ecx +; ILPMAX-NEXT: imull %r10d, %ecx +; ILPMAX-NEXT: movl 4(%rsi), %ebp +; ILPMAX-NEXT: imull %r8d, %ebp +; ILPMAX-NEXT: movl 20(%rsi), %ebx +; ILPMAX-NEXT: imull %r14d, %ebx +; ILPMAX-NEXT: movl 36(%rsi), %eax +; ILPMAX-NEXT: imull %r9d, %eax +; ILPMAX-NEXT: addl %ebp, %ebx +; ILPMAX-NEXT: addl %ebx, %eax +; ILPMAX-NEXT: addl %eax, %ecx +; ILPMAX-NEXT: movl %ecx, 4(%rdx,%r15) +; ILPMAX-NEXT: movl 56(%rsi), %eax +; ILPMAX-NEXT: imull %r10d, %eax +; ILPMAX-NEXT: movl 8(%rsi), %ecx +; ILPMAX-NEXT: imull %r8d, %ecx +; ILPMAX-NEXT: movl 24(%rsi), %ebp +; ILPMAX-NEXT: imull %r14d, %ebp +; ILPMAX-NEXT: movl 40(%rsi), %ebx +; ILPMAX-NEXT: imull %r9d, %ebx +; ILPMAX-NEXT: addl %ecx, %ebp +; ILPMAX-NEXT: addl %ebp, %ebx +; ILPMAX-NEXT: addl %ebx, %eax +; ILPMAX-NEXT: movl %eax, 8(%rdx,%r15) +; ILPMAX-NEXT: imull 60(%rsi), %r10d +; ILPMAX-NEXT: imull 12(%rsi), %r8d +; ILPMAX-NEXT: imull 28(%rsi), %r14d +; ILPMAX-NEXT: imull 44(%rsi), %r9d +; ILPMAX-NEXT: addl %r8d, %r14d +; ILPMAX-NEXT: addl %r14d, %r9d +; ILPMAX-NEXT: addl %r9d, %r10d +; ILPMAX-NEXT: movl %r10d, 12(%rdx,%r15) +; ILPMAX-NEXT: addq $16, %r15 +; ILPMAX-NEXT: cmpl $64, %r15d +; ILPMAX-NEXT: jne .LBB0_1 +; ILPMAX-NEXT: # %bb.2: # %for.end +; ILPMAX-NEXT: popq %rbx +; ILPMAX-NEXT: popq %r14 +; ILPMAX-NEXT: popq %r15 +; ILPMAX-NEXT: popq %rbp +; ILPMAX-NEXT: retq [4 x i32]* noalias nocapture %m3) nounwind uwtable ssp { entry: br label %for.body diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index a68db2d26df2..2309468dd9d9 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -642,7 +642,7 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; GENERIC-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: movd %mm2, %ecx # sched: [1:0.33] ; GENERIC-NEXT: movd %mm0, %eax # sched: [1:0.33] -; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movd: @@ -675,7 +675,7 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; SANDY-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] ; SANDY-NEXT: movd %mm2, %ecx # sched: [1:0.33] ; SANDY-NEXT: movd %mm0, %eax # sched: [1:0.33] -; SANDY-NEXT: movl %ecx, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movd: diff --git a/llvm/test/CodeGen/X86/pr31045.ll b/llvm/test/CodeGen/X86/pr31045.ll index f62836310bb0..c87b4b39a4fa 100644 --- a/llvm/test/CodeGen/X86/pr31045.ll +++ b/llvm/test/CodeGen/X86/pr31045.ll @@ -21,26 +21,25 @@ define void @_Z1av() local_unnamed_addr #0 { ; CHECK-NEXT: movl struct_obj_3+{{.*}}(%rip), %eax ; CHECK-NEXT: movsbl {{.*}}(%rip), %ecx ; CHECK-NEXT: movzbl {{.*}}(%rip), %edx -; CHECK-NEXT: movzbl {{.*}}(%rip), %esi ; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: leal (%rax,%rax), %edi -; CHECK-NEXT: subl %ecx, %edi -; CHECK-NEXT: subl %edx, %edi -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: notl %ecx -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: movw %cx, struct_obj_12+{{.*}}(%rip) +; CHECK-NEXT: leal (%rax,%rax), %esi +; CHECK-NEXT: subl %ecx, %esi +; CHECK-NEXT: subl %edx, %esi ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: cmovel %eax, %ecx +; CHECK-NEXT: movzbl {{.*}}(%rip), %edx ; CHECK-NEXT: andl struct_obj_8+{{.*}}(%rip), %ecx ; CHECK-NEXT: andl $1, %ecx ; CHECK-NEXT: negl %ecx -; CHECK-NEXT: andl %esi, %ecx +; CHECK-NEXT: andl %edx, %ecx ; CHECK-NEXT: negl %ecx ; CHECK-NEXT: andl %eax, %ecx ; CHECK-NEXT: negl %ecx -; CHECK-NEXT: testl %ecx, %edi +; CHECK-NEXT: testl %ecx, %esi +; CHECK-NEXT: notl %esi +; CHECK-NEXT: movzbl %sil, %eax +; CHECK-NEXT: movw %ax, struct_obj_12+{{.*}}(%rip) ; CHECK-NEXT: setne {{.*}}(%rip) ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/pr34080.ll b/llvm/test/CodeGen/X86/pr34080.ll index e0b09745ad9e..4eb156bf173f 100644 --- a/llvm/test/CodeGen/X86/pr34080.ll +++ b/llvm/test/CodeGen/X86/pr34080.ll @@ -54,10 +54,10 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE2-SCHEDULE-NEXT: movq %rsp, %rbp ; SSE2-SCHEDULE-NEXT: .cfi_def_cfa_register %rbp ; SSE2-SCHEDULE-NEXT: fnstcw -4(%rbp) -; SSE2-SCHEDULE-NEXT: fldt 16(%rbp) ; SSE2-SCHEDULE-NEXT: movzwl -4(%rbp), %eax ; SSE2-SCHEDULE-NEXT: movw $3199, -4(%rbp) ## imm = 0xC7F ; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp) +; SSE2-SCHEDULE-NEXT: fldt 16(%rbp) ; SSE2-SCHEDULE-NEXT: movw %ax, -4(%rbp) ; SSE2-SCHEDULE-NEXT: fistl -8(%rbp) ; SSE2-SCHEDULE-NEXT: fldcw -4(%rbp) @@ -65,12 +65,12 @@ define void @_Z1fe(x86_fp80 %z) local_unnamed_addr #0 { ; SSE2-SCHEDULE-NEXT: movsd %xmm0, -64(%rbp) ; SSE2-SCHEDULE-NEXT: movsd %xmm0, -32(%rbp) ; SSE2-SCHEDULE-NEXT: fsubl -32(%rbp) -; SSE2-SCHEDULE-NEXT: flds {{.*}}(%rip) ; SSE2-SCHEDULE-NEXT: fnstcw -2(%rbp) -; SSE2-SCHEDULE-NEXT: fmul %st(0), %st(1) +; SSE2-SCHEDULE-NEXT: flds {{.*}}(%rip) ; SSE2-SCHEDULE-NEXT: movzwl -2(%rbp), %eax ; SSE2-SCHEDULE-NEXT: movw $3199, -2(%rbp) ## imm = 0xC7F ; SSE2-SCHEDULE-NEXT: fldcw -2(%rbp) +; SSE2-SCHEDULE-NEXT: fmul %st(0), %st(1) ; SSE2-SCHEDULE-NEXT: movw %ax, -2(%rbp) ; SSE2-SCHEDULE-NEXT: fxch %st(1) ; SSE2-SCHEDULE-NEXT: fistl -12(%rbp) diff --git a/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll b/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll index b8bf0fae6cff..3bc4362f5770 100644 --- a/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll +++ b/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll @@ -403,7 +403,7 @@ define void @lshift_mem_b(i64 %b) nounwind readnone { ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] ; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67] -; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [5:1.00] +; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_mem_b: @@ -437,7 +437,7 @@ define void @lshift_mem_b_optsize(i64 %b) nounwind readnone optsize { ; GENERIC: # %bb.0: # %entry ; GENERIC-NEXT: movq {{.*}}(%rip), %rax # sched: [5:0.50] ; GENERIC-NEXT: shrdq $54, %rdi, %rax # sched: [2:0.67] -; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [5:1.00] +; GENERIC-NEXT: movq %rax, {{.*}}(%rip) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: lshift_mem_b_optsize: diff --git a/llvm/test/CodeGen/X86/schedule-x86_64.ll b/llvm/test/CodeGen/X86/schedule-x86_64.ll index 9e3ce649e7f6..3792ab80e965 100644 --- a/llvm/test/CodeGen/X86/schedule-x86_64.ll +++ b/llvm/test/CodeGen/X86/schedule-x86_64.ll @@ -7669,8 +7669,8 @@ define void @test_movnti(i32 %a0, i32 *%a1, i64 %a2, i64 *%a3) optsize { ; GENERIC-LABEL: test_movnti: ; GENERIC: # %bb.0: ; GENERIC-NEXT: #APP -; GENERIC-NEXT: movntil %edi, (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: movntiq %rdx, (%rcx) # sched: [5:1.00] +; GENERIC-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] +; GENERIC-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -7693,8 +7693,8 @@ define void @test_movnti(i32 %a0, i32 *%a1, i64 %a2, i64 *%a3) optsize { ; SANDY-LABEL: test_movnti: ; SANDY: # %bb.0: ; SANDY-NEXT: #APP -; SANDY-NEXT: movntil %edi, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: movntiq %rdx, (%rcx) # sched: [5:1.00] +; SANDY-NEXT: movntil %edi, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: movntiq %rdx, (%rcx) # sched: [1:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -13540,22 +13540,22 @@ define void @test_setcc(i8 %a0, i8 *%a1) optsize { ; GENERIC-NEXT: setge %dil # sched: [1:0.50] ; GENERIC-NEXT: setle %dil # sched: [1:0.50] ; GENERIC-NEXT: setg %dil # sched: [1:0.50] -; GENERIC-NEXT: seto (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setno (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setb (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setae (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: sete (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setne (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setbe (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: seta (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: sets (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setns (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setp (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setnp (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setl (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setge (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setle (%rsi) # sched: [5:1.00] -; GENERIC-NEXT: setg (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: seto (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setno (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setb (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setae (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: sete (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setne (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setbe (%rsi) # sched: [3:1.00] +; GENERIC-NEXT: seta (%rsi) # sched: [3:1.00] +; GENERIC-NEXT: sets (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setns (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setp (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setnp (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setl (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setge (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setle (%rsi) # sched: [2:1.00] +; GENERIC-NEXT: setg (%rsi) # sched: [2:1.00] ; GENERIC-NEXT: #NO_APP ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -13654,22 +13654,22 @@ define void @test_setcc(i8 %a0, i8 *%a1) optsize { ; SANDY-NEXT: setge %dil # sched: [1:0.50] ; SANDY-NEXT: setle %dil # sched: [1:0.50] ; SANDY-NEXT: setg %dil # sched: [1:0.50] -; SANDY-NEXT: seto (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setno (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setb (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setae (%rsi) # sched: [5:1.00] -; SANDY-NEXT: sete (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setne (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setbe (%rsi) # sched: [5:1.00] -; SANDY-NEXT: seta (%rsi) # sched: [5:1.00] -; SANDY-NEXT: sets (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setns (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setp (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setnp (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setl (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setge (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setle (%rsi) # sched: [5:1.00] -; SANDY-NEXT: setg (%rsi) # sched: [5:1.00] +; SANDY-NEXT: seto (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setno (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setb (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setae (%rsi) # sched: [2:1.00] +; SANDY-NEXT: sete (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setne (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setbe (%rsi) # sched: [3:1.00] +; SANDY-NEXT: seta (%rsi) # sched: [3:1.00] +; SANDY-NEXT: sets (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setns (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setp (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setnp (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setl (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setge (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setle (%rsi) # sched: [2:1.00] +; SANDY-NEXT: setg (%rsi) # sched: [2:1.00] ; SANDY-NEXT: #NO_APP ; SANDY-NEXT: retq # sched: [1:1.00] ; diff --git a/llvm/test/CodeGen/X86/sse-schedule.ll b/llvm/test/CodeGen/X86/sse-schedule.ll index 61f309141cdc..51133eb4ca4e 100644 --- a/llvm/test/CodeGen/X86/sse-schedule.ll +++ b/llvm/test/CodeGen/X86/sse-schedule.ll @@ -1939,7 +1939,7 @@ define float @test_divss(float %a0, float %a1, float *%a2) { define void @test_ldmxcsr(i32 %a0) { ; GENERIC-LABEL: test_ldmxcsr: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1957,13 +1957,13 @@ define void @test_ldmxcsr(i32 %a0) { ; ; SANDY-SSE-LABEL: test_ldmxcsr: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-SSE-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; SANDY-SSE-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_ldmxcsr: ; SANDY: # %bb.0: -; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] ; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -2487,7 +2487,7 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movaps: @@ -2508,14 +2508,14 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: movaps %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movaps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movaps: @@ -2712,7 +2712,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] -; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movhps: @@ -2736,7 +2736,7 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] ; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] ; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] -; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movhps: @@ -2959,7 +2959,7 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] ; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movlps: @@ -2980,14 +2980,14 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] ; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movlps: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movlps: @@ -3178,7 +3178,7 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_movntps: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movntps: @@ -3199,12 +3199,12 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-SSE-LABEL: test_movntps: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-SSE-NEXT: movntps %xmm0, (%rdi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movntps: ; SANDY: # %bb.0: -; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movntps: @@ -3275,7 +3275,7 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] ; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movss_mem: @@ -3296,14 +3296,14 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-SSE-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: movss %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movss_mem: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] ; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movss_mem: @@ -3495,7 +3495,7 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; GENERIC: # %bb.0: ; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] ; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movups: @@ -3516,14 +3516,14 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; SANDY-SSE: # %bb.0: ; SANDY-SSE-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-SSE-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-SSE-NEXT: movups %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_movups: ; SANDY: # %bb.0: ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_movups: