mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 21:53:12 +08:00
DAGCombiner: Don't stop finding better chain on 2 aliases
The comment says this was stopped because it was unlikely to be
profitable. This is not true if you want to combine vector loads
with multiple components.
For a simple case that looks like
t0 = load t0 ...
t1 = load t0 ...
t2 = load t0 ...
t3 = load t0 ...
t4 = store t0:1, t0:1
t5 = store t4, t1:0
t6 = store t5, t2:0
t7 = store t6, t3:0
We want to get all of these stores onto a chain
that is a TokenFactor of these N loads. This mostly
solves the AMDGPU merge-stores.ll regressions
with -combiner-alias-analysis for merging vector
stores of vector loads.
llvm-svn: 250138
This commit is contained in:
@@ -14298,14 +14298,12 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
|
||||
SDValue Chain = Chains.pop_back_val();
|
||||
|
||||
// For TokenFactor nodes, look at each operand and only continue up the
|
||||
// chain until we find two aliases. If we've seen two aliases, assume we'll
|
||||
// find more and revert to original chain since the xform is unlikely to be
|
||||
// profitable.
|
||||
// chain until we reach the depth limit.
|
||||
//
|
||||
// FIXME: The depth check could be made to return the last non-aliasing
|
||||
// chain we found before we hit a tokenfactor rather than the original
|
||||
// chain.
|
||||
if (Depth > 6 || Aliases.size() == 2) {
|
||||
if (Depth > 6) {
|
||||
Aliases.clear();
|
||||
Aliases.push_back(OriginalChain);
|
||||
return;
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN %s
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-NOAA %s
|
||||
|
||||
; RUN: llc -march=amdgcn -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
|
||||
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs -combiner-alias-analysis < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=GCN-AA %s
|
||||
|
||||
; Run with devices with different unaligned load restrictions.
|
||||
|
||||
@@ -151,11 +154,15 @@ define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 {
|
||||
|
||||
; FIXME: Should be able to merge this
|
||||
; GCN-LABEL: {{^}}merge_global_store_4_constants_mixed_i32_f32:
|
||||
; XGCN: buffer_store_dwordx4
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_store_dword
|
||||
; GCN: buffer_store_dword
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
|
||||
; GCN-AA: buffer_store_dwordx2
|
||||
; GCN-AA: buffer_store_dword v
|
||||
; GCN-AA: buffer_store_dword v
|
||||
|
||||
; GCN: s_endpgm
|
||||
define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 {
|
||||
%out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
|
||||
@@ -484,11 +491,15 @@ define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1
|
||||
; This works once AA is enabled on the subtarget
|
||||
; GCN-LABEL: {{^}}merge_global_store_4_vector_elts_loads_v4i32:
|
||||
; GCN: buffer_load_dwordx4 [[LOAD:v\[[0-9]+:[0-9]+\]]]
|
||||
; XGCN: buffer_store_dwordx4 [[LOAD]]
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
|
||||
; GCN-AA: buffer_store_dwordx4 [[LOAD]]
|
||||
|
||||
; GCN: s_endpgm
|
||||
define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
|
||||
%out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
|
||||
%out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
|
||||
@@ -606,18 +617,23 @@ define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) {
|
||||
ret void
|
||||
}
|
||||
|
||||
; FIXME: This should do 2 dwordx4 loads
|
||||
; GCN-LABEL: {{^}}merge_global_store_8_constants_i32:
|
||||
; XGCN: buffer_store_dwordx4
|
||||
; XGCN: buffer_store_dwordx4
|
||||
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
; GCN-NOAA: buffer_store_dword v
|
||||
|
||||
; GCN-AA: buffer_store_dwordx4
|
||||
; GCN-AA: buffer_store_dwordx2
|
||||
; GCN-AA: buffer_store_dwordx2
|
||||
|
||||
; GCN: s_endpgm
|
||||
define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) {
|
||||
store i32 34, i32 addrspace(1)* %out, align 4
|
||||
%idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
|
||||
|
||||
Reference in New Issue
Block a user