mirror of
https://github.com/intel/llvm.git
synced 2026-01-27 06:06:34 +08:00
[AMDGPU] Fix crash in SILoadStoreOptimizer
SILoadStoreOptimizer::checkAndPrepareMerge() expects the base and paired instructions to come in order and scans the MBB from the base to the paired instruction. The original order can be changed if there was a dependent instruction in between and the base instruction was moved. Fixed by bailing out of the optimization. In theory it might still be possible to perform a merge by swapping the instructions, but in practice it bails anyway because it finds a dependency on the same instruction that caused the base to be moved. Differential Revision: https://reviews.llvm.org/D77245
This commit is contained in:
@@ -884,8 +884,19 @@ bool SILoadStoreOptimizer::checkAndPrepareMerge(
|
||||
|
||||
MachineBasicBlock::iterator E = std::next(Paired.I);
|
||||
MachineBasicBlock::iterator MBBI = std::next(CI.I);
|
||||
MachineBasicBlock::iterator MBBE = CI.I->getParent()->end();
|
||||
for (; MBBI != E; ++MBBI) {
|
||||
|
||||
if (MBBI == MBBE) {
|
||||
// CombineInfo::Order is a hint on the instruction ordering within the
|
||||
// basic block. This hint suggests that CI precedes Paired, which is
|
||||
// true most of the time. However, moveInstsAfter() processing a
|
||||
// previous list may have changed this order in a situation when it
|
||||
// moves an instruction which exists in some other merge list.
|
||||
// In this case it must be dependent.
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((getInstClass(MBBI->getOpcode(), *TII) != InstClass) ||
|
||||
(getInstSubclass(MBBI->getOpcode(), *TII) != InstSubclass)) {
|
||||
// This is not a matching instruction, but we can keep looking as
|
||||
|
||||
28
llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll
Normal file
28
llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.ll
Normal file
@@ -0,0 +1,28 @@
|
||||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
@L = external local_unnamed_addr addrspace(3) global [9 x double], align 16
|
||||
@Ldisp = external local_unnamed_addr addrspace(3) global [96 x double], align 16
|
||||
|
||||
; Stores are reordered while loads are being merged. This case used to assert
|
||||
; while scanning for the paired instruction because the scan expected the
|
||||
; paired instruction to follow the base one.
|
||||
|
||||
; GCN-LABEL: {{^}}out_of_order_merge:
|
||||
; GCN-COUNT2: ds_read2_b64
|
||||
; GCN-COUNT3: ds_write_b64
|
||||
define amdgpu_kernel void @out_of_order_merge() {
|
||||
entry:
|
||||
%gep1 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 0
|
||||
%gep2 = getelementptr inbounds [96 x double], [96 x double] addrspace(3)* @Ldisp, i32 0, i32 1
|
||||
%tmp12 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 0, i32 1) to <2 x double> addrspace(3)*), align 8
|
||||
%tmp14 = extractelement <2 x double> %tmp12, i32 0
|
||||
%tmp15 = extractelement <2 x double> %tmp12, i32 1
|
||||
%add50.i = fadd double %tmp14, %tmp15
|
||||
store double %add50.i, double addrspace(3)* %gep1, align 8
|
||||
%tmp16 = load double, double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 1, i32 0), align 8
|
||||
store double %tmp16, double addrspace(3)* %gep2, align 8
|
||||
%tmp17 = load <2 x double>, <2 x double> addrspace(3)* bitcast (double addrspace(3)* getelementptr inbounds ([9 x double], [9 x double] addrspace(3)* @L, i32 2, i32 1) to <2 x double> addrspace(3)*), align 8
|
||||
%tmp19 = extractelement <2 x double> %tmp17, i32 1
|
||||
store double %tmp19, double addrspace(3)* undef, align 8
|
||||
ret void
|
||||
}
|
||||
23
llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir
Normal file
23
llvm/test/CodeGen/AMDGPU/merge-out-of-order-ldst.mir
Normal file
@@ -0,0 +1,23 @@
|
||||
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -run-pass si-load-store-opt %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: out_of_order_merge
|
||||
# GCN: DS_READ2_B64_gfx9
|
||||
# GCN: DS_WRITE_B64_gfx9
|
||||
# GCN: DS_READ2_B64_gfx9
|
||||
# GCN: DS_WRITE_B64_gfx9
|
||||
# GCN: DS_WRITE_B64_gfx9
|
||||
---
|
||||
name: out_of_order_merge
|
||||
body: |
|
||||
bb.0:
|
||||
%4:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%5:vreg_64 = DS_READ_B64_gfx9 %4, 776, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3)
|
||||
%6:vreg_64 = DS_READ_B64_gfx9 %4, 784, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3)
|
||||
%17:vreg_64 = DS_READ_B64_gfx9 %4, 840, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef`, addrspace 3)
|
||||
DS_WRITE_B64_gfx9 %4, %17, 8, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef` + 8, addrspace 3)
|
||||
DS_WRITE_B64_gfx9 %4, %6, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, align 16, addrspace 3)
|
||||
%24:vreg_64 = DS_READ_B64_gfx9 %4, 928, 0, implicit $exec :: (load 8 from `double addrspace(3)* undef` + 8, addrspace 3)
|
||||
DS_WRITE_B64_gfx9 undef %29:vgpr_32, %5, 0, 0, implicit $exec :: (store 8 into `double addrspace(3)* undef`, addrspace 3)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
Reference in New Issue
Block a user