AMDGPU: Don't clobber VCC in MUBUF addr64 emulation

Introducing VCC defs during SIFixSGPRCopies is generally
problematic. Avoid it by starting with the VOP3 form, which takes a
general condition register instead of VCC. This is the easiest instance
to fix, but it doesn't solve any specific problem I'm looking at.

llvm-svn: 363904
This commit is contained in:
Matt Arsenault
2019-06-20 00:51:28 +00:00
parent d88e28d13e
commit c67c484f36
2 changed files with 24 additions and 17 deletions

View File

@@ -4403,21 +4403,28 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
unsigned NewVAddrHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
unsigned NewVAddr = MRI.createVirtualRegister(&AMDGPU::VReg_64RegClass);
const auto *BoolXExecRC = RI.getRegClass(AMDGPU::SReg_1_XEXECRegClassID);
unsigned CondReg0 = MRI.createVirtualRegister(BoolXExecRC);
unsigned CondReg1 = MRI.createVirtualRegister(BoolXExecRC);
unsigned RsrcPtr, NewSRsrc;
std::tie(RsrcPtr, NewSRsrc) = extractRsrcPtr(*this, MI, *Rsrc);
// NewVaddrLo = RsrcPtr:sub0 + VAddr:sub0
DebugLoc DL = MI.getDebugLoc();
fixImplicitOperands(*
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), NewVAddrLo)
.addReg(RsrcPtr, 0, AMDGPU::sub0)
.addReg(VAddr->getReg(), 0, AMDGPU::sub0));
const DebugLoc &DL = MI.getDebugLoc();
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e64), NewVAddrLo)
.addDef(CondReg0)
.addReg(RsrcPtr, 0, AMDGPU::sub0)
.addReg(VAddr->getReg(), 0, AMDGPU::sub0)
.addImm(0);
// NewVaddrHi = RsrcPtr:sub1 + VAddr:sub1
fixImplicitOperands(*
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e32), NewVAddrHi)
.addReg(RsrcPtr, 0, AMDGPU::sub1)
.addReg(VAddr->getReg(), 0, AMDGPU::sub1));
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADDC_U32_e64), NewVAddrHi)
.addDef(CondReg1, RegState::Dead)
.addReg(RsrcPtr, 0, AMDGPU::sub1)
.addReg(VAddr->getReg(), 0, AMDGPU::sub1)
.addReg(CondReg0, RegState::Kill)
.addImm(0);
// NewVaddr = {NewVaddrHi, NewVaddrLo}
BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::REG_SEQUENCE), NewVAddr)

View File

@@ -217,15 +217,15 @@ body: |
# ADDR64-LABEL: name: addr64
# ADDR64-LABEL: bb.0:
# ADDR64: %12:vreg_64 = COPY %8.sub0_sub1
# ADDR64: %13:sreg_64 = S_MOV_B64 0
# ADDR64: %14:sgpr_32 = S_MOV_B32 0
# ADDR64: %15:sgpr_32 = S_MOV_B32 61440
# ADDR64: %16:sreg_128 = REG_SEQUENCE %13, %subreg.sub0_sub1, %14, %subreg.sub2, %15, %subreg.sub3
# ADDR64: %9:vgpr_32 = V_ADD_I32_e32 %12.sub0, %4.sub0, implicit-def $vcc, implicit $exec
# ADDR64: %10:vgpr_32 = V_ADDC_U32_e32 %12.sub1, %4.sub1, implicit-def $vcc, implicit $vcc, implicit $exec
# ADDR64: %14:vreg_64 = COPY %8.sub0_sub1
# ADDR64: %15:sreg_64 = S_MOV_B64 0
# ADDR64: %16:sgpr_32 = S_MOV_B32 0
# ADDR64: %17:sgpr_32 = S_MOV_B32 61440
# ADDR64: %18:sreg_128 = REG_SEQUENCE %15, %subreg.sub0_sub1, %16, %subreg.sub2, %17, %subreg.sub3
# ADDR64: %9:vgpr_32, %12:sreg_64_xexec = V_ADD_I32_e64 %14.sub0, %4.sub0, 0, implicit $exec
# ADDR64: %10:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %14.sub1, %4.sub1, killed %12, 0, implicit $exec
# ADDR64: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1
# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
---
name: addr64
liveins: