mirror of
https://github.com/intel/llvm.git
synced 2026-01-26 03:56:16 +08:00
AMDGPU: Fix off-by-one in SIRegisterInfo::eliminateFrameIndex
Summary: The method insertNOPs expected the number of wait states to be passed as parameter, while eliminateFrameIndex passed the immediate argument for the S_NOP, leading to an off-by-one error. Rename the method to make the meaning of its parameter clearer. The number of 4 / 5 wait states (which is what the method has always _tried_ to do according to the comment) is correct according to the hardware docs. I stumbled upon this while trying to track down the cause of https://bugs.freedesktop.org/show_bug.cgi?id=93264. While clearly needed, this patch unfortunately does not fix that bug... Reviewers: arsenm, tstellarAMD Subscribers: arsenm, llvm-commits Differential Revision: http://reviews.llvm.org/D15542 llvm-svn: 255906
This commit is contained in:
@@ -742,8 +742,8 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(MachineBasicBlock &MBB,
|
||||
return TmpReg;
|
||||
}
|
||||
|
||||
void SIInstrInfo::insertNOPs(MachineBasicBlock::iterator MI,
|
||||
int Count) const {
|
||||
void SIInstrInfo::insertWaitStates(MachineBasicBlock::iterator MI,
|
||||
int Count) const {
|
||||
while (Count > 0) {
|
||||
int Arg;
|
||||
if (Count >= 8)
|
||||
|
||||
@@ -441,7 +441,7 @@ public:
|
||||
void LoadM0(MachineInstr *MoveRel, MachineBasicBlock::iterator I,
|
||||
unsigned SavReg, unsigned IndexReg) const;
|
||||
|
||||
void insertNOPs(MachineBasicBlock::iterator MI, int Count) const;
|
||||
void insertWaitStates(MachineBasicBlock::iterator MI, int Count) const;
|
||||
|
||||
/// \brief Returns the operand named \p Op. If \p MI does not have an
|
||||
/// operand named \c Op, this function returns nullptr.
|
||||
|
||||
@@ -331,16 +331,17 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
// TODO: only do this when it is needed
|
||||
switch (MF->getSubtarget<AMDGPUSubtarget>().getGeneration()) {
|
||||
case AMDGPUSubtarget::SOUTHERN_ISLANDS:
|
||||
// "VALU writes SGPR" -> "SMRD reads that SGPR" needs "S_NOP 3" on SI
|
||||
TII->insertNOPs(MI, 3);
|
||||
// "VALU writes SGPR" -> "SMRD reads that SGPR" needs 4 wait states
|
||||
// ("S_NOP 3") on SI
|
||||
TII->insertWaitStates(MI, 4);
|
||||
break;
|
||||
case AMDGPUSubtarget::SEA_ISLANDS:
|
||||
break;
|
||||
default: // VOLCANIC_ISLANDS and later
|
||||
// "VALU writes SGPR -> VMEM reads that SGPR" needs "S_NOP 4" on VI
|
||||
// and later. This also applies to VALUs which write VCC, but we're
|
||||
// unlikely to see VMEM use VCC.
|
||||
TII->insertNOPs(MI, 4);
|
||||
// "VALU writes SGPR -> VMEM reads that SGPR" needs 5 wait states
|
||||
// ("S_NOP 4") on VI and later. This also applies to VALUs which write
|
||||
// VCC, but we're unlikely to see VMEM use VCC.
|
||||
TII->insertWaitStates(MI, 5);
|
||||
}
|
||||
|
||||
MI->eraseFromParent();
|
||||
|
||||
Reference in New Issue
Block a user