[RISCV] Begin moving post-isel vector peepholes to a MF pass (#70342)

We currently have three postprocess peephole optimisations for vector
pseudos:

1) Masked pseudo with all-ones mask -> unmasked pseudo (see the sketch
   after this list)
2) Merge vmerge pseudo into operand pseudo's mask
3) vmerge pseudo with all-ones mask -> vmv.v.v pseudo
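
For illustration, here is a rough MIR-level sketch of the first
peephole. This is hypothetical MIR, not taken from a test: %pt, %a, %b
and %avl are placeholder virtual registers, and PseudoVADD_VV_M1_MASK
stands in for any masked pseudo, with operand details simplified:

  %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
  $v0 = COPY %mask
  %x:vrnov0 = PseudoVADD_VV_M1_MASK %pt, %a, %b, $v0, %avl, 5, 0
  ->
  %x:vr = PseudoVADD_VV_M1 %pt, %a, %b, %avl, 5, 0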

This patch aims to move these peepholes out of SelectionDAG and into a
separate RISCVFoldMasks MachineFunction pass.

There are a few motivations for doing this:

* The current SelectionDAG implementation operates on MachineSDNodes,
which are essentially MachineInstrs but require extra logic to reason
about chain and glue operands. The RISCVII::has*Op helper functions
also don't exactly line up with the SDNode operands. Mutating these
pseudos and their operands in place becomes significantly easier at
the MachineInstr level. For example, we would no longer need to check
for cycles in the DAG during performCombineVMergeAndVOps.

* Moving this code out of SelectionDAG also means that, further down
the line, it can be reused by GlobalISel.

* In performCombineVMergeAndVOps, it may be possible to commute the
operands to enable folding in more cases (see
test/CodeGen/RISCV/rvv/vmadd-vp.ll). There is existing machinery to
commute operands in TII::commuteInstruction, but it's implemented on
MachineInstrs.

The pass runs straight after ISel, before any of the other machine SSA
optimization passes run, so that dead-mi-elimination can mop up any
vmsets that are no longer used (though if preferred we could try to
erase them from inside RISCVFoldMasks itself). This also means that
these peepholes no longer run at codegen -O0, so this patch isn't
strictly NFC.
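
As a hypothetical example (register names simplified, continuing the
sketch style above): once peephole 3 rewrites a vmerge into a vmv.v.v,
the VMSET and the COPY into $v0 lose their last use, and
dead-mi-elimination can then delete them:

  %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5   ; now unused
  $v0 = COPY %mask                        ; now unused
  %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5, 0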

Only the performVMergeToVMv peephole is refactored in this patch; the
remaining two will be moved in follow-up patches. As noted by
@preames, it should be possible to move doPeepholeSExtW out of
SelectionDAG as well.
Luke Lau
2023-10-30 15:17:00 +00:00
committed by GitHub
parent fe8335babb
commit 72e6c1c70d
7 changed files with 254 additions and 36 deletions


@@ -33,6 +33,7 @@ add_llvm_target(RISCVCodeGen
  RISCVMakeCompressible.cpp
  RISCVExpandAtomicPseudoInsts.cpp
  RISCVExpandPseudoInsts.cpp
  RISCVFoldMasks.cpp
  RISCVFrameLowering.cpp
  RISCVGatherScatterLowering.cpp
  RISCVInsertVSETVLI.cpp


@@ -45,6 +45,9 @@ void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
FunctionPass *createRISCVGatherScatterLoweringPass();
void initializeRISCVGatherScatterLoweringPass(PassRegistry &);

FunctionPass *createRISCVFoldMasksPass();
void initializeRISCVFoldMasksPass(PassRegistry &);

FunctionPass *createRISCVOptWInstrsPass();
void initializeRISCVOptWInstrsPass(PassRegistry &);


@@ -0,0 +1,174 @@
//===- RISCVFoldMasks.cpp - MI Vector Pseudo Mask Peepholes ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass performs various peephole optimisations that fold masks into vector
// pseudo instructions after instruction selection.
//
// Currently it converts
// PseudoVMERGE_VVM %false, %false, %true, %allonesmask, %vl, %sew
// ->
// PseudoVMV_V_V %false, %true, %vl, %sew
//
//===---------------------------------------------------------------------===//
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-fold-masks"

namespace {

class RISCVFoldMasks : public MachineFunctionPass {
public:
  static char ID;
  const TargetInstrInfo *TII;
  MachineRegisterInfo *MRI;
  const TargetRegisterInfo *TRI;

  RISCVFoldMasks() : MachineFunctionPass(ID) {
    initializeRISCVFoldMasksPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  StringRef getPassName() const override { return "RISC-V Fold Masks"; }

private:
  bool convertVMergeToVMv(MachineInstr &MI, MachineInstr *MaskDef);
  bool isAllOnesMask(MachineInstr *MaskCopy);
};

} // namespace

char RISCVFoldMasks::ID = 0;

INITIALIZE_PASS(RISCVFoldMasks, DEBUG_TYPE, "RISC-V Fold Masks", false, false)
bool RISCVFoldMasks::isAllOnesMask(MachineInstr *MaskCopy) {
  if (!MaskCopy)
    return false;
  assert(MaskCopy->isCopy() && MaskCopy->getOperand(0).getReg() == RISCV::V0);
  Register SrcReg =
      TRI->lookThruCopyLike(MaskCopy->getOperand(1).getReg(), MRI);
  if (!SrcReg.isVirtual())
    return false;
  MachineInstr *SrcDef = MRI->getVRegDef(SrcReg);
  if (!SrcDef)
    return false;

  // TODO: Check that the VMSET is the expected bitwidth? The pseudo has
  // undefined behaviour if it's the wrong bitwidth, so we could choose to
  // assume that it's all-ones? Same applies to its VL.
  switch (SrcDef->getOpcode()) {
  case RISCV::PseudoVMSET_M_B1:
  case RISCV::PseudoVMSET_M_B2:
  case RISCV::PseudoVMSET_M_B4:
  case RISCV::PseudoVMSET_M_B8:
  case RISCV::PseudoVMSET_M_B16:
  case RISCV::PseudoVMSET_M_B32:
  case RISCV::PseudoVMSET_M_B64:
    return true;
  default:
    return false;
  }
}

// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
bool RISCVFoldMasks::convertVMergeToVMv(MachineInstr &MI, MachineInstr *V0Def) {
#define CASE_VMERGE_TO_VMV(lmul)                                               \
  case RISCV::PseudoVMERGE_VVM_##lmul:                                         \
    NewOpc = RISCV::PseudoVMV_V_V_##lmul;                                      \
    break;

  unsigned NewOpc;
  switch (MI.getOpcode()) {
  default:
    llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
    CASE_VMERGE_TO_VMV(MF8)
    CASE_VMERGE_TO_VMV(MF4)
    CASE_VMERGE_TO_VMV(MF2)
    CASE_VMERGE_TO_VMV(M1)
    CASE_VMERGE_TO_VMV(M2)
    CASE_VMERGE_TO_VMV(M4)
    CASE_VMERGE_TO_VMV(M8)
  }

  Register MergeReg = MI.getOperand(1).getReg();
  Register FalseReg = MI.getOperand(2).getReg();
  // Check merge == false (or merge == undef)
  if (MergeReg != RISCV::NoRegister &&
      TRI->lookThruCopyLike(MergeReg, MRI) !=
          TRI->lookThruCopyLike(FalseReg, MRI))
    return false;

  assert(MI.getOperand(4).isReg() && MI.getOperand(4).getReg() == RISCV::V0);
  if (!isAllOnesMask(V0Def))
    return false;

  MI.setDesc(TII->get(NewOpc));
  MI.removeOperand(1);  // Merge operand
  MI.tieOperands(0, 1); // Tie false to dest
  MI.removeOperand(3);  // Mask operand
  MI.addOperand(
      MachineOperand::CreateImm(RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED));

  // vmv.v.v doesn't have a mask operand, so we may be able to inflate the
  // register class for the destination and merge operands, e.g. VRNoV0 -> VR.
  MRI->recomputeRegClass(MI.getOperand(0).getReg());
  MRI->recomputeRegClass(MI.getOperand(1).getReg());
  return true;
}

bool RISCVFoldMasks::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  // Skip if the vector extension is not enabled.
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();
  if (!ST.hasVInstructions())
    return false;

  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  TRI = MRI->getTargetRegisterInfo();

  bool Changed = false;

  // Masked pseudos coming out of isel will have their mask operand in the
  // form:
  //
  //   $v0:vr = COPY %mask:vr
  //   %x:vr = Pseudo_MASK %a:vr, %b:vr, $v0:vr
  //
  // Because $v0 isn't in SSA, keep track of it so we can check the mask
  // operand on each pseudo.
  MachineInstr *CurrentV0Def;
  for (MachineBasicBlock &MBB : MF) {
    CurrentV0Def = nullptr;
    for (MachineInstr &MI : MBB) {
      unsigned BaseOpc = RISCV::getRVVMCOpcode(MI.getOpcode());
      if (BaseOpc == RISCV::VMERGE_VVM)
        Changed |= convertVMergeToVMv(MI, CurrentV0Def);

      if (MI.definesRegister(RISCV::V0, TRI))
        CurrentV0Def = &MI;
    }
  }

  return Changed;
}

FunctionPass *llvm::createRISCVFoldMasksPass() { return new RISCVFoldMasks(); }


@@ -3685,40 +3685,6 @@ bool RISCVDAGToDAGISel::performCombineVMergeAndVOps(SDNode *N) {
  return true;
}

// Transform (VMERGE_VVM_<LMUL> false, false, true, allones, vl, sew) to
// (VMV_V_V_<LMUL> false, true, vl, sew). It may decrease uses of VMSET.
bool RISCVDAGToDAGISel::performVMergeToVMv(SDNode *N) {
#define CASE_VMERGE_TO_VMV(lmul)                                               \
  case RISCV::PseudoVMERGE_VVM_##lmul:                                         \
    NewOpc = RISCV::PseudoVMV_V_V_##lmul;                                      \
    break;

  unsigned NewOpc;
  switch (N->getMachineOpcode()) {
  default:
    llvm_unreachable("Expected VMERGE_VVM_<LMUL> instruction.");
    CASE_VMERGE_TO_VMV(MF8)
    CASE_VMERGE_TO_VMV(MF4)
    CASE_VMERGE_TO_VMV(MF2)
    CASE_VMERGE_TO_VMV(M1)
    CASE_VMERGE_TO_VMV(M2)
    CASE_VMERGE_TO_VMV(M4)
    CASE_VMERGE_TO_VMV(M8)
  }

  if (!usesAllOnesMask(N, /* MaskOpIdx */ 3))
    return false;

  SDLoc DL(N);
  SDValue PolicyOp =
      CurDAG->getTargetConstant(/*TUMU*/ 0, DL, Subtarget->getXLenVT());
  SDNode *Result = CurDAG->getMachineNode(
      NewOpc, DL, N->getValueType(0),
      {N->getOperand(1), N->getOperand(2), N->getOperand(4), N->getOperand(5),
       PolicyOp});
  ReplaceUses(N, Result);
  return true;
}

bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
  bool MadeChange = false;
  SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();

@@ -3730,8 +3696,6 @@ bool RISCVDAGToDAGISel::doPeepholeMergeVVMFold() {
    if (IsVMerge(N) || IsVMv(N))
      MadeChange |= performCombineVMergeAndVOps(N);
    if (IsVMerge(N) && N->getOperand(0) == N->getOperand(1))
      MadeChange |= performVMergeToVMv(N);
  }

  return MadeChange;
}


@@ -101,6 +101,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
  initializeRISCVOptWInstrsPass(*PR);
  initializeRISCVPreRAExpandPseudoPass(*PR);
  initializeRISCVExpandPseudoPass(*PR);
  initializeRISCVFoldMasksPass(*PR);
  initializeRISCVInsertVSETVLIPass(*PR);
  initializeRISCVInsertReadWriteCSRPass(*PR);
  initializeRISCVDAGToDAGISelPass(*PR);

@@ -414,7 +415,10 @@ void RISCVPassConfig::addPreEmitPass2() {
}

void RISCVPassConfig::addMachineSSAOptimization() {
  addPass(createRISCVFoldMasksPass());

  TargetPassConfig::addMachineSSAOptimization();

  if (EnableMachineCombiner)
    addPass(&MachineCombinerID);


@@ -82,6 +82,7 @@
; CHECK-NEXT: Lazy Block Frequency Analysis
; CHECK-NEXT: RISC-V DAG->DAG Pattern Instruction Selection
; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
; CHECK-NEXT: RISC-V Fold Masks
; CHECK-NEXT: Lazy Machine Block Frequency Analysis
; CHECK-NEXT: Early Tail Duplication
; CHECK-NEXT: Optimize machine instruction PHIs


@@ -0,0 +1,71 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-fold-masks \
# RUN:     -verify-machineinstrs | FileCheck %s
---
name: undef_passthru
body: |
  bb.0:
    liveins: $x1, $v8, $v9

    ; CHECK-LABEL: name: undef_passthru
    ; CHECK: liveins: $x1, $v8, $v9
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: %false:vr = COPY $v8
    ; CHECK-NEXT: %true:vr = COPY $v9
    ; CHECK-NEXT: %avl:gprnox0 = COPY $x1
    ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
    ; CHECK-NEXT: $v0 = COPY %mask
    ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
    %false:vr = COPY $v8
    %true:vr = COPY $v9
    %avl:gprnox0 = COPY $x1
    %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
    $v0 = COPY %mask
    %x:vrnov0 = PseudoVMERGE_VVM_M1 $noreg, %false, %true, $v0, %avl, 5
...
---
name: undef_false
body: |
  bb.0:
    liveins: $x1, $v8, $v9

    ; CHECK-LABEL: name: undef_false
    ; CHECK: liveins: $x1, $v8, $v9
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: %pt:vrnov0 = COPY $v8
    ; CHECK-NEXT: %false:vr = COPY $noreg
    ; CHECK-NEXT: %true:vr = COPY $v9
    ; CHECK-NEXT: %avl:gprnox0 = COPY $x1
    ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
    ; CHECK-NEXT: $v0 = COPY %mask
    ; CHECK-NEXT: %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5 /* e32 */
    %pt:vrnov0 = COPY $v8
    %false:vr = COPY $noreg
    %true:vr = COPY $v9
    %avl:gprnox0 = COPY $x1
    %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
    $v0 = COPY %mask
    %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
...
---
name: equal_passthru_false
body: |
  bb.0:
    liveins: $x1, $v8, $v9

    ; CHECK-LABEL: name: equal_passthru_false
    ; CHECK: liveins: $x1, $v8, $v9
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: %false:vr = COPY $v8
    ; CHECK-NEXT: %pt:vrnov0 = COPY $v8
    ; CHECK-NEXT: %true:vr = COPY $v9
    ; CHECK-NEXT: %avl:gprnox0 = COPY $x1
    ; CHECK-NEXT: %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5 /* e32 */
    ; CHECK-NEXT: $v0 = COPY %mask
    ; CHECK-NEXT: %x:vr = PseudoVMV_V_V_M1 %false, %true, %avl, 5 /* e32 */, 0 /* tu, mu */
    %false:vr = COPY $v8
    %pt:vrnov0 = COPY $v8
    %true:vr = COPY $v9
    %avl:gprnox0 = COPY $x1
    %mask:vmv0 = PseudoVMSET_M_B8 %avl, 5
    $v0 = COPY %mask
    %x:vrnov0 = PseudoVMERGE_VVM_M1 %pt, %false, %true, $v0, %avl, 5
...