mirror of
https://github.com/intel/intel-graphics-compiler.git
synced 2025-11-04 08:21:06 +08:00
1150 lines
38 KiB
C++
1150 lines
38 KiB
C++
/*========================== begin_copyright_notice ============================
|
|
|
|
Copyright (C) 2017-2022 Intel Corporation
|
|
|
|
SPDX-License-Identifier: MIT
|
|
|
|
============================= end_copyright_notice ===========================*/
|
|
|
|
#include "Assertions.h"
|
|
#include "PointsToAnalysis.h"
|
|
#include "Rematerialization.h"
|
|
#include <vector>
|
|
|
|
namespace vISA {
|
|
// Builds the def/use database (`operations`) for the whole kernel:
// - assigns every instruction a monotonically increasing lexical id,
// - records each non-pseudo-kill def (inst, BB) per top declare,
// - records per top declare the use count, GRF rows touched by uses,
//   and the lexical id of the last use,
// - extends lastUseLexId to the BB's last instruction for variables that
//   are both defined and used across the BB boundary (live at exit),
// - collects declares that already have a physical register into
//   preDefinedVars (pre-assigned inputs and the like).
void Rematerialization::populateRefs() {
  unsigned int id = 0;
  for (auto bb : kernel.fg) {
    // Skip empty blocks.
    if (bb->empty())
      continue;

    for (auto inst : *bb) {
      inst->setLexicalId(id++);

      // Pseudo-kills are liveness markers, not real defs/uses.
      if (inst->isPseudoKill())
        continue;

      auto dst = inst->getDst();

      if (dst && !dst->isNullReg()) {
        auto topdcl = dst->getTopDcl();

        if (topdcl) {
          // Record this instruction as a def of the root declare.
          operations[topdcl].def.push_back(std::make_pair(inst, bb));
        }
      }

      for (unsigned int i = 0, numSrc = inst->getNumSrc(); i < numSrc; i++) {
        auto srcOpnd = inst->getSrc(i);
        if (srcOpnd && srcOpnd->isSrcRegRegion()) {
          auto topdcl = srcOpnd->asSrcRegRegion()->getTopDcl();
          // GRF rows (in GRF-size units) touched by this source read.
          unsigned int startRow =
              srcOpnd->getLeftBound() / kernel.numEltPerGRF<Type_UB>();
          unsigned int endRow =
              srcOpnd->getRightBound() / kernel.numEltPerGRF<Type_UB>();
          if (topdcl) {
            auto dclIt = operations.find(topdcl);
            if (dclIt == operations.end()) {
              // First reference to this declare: create its record.
              References r;
              r.numUses = 1;
              for (unsigned int k = startRow; k <= endRow; k++) {
                r.rowsUsed.insert(k);
              }
              // r.uses.push_back(std::make_pair(inst, bb));
              r.lastUseLexId = inst->getLexicalId();
              operations.insert(std::make_pair(topdcl, r));
            } else {
              // Update the existing record in place.
              (*dclIt).second.numUses++;
              for (unsigned int k = startRow; k <= endRow; k++) {
                (*dclIt).second.rowsUsed.insert(k);
              }
              (*dclIt).second.lastUseLexId = inst->getLexicalId();
              //(*dclIt).second.uses.push_back(std::make_pair(inst, bb));
            }
          }
        }
      }
    }

    // Update lastUseLexId based on BB live-out set
    const SparseBitVector &UseOut = liveness.use_out[bb->getId()];
    const SparseBitVector &DefOut = liveness.def_out[bb->getId()];
    SparseBitVector DefUseOutAnd = UseOut & DefOut;
    // For every variable both used and defined that flows out of this BB,
    // conservatively treat the BB's last instruction as its last use.
    for (auto I = DefUseOutAnd.begin(), E = DefUseOutAnd.end();
         I != E;
         ++I) {
      unsigned i = *I;
      if (liveness.isLiveAtExit(bb, i)) {
        auto lr = coloring.getLiveRanges()[i];
        auto dclIt = operations.find(lr->getDcl()->getRootDeclare());
        if (dclIt != operations.end()) {
          (*dclIt).second.lastUseLexId = bb->back()->getLexicalId();
        }
      }
    }
  }

  // Remember declares that already carry a physical register assignment
  // (e.g. kernel inputs); isPartGRFBusyInput() consults this list.
  for (auto &ref : operations) {
    auto dcl = ref.first;
    if (dcl->getRegVar() && dcl->getRegVar()->getPhyReg())
      preDefinedVars.push_back(dcl);
  }
}
|
|
|
|
void Rematerialization::populateSamplerHeaderMap() {
|
|
samplerHeaderMapPopulated = true;
|
|
|
|
if (!samplerHeader)
|
|
return;
|
|
|
|
for (auto bb : kernel.fg) {
|
|
G4_INST *samplerHeaderMov = nullptr;
|
|
for (auto inst : *bb) {
|
|
if (inst->getDst() && inst->getDst()->getTopDcl() == samplerHeader) {
|
|
samplerHeaderMov = inst;
|
|
continue;
|
|
}
|
|
|
|
const G4_SendDescRaw *descRaw = inst->getMsgDescRaw();
|
|
if (samplerHeaderMov && inst->isSplitSend() &&
|
|
descRaw &&
|
|
descRaw->isSampler() &&
|
|
descRaw->isHeaderPresent()) {
|
|
vISA_ASSERT(samplerHeaderMov->getExecSize() == 1,
|
|
"Unexpected sampler header");
|
|
samplerHeaderMap.insert(std::make_pair(inst, samplerHeaderMov));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void Rematerialization::deLVNSamplers(G4_BB *bb) {
  // LVN removes redundant samplerHeader movs, letting several consecutive
  // sampler sends share a single header-setup instruction. Rematerialization
  // may later insert a new header write between such a shared setup and a
  // send that still relies on the old value:
  //
  //   samplerHeader(0,2) = a
  //   send (16) ... samplerHeader ...
  //   samplerHeader(0,2) = b               <-- inserted for remat'd send
  //   send (16) REMAT_V1 samplerHeader ...
  //   send (16) ... samplerHeader ...      <-- now reads the wrong header!
  //
  // To stay correct, this routine un-LVNs the block: each sampler send gets
  // its own clone of its recorded header-setup mov inserted directly before
  // it. Redundant copies are LVN'd back after remat completes.
  if (!samplerHeader)
    return;

  for (auto it = bb->begin(); it != bb->end(); ++it) {
    G4_INST *curInst = *it;

    if (!curInst->isSplitSend() || !curInst->getMsgDesc()->isSampler())
      continue;

    auto headerEntry = samplerHeaderMap.find(curInst);
    if (headerEntry == samplerHeaderMap.end())
      continue;

    // Re-insert this send's own header-setup mov right before it.
    G4_INST *headerSetup = headerEntry->second;
    bb->insertBefore(it, headerSetup->cloneInst());
  }
}
|
|
|
|
bool Rematerialization::inSameSubroutine(G4_BB *use, G4_BB *def) {
  // True iff both blocks belong to the same subroutine, where "not found in
  // BBPerSubroutine" means the block is part of the main kernel body.
  auto defEntry = BBPerSubroutine.find(def);
  auto useEntry = BBPerSubroutine.find(use);
  const bool defInSub = (defEntry != BBPerSubroutine.end());
  const bool useInSub = (useEntry != BBPerSubroutine.end());

  // Neither mapped: both live in the main kernel.
  if (!defInSub && !useInSub)
    return true;

  // Both mapped: same subroutine only if they map to the same entry.
  if (defInSub && useInSub)
    return defEntry->second == useEntry->second;

  // One in a subroutine, the other not (or different subroutines).
  return false;
}
|
|
|
|
// bb1 should block defining original computation and
|
|
// bb2 should be the block where remat is expected.
|
|
bool Rematerialization::areInSameLoop(G4_BB *bb1, G4_BB *bb2,
|
|
bool &bb1OutsideLoop) {
|
|
bb1OutsideLoop = false;
|
|
|
|
// Check whether bb1 is in any loop at all. If not,
|
|
// then we can allow remat even if bb2 is in a loop.
|
|
// The case that is disallowed is where bb1 and bb2
|
|
// are both in loops, but in different ones.
|
|
if (bb1->getNestLevel() == 0)
|
|
bb1OutsideLoop = true;
|
|
|
|
for (auto &loop : kernel.fg.getAllNaturalLoops()) {
|
|
auto &loopBody = loop.second;
|
|
auto bb1InLoop = loopBody.count(bb1) != 0;
|
|
auto bb2InLoop = loopBody.count(bb2) != 0;
|
|
|
|
// Both BBs must be present in all nested loops
|
|
if (bb1InLoop ^ bb2InLoop)
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool Rematerialization::isRangeSpilled(G4_Declare *dcl) {
  // A null declare is trivially not spilled.
  return dcl ? dcl->isSpilled() : false;
}
|
|
|
|
bool Rematerialization::areAllDefsInBB(G4_Declare *dcl, G4_BB *bb,
                                       unsigned int lexId) {
  // True iff every recorded def of dcl lies in bb and appears lexically no
  // later than lexId. Used for sampler payloads, whose setup instructions
  // must precede the sampler send in the same block.
  auto entry = operations.find(dcl);
  if (entry == operations.end())
    return false;

  for (auto &def : entry->second.def) {
    G4_BB *defBB = def.second;
    if (defBB != bb || def.first->getLexicalId() > lexId)
      return false;
  }

  return true;
}
|
|
|
|
unsigned int Rematerialization::getLastUseLexId(G4_Declare *dcl) {
  // Lexical id of dcl's last recorded use; 0 when dcl has no record.
  auto entry = operations.find(dcl);
  return entry == operations.end() ? 0 : entry->second.lastUseLexId;
}
|
|
|
|
// Re-LVN pass run after remat: within each BB that deLVNSamplers() touched,
// remove redundant "mov (1) samplerHeader(0,2) imm" instructions.
// Two kinds of movs are dropped:
//  - a header mov whose value is never read before the next header mov
//    overwrites it (dead write, tracked via `toErase`), and
//  - a header mov that writes the same immediate/type as the previous
//    header mov (redundant duplicate, compared via `lastMov`).
void Rematerialization::cleanRedundantSamplerHeaders() {
  if (!samplerHeader)
    return;

  for (auto bb : kernel.fg) {
    // Header movs seen so far in this BB, in program order.
    std::list<G4_INST *> lastMov;

    // Candidate header mov not yet read by any later instruction;
    // bb->end() encodes "no candidate".
    INST_LIST_ITER toErase = bb->end();

    // Only blocks that were deLVN'ed need cleanup.
    if (deLVNedBBs.find(bb) == deLVNedBBs.end())
      continue;

    for (auto instIt = bb->begin(), instItEnd = bb->end();
         instIt != instItEnd;) {
      auto inst = (*instIt);

      if (toErase != bb->end()) {
        // If any source reads samplerHeader, the candidate mov is live.
        for (unsigned int i = 0; i != inst->getNumSrc(); ++i) {
          auto src = inst->getSrc(i);
          if (src && src->isSrcRegRegion()) {
            auto topdcl = src->getTopDcl();
            if (topdcl == samplerHeader) {
              // samplerHeader is used, so can't erase it
              toErase = bb->end();
            }
          }
        }
      }

      if (inst->isMov() && inst->getDst() && inst->getExecSize() == 1) {
        // mov (1|NM) samplerHeader(0,2)<1>:ud imm
        auto dstTopDcl = inst->getDst()->getTopDcl();

        if (dstTopDcl == samplerHeader) {
          if (toErase != bb->end()) {
            // Previous candidate was never read before this overwrite:
            // it is a dead write — drop it.
            lastMov.remove(*toErase);
            bb->erase(toErase);
            toErase = instIt;
          }

          if (lastMov.size() > 0) {
            auto lastMovSrc0 = lastMov.back()->getSrc(0);
            auto instSrc0 = inst->getSrc(0);

            // Same immediate and type as the previous header mov:
            // this mov is redundant and can be removed outright.
            if (inst->getDst()->getSubRegOff() == 2 &&
                lastMovSrc0->isImm() == instSrc0->isImm() &&
                lastMovSrc0->asImm()->getImm() == instSrc0->asImm()->getImm() &&
                lastMovSrc0->getType() == instSrc0->getType()) {
              // Remove current instruction
              instIt = bb->erase(instIt);
              toErase = bb->end();
              continue;
            }
          }

          // This mov becomes the new not-yet-read candidate.
          toErase = instIt;

          lastMov.push_back(inst);
        }
      }

      instIt++;
    }

    // A trailing unread header mov at the end of the BB is dead too.
    if (toErase != bb->end())
      bb->erase(toErase);
  }
}
|
|
|
|
// Returns true when defInst could be re-executed immediately before useIter
// without a write-after-read hazard: no instruction between defInst and
// useIter (within bb) overwrites any byte range of a variable that defInst
// reads. Walks backwards from useIter until defInst is reached; asserts if
// defInst is not found in bb before useIter.
bool Rematerialization::checkLocalWAR(G4_INST *defInst, G4_BB *bb,
                                      INST_LIST_ITER useIter) {
  INST_LIST_ITER currIter = useIter;
  while (currIter != bb->begin()) {
    currIter--;
    auto currInst = *currIter;
    if (currInst == defInst)
      break;

    auto currDst = currInst->getDst();
    if (currDst && !currDst->isNullReg()) {
      auto dstDcl = currDst->getTopDcl();
      unsigned int curLb = currDst->getLeftBound();
      unsigned int curRb = currDst->getRightBound();

      for (unsigned int i = 0, numSrc = defInst->getNumSrc(); i < numSrc; i++) {
        auto srcOpnd = defInst->getSrc(i);
        if (srcOpnd && !(srcOpnd->isNullReg()) && srcOpnd->isSrcRegRegion()) {
          G4_SrcRegRegion *srcRegion = srcOpnd->asSrcRegRegion();
          auto srcDcl = srcRegion->getTopDcl();
          unsigned int srcLb = srcRegion->getLeftBound(),
                       srcRb = srcRegion->getRightBound();

          // Same variable and overlapping byte ranges: an intervening
          // write clobbers a source defInst would read — not safe.
          if (dstDcl == srcDcl && curRb >= srcLb && curLb <= srcRb) {
            return false;
          }
        }
      }
    }
  }

  // The loop must have stopped at defInst, not at bb->begin().
  vISA_ASSERT(*currIter == defInst,
              "Cannot find defInst for Remat candidate!");

  return true;
}
|
|
|
|
// Stub: always reports false, so canRematerialize() rejects every
// predicated unique def. The parameter is intentionally unused here;
// presumably a NoMask-workaround check would inspect uniqueDef if
// implemented — confirm against platforms requiring the WA.
bool Rematerialization::usesNoMaskWA(const Reference *uniqueDef) {
  return false;
}
|
|
|
|
bool Rematerialization::isPartGRFBusyInput(G4_Declare *inputDcl,
                                           unsigned int atLexId) {
  // inputDcl is an input with a pre-assigned physical register. Extending
  // such an assignment is usually bad for a lone scalar, but can be fine
  // when another pre-assigned declare shares the same GRF (at a different
  // sub-register) and stays live beyond the extension point — the register
  // is busy anyway.
  //
  // Scan the pre-assigned declares for one that occupies the same GRF as
  // inputDcl and whose last use is at or after atLexId; return true if such
  // a declare exists, false otherwise.
  auto inputPhyReg = inputDcl->getRegVar()->getPhyReg();
  if (!inputPhyReg || !inputPhyReg->isGreg())
    return false;

  auto inputRegNum = inputPhyReg->asGreg()->getRegNum();

  for (auto dcl : preDefinedVars) {
    auto refIt = operations.find(dcl);
    if (refIt == operations.end())
      continue;

    auto otherPhyReg = dcl->getRegVar()->getPhyReg();
    if (!otherPhyReg || !otherPhyReg->isGreg())
      continue;

    // Same GRF and still live at (or beyond) the point of interest?
    if (otherPhyReg->asGreg()->getRegNum() == inputRegNum &&
        refIt->second.lastUseLexId >= atLexId)
      return true;
  }

  return false;
}
|
|
|
|
// Decides whether the value read by src (used by the instruction at
// instIter in bb) can profitably be recomputed just before its use instead
// of keeping its live range alive. On success returns true and sets `ref`
// to the unique def that would be cloned. The function is a long chain of
// legality checks (unique def, no predicate/condmod, same subroutine, SIMD
// CF divergence, loop membership) followed by profitability heuristics
// (def-use distance, register pressure, loop remat budget, liveness of the
// def's own sources). Sampler sends get a dedicated profitability path.
bool Rematerialization::canRematerialize(G4_SrcRegRegion *src, G4_BB *bb,
                                         const Reference *&ref,
                                         INST_LIST_ITER instIter) {
  // op1 (8) A B C
  // ...
  // op2 (8) D A X
  //
  // This function will check whether rematerialize an operand,
  // eg A in op2 is possible.
  //
  auto topdcl = src->getTopDcl();
  if (!topdcl)
    return false;

  if (src->getInst()->isSplitIntrinsic())
    return false;

  // ADDRESS/FLAG spilled declare
  if (topdcl->getSpilledDeclare())
    return false;

  // Indirectly addressed ranges cannot be moved safely.
  if (topdcl->getAddressed())
    return false;

  // Already has a physical register (e.g. pre-assigned input).
  if (topdcl->getRegVar()->getPhyReg())
    return false;

  // Src must belong to GRF file
  if ((topdcl->getRegFile() &
       (G4_RegFileKind::G4_GRF | G4_RegFileKind::G4_INPUT)) == 0x0)
    return false;

  // Accumulator-backed operands are not remat candidates.
  G4_AccRegSel accRegSel = src->getAccRegSel();
  if (accRegSel != ACC_UNDEFINED && accRegSel != NOACC)
    return false;

  // Lookup defs of src in program
  auto opIt = operations.find(topdcl);
  if (opIt == operations.end())
    return false;

  auto &&refs = (*opIt).second;
  auto uniqueDef = findUniqueDef(refs, src);

  if (!uniqueDef)
    return false;

  if (gra.isNoRemat(uniqueDef->first))
    return false;

  // Def has a lot of uses so we will need lots of remat to make this profitable
  if (refs.numUses > MAX_USES_REMAT)
    return false;

  // Cloning a condmod def would duplicate the flag write.
  if (uniqueDef->first->getCondMod())
    return false;

  // Predicated defs are rejected (usesNoMaskWA is currently a stub).
  if (uniqueDef->first->getPredicate() && !usesNoMaskWA(uniqueDef))
    return false;

  // It is illegal to rematerialize intrinsic.split instruction as it
  // is dependent on an earlier send.
  if (uniqueDef->first->isSplitIntrinsic())
    return false;

  ref = uniqueDef;

  // Check whether op1 can be recomputed
  auto srcInst = src->getInst();
  auto uniqueDefInst = uniqueDef->first;
  auto uniqueDefBB = uniqueDef->second;

  if (!isRematCandidateOp(uniqueDefInst))
    return false;

  unsigned int srcLexId = srcInst->getLexicalId();
  unsigned int origOpLexId = uniqueDefInst->getLexicalId();

  // Only remat forward (def lexically precedes use).
  if (origOpLexId > srcLexId)
    return false;

  // Def-use must be far away
  unsigned int minDefUseDist = MIN_DEF_USE_DISTANCE;

  // If def is a scalar and its def/use lie entirely in a BB,
  // then increase min def use distance heuristic as remating
  // closeby is unlikely to provide perf benefit.
  if (uniqueDefInst->getExecSize() == 1) {
    if (uniqueDefBB->back()->getLexicalId() >= refs.lastUseLexId)
      minDefUseDist *= 2;
  }

  if ((srcLexId - origOpLexId) < minDefUseDist)
    return false;

  if (!inSameSubroutine(bb, uniqueDefBB))
    return false;

  // If uniqueDefBB is not under SIMD CF, current BB is under SIMD CF
  // and use has NoMask set, then we can remat only if def has NoMask
  // option set.
  if (!uniqueDefBB->isDivergent() && bb->isDivergent() &&
      !uniqueDefInst->isWriteEnableInst() && srcInst->isWriteEnableInst()) {
    return false;
  }

  // Check whether they are in a loop. If yes, they should be in same loop.
  bool uniqueDefOutsideLoop = false;
  bool srcDclSpilled = isRangeSpilled(topdcl);
  bool inSameLoop = areInSameLoop(uniqueDefBB, bb, uniqueDefOutsideLoop);
  bool onlyUseInLoop = uniqueDefOutsideLoop && !inSameLoop;
  bool doNumRematCheck = false;

  // Decide whether it is profitable to push def inside loop before each use
  if (onlyUseInLoop && !srcDclSpilled) {
    // If topdcl does not interfere with other spilled
    // range then skip remating this operation.
    // Be less aggressive if this is SIMD8 since we run the
    // chance of perf penalty with this.
    if ((kernel.getSimdSize() == 8 && rpe.getRegisterPressure(srcInst) <
                                          (float)rematLoopRegPressure * 1.6f) ||
        rematCandidates[topdcl->getRegVar()->getId()] == false ||
        rpe.getRegisterPressure(srcInst) < rematLoopRegPressure)
      return false;

    if (getNumRematsInLoop() > 0) {
      // Restrict non-SIMD1 remats to a low percent of loop instructions.
      float loopInstToTotalInstRatio =
          (float)getNumRematsInLoop() / (float)loopInstsBeforeRemat * 100.0f;
      if (rpe.getMaxRP() < rematRegPressure * 1.4f) {
        // If max RPE is not very high, don't sink too many instructions in loop
        if (loopInstToTotalInstRatio > 1.75f)
          return false;
      } else if (loopInstToTotalInstRatio > 3.89f)
        return false;
    }
  }

  if (!inSameLoop) {
    if (!uniqueDefOutsideLoop)
      return false;
    else {
      // When op1 is outside loop and op2 is indside loop,
      // allow remat if op1 dst dcl is marked spilled.
      // Because that means a load will be inserted in the
      // loop and remat might be more efficient here.
      if (!srcDclSpilled) {
        // If src dcl is not spilled, check whether all
        // src opnds of defInst have been remat'd atleast once.
        // This heuristic helps decide if remat will be worthwhile
        // in a loop.
        doNumRematCheck = true;
      }
    }
  }

  if (inSameLoop && !uniqueDefOutsideLoop) {
    // Remat is done in loop only if declare
    // is marked as spill, so remat will
    // benefit it. Otherwise, if var has a
    // single use within the loop then remat
    // can be done as it doesnt contribute to
    // increase in inst count.
    if (!srcDclSpilled && refs.numUses > 1)
      return false;
  }

  // Check liveness of each src operand in original op
  const unsigned numSrc = uniqueDefInst->getNumSrc();
  std::vector<bool> srcLive(numSrc, true);
  bool anySrcNotLive = false;
  for (unsigned int i = 0; i < numSrc; i++) {
    auto srcOpnd = uniqueDefInst->getSrc(i);
    if (!srcOpnd || srcOpnd->isImm() || srcOpnd->isNullReg())
      continue;

    if (srcOpnd->isSrcRegRegion()) {
      // If src operand base is non-regvar (eg, architecture
      // register) then don't remat. Moving around such
      // registers could be dangerous.
      if (!srcOpnd->getBase()->isRegVar())
        return false;

      // Check whether this src has a single unique def
      auto srcOpndRgn = srcOpnd->asSrcRegRegion();
      auto srcOpndTopDcl = srcOpndRgn->getTopDcl();

      if (doNumRematCheck && getNumRemats(srcOpndTopDcl) == 0) {
        return false;
      }

      // Does the use BB read this variable indirectly (via address reg)?
      const auto &pointsToSet =
          liveness.getPointsToAnalysis().getIndrUseVectorForBB(bb->getId());
      G4_RegVar *srcVar = srcOpndTopDcl->getRegVar();
      auto it = std::find_if(pointsToSet.begin(), pointsToSet.end(),
                             [&srcVar](const pointInfo &element) {
                               return element.var == srcVar && element.off == 0;
                             });

      if (srcOpndTopDcl->getAddressed() &&
          ((uniqueDefBB != bb) || it != pointsToSet.end())) {
        // Indirectly addressed src opnd should not be extended
        return false;
      }

      if ((srcOpndTopDcl->getRegFile() &
           (G4_RegFileKind::G4_GRF | G4_RegFileKind::G4_INPUT)) == 0x0)
        return false;

      // If an instruction has physical registers allocated then
      // don't optimize it.
      if (srcOpndRgn->getBase()->asRegVar()->getPhyReg() &&
          !srcOpndTopDcl->isInput())
        return false;

      if (srcOpndTopDcl->isInput()) {
        auto opIt = operations.find(srcOpndTopDcl);
        if (opIt != operations.end()) {
          // Check whether input variable has explicit def in function
          if ((*opIt).second.def.size() > 0)
            return false;

          if ((*opIt).second.lastUseLexId < srcLexId &&
              (!isPartGRFBusyInput((*opIt).first, srcLexId) || !inSameLoop)) {
            // Inputs are pre-assigned and extending such ranges
            // could lead to worse RA results, unless the input
            // already extends beyond where we intend to remat.
            return false;
          }
        }
      }

      // Run separate checks for sampler
      if (uniqueDefInst->isSplitSend() &&
          uniqueDefInst->getMsgDesc()->isSampler() &&
          // FIXME: For XE2+, the sammpler header prepparation will need
          // more than one mov instructions. Current rematerialization mechanism
          // doesn't handle it correctly.
          !kernel.fg.builder->hasSamplerFeedbackSurface() &&
          uniqueDefInst->getSrc(2)->isImm() &&
          uniqueDefInst->getSrc(3)->isImm()) {
        if (!kernel.getOptions()->getOption(vISA_cacheSamplerHeader))
          return false;

        // Sampler definition to be rematerialized
        // clang-format off
        // sends (8) V54(0,0):f samplerHeader(0,0) V53(0,0) 0x42:ud 0x24a7002:ud{Align1, Q1} resLen = 4, msgLen = 1, extMsgLen = 1
        // clang-format on
        // samplerHeader can be rematerialized as it is r0.0 with modified r0.2.
        // V53 above will simply be extended since it requires extra computation
        // to rematerialize. Above sampler inst has a header. Some sampler
        // instructions may not have a header. For such headerless samplers we
        // need to check whether it is profitable to extend both src operands.

        // Ensure resLen > extMsgLen to make rematerialization profitable.
        unsigned len = uniqueDefInst->getMsgDesc()->getSrc1LenRegs();

        // For Sanity, just verify V53 has defs before sampler send only.
        auto extMsgOpnd = uniqueDefInst->getSrc(1);
        vISA_ASSERT(extMsgOpnd->isSrcRegRegion() == true,
                    "Unexpected src opnd for sampler");

        // Don't remat if sampler def is outside loop and use inside loop
        if (onlyUseInLoop)
          return false;

        if (!areAllDefsInBB(extMsgOpnd->asSrcRegRegion()->getTopDcl(),
                            uniqueDefBB, uniqueDefInst->getLexicalId()))
          return false;

        bool samplerHeaderNotUsed =
            uniqueDefInst->getSrc(0)->asSrcRegRegion()->getTopDcl() !=
            kernel.fg.builder->getBuiltinSamplerHeader();

        const G4_SendDescRaw *descRaw = uniqueDefInst->getMsgDescRaw();
        if (!descRaw || !descRaw->isHeaderPresent() || samplerHeaderNotUsed) {
          // Headerless (or custom-header) sampler: src0 payload would be
          // extended too, so count it against the profitability budget
          // unless it is live past the remat point anyway.
          len += uniqueDefInst->getMsgDesc()->getSrc0LenRegs();

          auto msgOpnd = uniqueDefInst->getSrc(0);
          if (!areAllDefsInBB(msgOpnd->asSrcRegRegion()->getTopDcl(),
                              uniqueDefBB, uniqueDefInst->getLexicalId()))
            return false;

          if (liveness.isLiveAtExit(
                  bb, msgOpnd->getTopDcl()->getRegVar()->getId()) ||
              getLastUseLexId(msgOpnd->getTopDcl()) >= srcLexId)
            len -= uniqueDefInst->getMsgDesc()->getSrc0LenRegs();
        }

        if (samplerHeaderNotUsed) {
          // Ensure header creation instructions are used only by sampler
          auto msgOpndTopDcl =
              uniqueDefInst->getSrc(0)->asSrcRegRegion()->getTopDcl();
          auto topDclOpsIt = operations.find(msgOpndTopDcl);
          if (topDclOpsIt == operations.end())
            return false;

          if ((*topDclOpsIt).second.numUses > 1)
            return false;

          for (auto &def : (*topDclOpsIt).second.def) {
            for (unsigned int i = 0, numSrc = def.first->getNumSrc();
                 i != numSrc; i++) {
              auto src = def.first->getSrc(i);
              if (!src)
                continue;

              if (src->isImm())
                continue;

              if (src->isSrcRegRegion() &&
                  (src->asSrcRegRegion()->getTopDcl() ==
                       kernel.fg.builder->getBuiltinSamplerHeader() ||
                   src->asSrcRegRegion()->getTopDcl() ==
                       kernel.fg.builder->getBuiltinR0()))
                continue;

              // Using some other var in payload src requires extra checks to
              // remat, so skip it
              return false;
            }
          }
        }

        if (liveness.isLiveAtExit(
                bb, extMsgOpnd->getTopDcl()->getRegVar()->getId()) ||
            getLastUseLexId(extMsgOpnd->getTopDcl()) >= srcLexId)
          len -= uniqueDefInst->getMsgDesc()->getSrc1LenRegs();

        // Only profitable when remat frees more rows than it extends.
        if (refs.rowsUsed.size() <= len)
          return false;

        return true;
      } else {
        // Non-sampler definition to be rematerialized
        if (uniqueDefInst->isSend())
          return false;

        auto opIt = operations.find(srcOpndTopDcl);
        if (opIt == operations.end())
          return false;

        auto &&srcOpndRefs = (*opIt).second;
        auto srcOpndUniqueDef = findUniqueDef(srcOpndRefs, srcOpndRgn);

        bool isSrcAvailble = false;
        if (kernel.getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM &&
            uniqueDefBB == bb) {
          isSrcAvailble = checkLocalWAR(uniqueDefInst, bb, instIter);
        }

        if (!srcOpndUniqueDef && !isSrcAvailble && !srcOpndTopDcl->isInput())
          return false;

        if (srcOpndUniqueDef && !inSameSubroutine(bb, srcOpndUniqueDef->second))
          return false;

        // Check if its live in/live out to/of current BB
        unsigned int id = srcOpndTopDcl->getRegVar()->getId();
        if (!liveness.isLiveAtExit(bb, id) &&
            // Even if a var is not live-out, its live-range
            // might extend till inst of interest.
            srcOpndRefs.lastUseLexId < srcInst->getLexicalId()) {
          // Opnd may not be live, but it is still possible to
          // extend its live-range to remat it. For scalars, this
          // could be profitable too.
          srcLive[i] = false;
          anySrcNotLive = true;
        }
      }
    }
  }

  if (anySrcNotLive) {
    // Apply cost heuristic. It may be profitable to extend
    // scalars sometimes.
    for (unsigned int i = 0; i < numSrc; i++) {
      if (!srcLive[i]) {
        G4_SrcRegRegion *srcRgn = uniqueDefInst->getSrc(i)->asSrcRegRegion();

        if (srcRgn->getTopDcl()->getNumElems() > 1 &&
            getNumUses(srcRgn->getTopDcl()) < 20) {
          // Extending non-scalar operands can be expensive
          return false;
        }
      }
    }
  }

  // Record remats in loop only for non-scalar operations. This is a heuristic
  // used to not remat excessively in loops.
  if (!inSameLoop && uniqueDefInst->getExecSize() > 1)
    incNumRematsInLoop();

  // cr0 (rounding mode etc.) was redefined in this BB: recomputing a
  // float op could produce a different result — bail out.
  if (cr0DefBB && IS_TYPE_FLOAT_ALL(uniqueDefInst->getExecType())) {
    return false;
  }

  return true;
}
|
|
|
|
// Clones uniqueDef's instruction(s) into newInst, writing into a fresh
// temporary, and returns a source region over that temporary which the
// caller substitutes for `src` at the use site. `cacheInst` is set to the
// cloned defining instruction so nearby uses can reuse the same remat.
// Non-sampler defs need a single clone; sampler sends additionally need
// their header/payload setup cloned (or re-pointed at a new payload temp).
G4_SrcRegRegion *Rematerialization::rematerialize(G4_SrcRegRegion *src,
                                                  G4_BB *bb,
                                                  const Reference *uniqueDef,
                                                  std::list<G4_INST *> &newInst,
                                                  G4_INST *&cacheInst) {
  // op1 (8) A B C
  // ...
  // op2 (8) D A E
  //
  // =>
  // op1 (8) A B C
  // ...
  // op1_dup (8) A1 B C
  // op2 (8) D A1 E

  G4_SrcRegRegion *rematSrc = nullptr;

  auto dstInst = uniqueDef->first;
  auto dst = dstInst->getDst();
  gra.incRA.markForIntfUpdate(dst->getTopDcl());
  bool isSampler = dstInst->isSplitSend() && dstInst->getMsgDesc()->isSampler();

  // Remat extends the live ranges of the def's sources: record the remat
  // and flag each source for incremental-RA interference update.
  for (unsigned int i = 0, numSrc = dstInst->getNumSrc(); i < numSrc; i++) {
    G4_Operand *src = dstInst->getSrc(i);
    if (src && src->isSrcRegRegion()) {
      incNumRemat(src->asSrcRegRegion()->getTopDcl());
      gra.incRA.markForIntfUpdate(src->getTopDcl());
    }
  }

  if (!isSampler) {
    // Size the new temp to cover the def's footprint within its GRF row(s).
    unsigned int diffBound =
        dst->getRightBound() -
        (dst->getRegOff() * kernel.numEltPerGRF<Type_UB>());
    unsigned numElems = (diffBound + 1) / dst->getTypeSize();
    auto newTemp = kernel.fg.builder->createTempVar(numElems, dst->getType(),
                                                    Any, "REMAT_");
    newTemp->copyAlign(dst->getTopDcl());
    gra.copyAlignment(newTemp, dst->getTopDcl());
    // Preserve the original sub-register offset within the GRF.
    G4_DstRegRegion *newDst = kernel.fg.builder->createDst(
        newTemp->getRegVar(), 0,
        (dst->getLeftBound() % kernel.numEltPerGRF<Type_UB>()) /
            dst->getTypeSize(),
        dst->getHorzStride(), dst->getType());
    G4_INST *dupOp = dstInst->cloneInst();
    dupOp->setDest(newDst);
    dupOp->inheritDIFrom(dstInst);

    rematSrc = createSrcRgn(src, dst, newTemp);

    newInst.push_back(dupOp);

    cacheInst = newInst.back();
  } else {
    G4_Operand *src0 = nullptr;
    // Look up samplerHeader(0,2) definition
    auto sampleHeaderTopDcl =
        uniqueDef->first->getSrc(0)->asSrcRegRegion()->getTopDcl();
    if (sampleHeaderTopDcl == kernel.fg.builder->getBuiltinSamplerHeader()) {
      samplerHeader = sampleHeaderTopDcl;
      if (!samplerHeaderMapPopulated) {
        populateSamplerHeaderMap();
      }

      if (deLVNedBBs.find(bb) == deLVNedBBs.end()) {
        // DeLVN one bb at a time when required
        deLVNSamplers(bb);
        deLVNedBBs.insert(bb);
      }

      auto samplerDefIt = samplerHeaderMap.find(uniqueDef->first);
      vASSERT(samplerDefIt != samplerHeaderMap.end());
      auto prevHeaderMov = (*samplerDefIt).second;

      src0 = dstInst->getSrc(0);

      // Duplicate sampler header setup instruction
      auto dupOp = prevHeaderMov->cloneInst();
      newInst.push_back(dupOp);
    } else {
      // Handle sampler when src0 is not builtin sampler header
      auto src0Rgn = uniqueDef->first->getSrc(0)->asSrcRegRegion();
      auto src0TopDcl = src0Rgn->getTopDcl();
      auto ops = operations.find(src0TopDcl);
      vISA_ASSERT(ops != operations.end(), "Didn't find record in map");
      vISA_ASSERT((*ops).second.numUses == 1,
                  "Expecting src0 to be used only in sampler");

      G4_Declare *newSrc0Dcl = nullptr;
      if (src0TopDcl->getRegVar()->isPhyRegAssigned()) {
        // Pre-assigned payload can be reused as-is.
        newSrc0Dcl = src0TopDcl;
      } else {
        newSrc0Dcl = kernel.fg.builder->createTempVar(
            src0TopDcl->getTotalElems(), src0TopDcl->getElemType(),
            gra.getSubRegAlign(src0TopDcl));
        // Clone all defining instructions for sampler's msg header
        for (unsigned int i = 0; i != (*ops).second.def.size(); i++) {
          auto &headerDefInst = (*ops).second.def[i].first;

          auto dupOp = headerDefInst->cloneInst();
          auto headerDefDst = headerDefInst->getDst();
          vASSERT(!headerDefDst->isIndirect()); // we don't allow send header to
                                                // be defined indirectly
          dupOp->setDest(kernel.fg.builder->createDst(
              newSrc0Dcl->getRegVar(), headerDefDst->getRegOff(),
              headerDefDst->getSubRegOff(), headerDefDst->getHorzStride(),
              headerDefDst->getType()));
          newInst.push_back(dupOp);
        }
      }

      auto rd = kernel.fg.builder->createRegionDesc(
          src0Rgn->getRegion()->vertStride, src0Rgn->getRegion()->width,
          src0Rgn->getRegion()->horzStride);

      src0 = kernel.fg.builder->createSrc(
          newSrc0Dcl->getRegVar(), src0Rgn->getRegOff(),
          src0Rgn->getSubRegOff(), rd, src0Rgn->getType());
    }

    // Fresh destination temp for the re-executed sampler send.
    auto samplerDst = kernel.fg.builder->createTempVar(
        dst->getTopDcl()->getTotalElems(), dst->getTopDcl()->getElemType(),
        gra.getSubRegAlign(dst->getTopDcl()), "REMAT_SAMPLER_");
    auto samplerDstRgn = kernel.fg.builder->createDst(
        samplerDst->getRegVar(), 0, 0, 1, samplerDst->getElemType());

    auto dstMsgDesc = dstInst->getMsgDescRaw();
    vISA_ASSERT(dstMsgDesc, "expected raw descriptor");

    // Rebuild an equivalent message descriptor (duplicating BTI/STI operands).
    auto newMsgDesc = kernel.fg.builder->createGeneralMsgDesc(
        dstMsgDesc->getDesc(), dstMsgDesc->getExtendedDesc(),
        dstMsgDesc->getAccess(),
        kernel.fg.builder->duplicateOperand(dstMsgDesc->getBti()),
        kernel.fg.builder->duplicateOperand(dstMsgDesc->getSti()));

    auto dupOp = kernel.fg.builder->createSplitSendInst(
        nullptr, dstInst->opcode(), dstInst->getExecSize(), samplerDstRgn,
        kernel.fg.builder->duplicateOperand(src0)->asSrcRegRegion(),
        kernel.fg.builder->duplicateOperand(dstInst->getSrc(1))
            ->asSrcRegRegion(),
        kernel.fg.builder->duplicateOperand(
            dstInst->asSendInst()->getMsgDescOperand()),
        dstInst->getOption(), newMsgDesc,
        kernel.fg.builder->duplicateOperand(dstInst->getSrc(3)), true);
    dupOp->setVISAId(dstInst->getVISAId());
    dupOp->inheritDIFrom(dstInst);

    newInst.push_back(dupOp);

    rematSrc = createSrcRgn(src, dst, samplerDst);

    cacheInst = newInst.back();
  }

  return rematSrc;
}
|
|
|
|
G4_SrcRegRegion *Rematerialization::createSrcRgn(G4_SrcRegRegion *srcToRemat,
                                                 G4_DstRegRegion *uniqueDef,
                                                 G4_Declare *rematTemp) {
  // Build a source region that reads rematTemp at the same relative
  // row/sub-register offset srcToRemat occupied within uniqueDef, keeping
  // the original modifier, region descriptor, and type.
  const unsigned grfBytes = kernel.numEltPerGRF<Type_UB>();
  const unsigned srcLb = srcToRemat->getLeftBound();

  const unsigned rowOff =
      srcLb / grfBytes - uniqueDef->getLeftBound() / grfBytes;
  const unsigned subRegOff = (srcLb % grfBytes) / srcToRemat->getTypeSize();

  return kernel.fg.builder->createSrcRegRegion(
      srcToRemat->getModifier(), Direct, rematTemp->getRegVar(),
      (short)rowOff, (short)subRegOff, srcToRemat->getRegion(),
      srcToRemat->getType());
}
|
|
|
|
const Reference *Rematerialization::findUniqueDef(References &refs,
                                                  G4_SrcRegRegion *src) {
  // Scan all recorded defs of src's top declare and return the single def
  // whose dst fully covers src's [leftBound, rightBound] byte range.
  // Returns nullptr when more than one def covers the range, when any def
  // only partially overlaps it, or when the variable is also a kernel
  // input (i.e. it has an implicit def besides the explicit ones).
  Reference *candidate = nullptr;

  const unsigned int lb = src->getLeftBound();
  const unsigned int rb = src->getRightBound();

  for (auto &&def : refs.def) {
    auto dstRgn = def.first->getDst();
    const unsigned int defLb = dstRgn->getLeftBound();
    const unsigned int defRb = dstRgn->getRightBound();

    if (defLb <= lb && defRb >= rb) {
      // Fully covering def; a second one makes the def non-unique.
      if (candidate) {
        candidate = nullptr;
        break;
      }
      candidate = &def;
    } else if ((defLb <= lb && defRb >= lb) || (defLb <= rb && defRb >= lb)) {
      // Partial overlap disqualifies rematerialization.
      candidate = nullptr;
      break;
    }
  }

  if (candidate) {
    G4_RegFileKind rf =
        refs.def.front().first->getDst()->getTopDcl()->getRegFile();
    if (rf == G4_RegFileKind::G4_INPUT) {
      // Variable is an input as well as explicitly defined.
      candidate = nullptr;
    }
  }

  return candidate;
}
|
|
|
|
// Count the split-send instructions in the kernel whose message descriptor
// targets the sampler shared function.
unsigned int getNumSamplers(G4_Kernel &kernel) {
  unsigned int count = 0;

  for (auto bb : kernel.fg)
    for (auto inst : *bb)
      if (inst->isSplitSend() && inst->getMsgDesc()->isSampler())
        ++count;

  return count;
}
|
|
|
|
// Driver for the rematerialization pass. Walks every instruction of every
// BB and, when register pressure at an instruction is at least
// rematRegPressure (or a src operand is spilled / a remat candidate),
// attempts to replace src operands with freshly rematerialized values.
// New defining instructions are inserted immediately before the use, and a
// per-BB cache (rematValues) lets nearby uses of the same unique def share
// one rematerialized value instead of emitting it again.
void Rematerialization::run() {
  // Gather defs/uses per top dcl and assign lexical ids to instructions.
  populateRefs();

  auto firstProgInst = kernel.fg.getEntryBB()->getFirstInst();

  for (auto bb : kernel.fg) {
    if (kernel.getInt32KernelAttr(Attributes::ATTR_Target) ==
        VISATarget::VISA_3D) {
      // For Cm, assume cr0 def is live across BBs
      // For IGC, assume cr0 is reset at each BB entry
      cr0DefBB = false;
    }
    // Store cache of rematerialized operations so nearby instructions
    // can reuse them.
    // <Unique def, <Remat'd def, Lexical id of last ref>>
    std::map<const Reference *, std::pair<G4_INST *, unsigned int>> rematValues;
    for (auto instIt = bb->begin(); instIt != bb->end(); instIt++) {
      auto inst = (*instIt);
      auto dst = inst->getDst();
      bool runRemat = false;

      // Track whether cr0 is written within this BB (the first program
      // instruction is exempt).
      cr0DefBB |= dst && dst->isCrReg() && (inst != firstProgInst);

      // Run remat if any src opnd is spilled
      for (unsigned int opnd = 0, numSrc = inst->getNumSrc(); opnd < numSrc;
           opnd++) {
        auto src = inst->getSrc(opnd);

        if (src && src->isSrcRegRegion()) {
          auto srcTopDcl = src->getTopDcl();
          if (srcTopDcl && srcTopDcl->getRegVar()->isRegAllocPartaker() &&
              (isRangeSpilled(srcTopDcl) ||
               rematCandidates[srcTopDcl->getRegVar()->getId()] == true)) {
            // Run remat for spilled src opnd even if
            // register pressure is low.
            runRemat = true;
            break;
          }
        }
      }

      if (!runRemat) {
        // No spilled operand: only remat when register pressure at this
        // instruction reaches the configured threshold.
        auto regPressure = rpe.getRegisterPressure(inst);

        if (regPressure < rematRegPressure) {
          continue;
        }
      }

      // High register pressure found at current instruction so try to remat
      for (unsigned int opnd = 0, numSrc = inst->getNumSrc(); opnd < numSrc;
           opnd++) {
        auto src = inst->getSrc(opnd);

        if (src && src->isSrcRegRegion()) {
          const Reference *uniqueDef = nullptr;
          G4_SrcRegRegion *rematSrc = nullptr;

          // canRematerialize also fills in uniqueDef for this src.
          bool canRemat =
              canRematerialize(src->asSrcRegRegion(), bb, uniqueDef, instIt);
          if (canRemat) {
            bool reUseRemat = false;
            // Reuse an earlier remat of the same unique def if it is
            // lexically close enough.
            auto prevRematIt = rematValues.find(uniqueDef);
            if (prevRematIt != rematValues.end()) {
              if ((inst->getLexicalId() - (*prevRematIt).second.second) <=
                  MAX_LOCAL_REMAT_REUSE_DISTANCE) {
                reUseRemat = true;
                // Point this use at the cached remat'd def's temp.
                rematSrc = createSrcRgn(
                    src->asSrcRegRegion(), uniqueDef->first->getDst(),
                    (*prevRematIt).second.first->getDst()->getTopDcl());

                if (src->asSrcRegRegion()->getAccRegSel() == NOACC) {
                  rematSrc->setAccRegSel(NOACC);
                }

                // One fewer use of the original (possibly spilled) dcl.
                reduceNumUses(src->getTopDcl());
              }
              // Refresh last-reference lexical id for the cache entry.
              (*prevRematIt).second.second = inst->getLexicalId();
            }

            if (!reUseRemat) {
              // No reusable cached value: emit fresh remat instructions.
              std::list<G4_INST *> newInsts;
              G4_INST *cacheInst = nullptr;
              rematSrc = rematerialize(src->asSrcRegRegion(), bb, uniqueDef,
                                       newInsts, cacheInst);

              if (src->asSrcRegRegion()->getAccRegSel() == NOACC) {
                rematSrc->setAccRegSel(NOACC);
              }

              // Insert new defining instructions right before the use,
              // applying the EU-fusion NoMask WA where required.
              while (!newInsts.empty()) {
                bb->insertBefore(instIt, newInsts.front());
                if (newInsts.front()->isWriteEnableInst() &&
                    gra.EUFusionNoMaskWANeeded()) {
                  gra.addEUFusionNoMaskWAInst(bb, newInsts.front());
                }
                newInsts.pop_front();
              }

              // Cache the remat'd def so nearby uses can share it.
              rematValues.insert(std::make_pair(
                  uniqueDef,
                  std::make_pair(cacheInst, src->getInst()->getLexicalId())));

              reduceNumUses(src->getTopDcl());

              IRChanged = true;
            }

            // Redirect this operand to the rematerialized value.
            inst->setSrc(rematSrc, opnd);
          }
        }
      }
    }
  }

  // Sampler-header cleanup is unsafe with sampler feedback surfaces.
  if (!kernel.fg.builder->hasSamplerFeedbackSurface()) {
    cleanRedundantSamplerHeaders();
  }

  kernel.dumpToFile("after.remat");
}
|
|
} // namespace vISA
|