Files
intel-graphics-compiler/visa/LocalRA.h
Cheng, Bu Qi e5df3df61b Fix the bug for SWSB dependence missing on undefined variable
cmp.eq  v5444, v5444
v5444 is undefined and will be assigned with any register.
As a result there may be RAW dependence in the SIMD control.
Current SWSB only track WAR WAW along the SIMD control.
2025-01-22 23:30:18 +01:00

556 lines
17 KiB
C++

/*========================== begin_copyright_notice ============================
Copyright (C) 2017-2021 Intel Corporation
SPDX-License-Identifier: MIT
============================= end_copyright_notice ===========================*/
#ifndef _INC_LOCALRA_H_
#define _INC_LOCALRA_H_
#include "BitSet.h"
#include "BuildIR.h"
#include "FlowGraph.h"
#include "G4_Opcode.h"
#include <list>
// Forward decls
namespace vISA {
class G4_Declare;
class G4_INST;
class G4_BB;
class LinearScan;
class LocalLiveRange;
class PhyRegLocalRA;
class PhyRegsManager;
class PhyRegSummary;
class BankConflictPass;
class GlobalRA;
} // namespace vISA
vISA::G4_Declare *GetTopDclFromRegRegion(vISA::G4_Operand *opnd);
typedef std::map<vISA::LocalLiveRange *,
std::vector<std::pair<INST_LIST_ITER, unsigned int>>>
LLR_USE_MAP;
typedef std::map<vISA::LocalLiveRange *,
std::vector<std::pair<INST_LIST_ITER, unsigned int>>>::iterator
LLR_USE_MAP_ITER;
// Each declaration will have a LocalLiveRange object allocated for it
namespace vISA {
class PhyRegsLocalRA;
class InputLiveRange;
class LocalRA {
private:
G4_Kernel &kernel;
IR_Builder &builder;
PhyRegsLocalRA *pregs = nullptr;
LLR_USE_MAP LLRUseMap;
unsigned int numRegLRA = 0;
unsigned int globalLRSize = 0;
bool doSplitLLR = false;
std::list<InputLiveRange> inputIntervals;
BankConflictPass &bc;
GlobalRA &gra;
bool doBCR = false;
bool highInternalConflict = false;
bool hasSplitInsts = false;
BankAlign getBankAlignForUniqueAssign(G4_Declare *dcl);
bool hasBackEdge();
void specialAlign();
void preLocalRAAnalysis();
void trivialAssignRA(bool &needGlobalRA, bool threeSourceCandidate);
bool localRAPass(bool doRoundRobin, bool doSplitLLR);
void resetMasks();
unsigned int getLargestInputGRF();
void blockOutputPhyRegs();
void removeUnrequiredLifetimeOps();
bool assignUniqueRegisters(bool twoBanksRA, bool twoDirectionsAssign,
bool hybridWithSpill);
bool unassignedRangeFound();
void updateRegUsage(PhyRegSummary *summary, unsigned int &numRegsUsed);
void localRAOptReport();
void markReferencesInOpnd(G4_Operand *opnd, bool isEOT,
INST_LIST_ITER inst_it, unsigned int pos);
void addOutOfBoundForbidden(G4_Declare *dcl, G4_Operand *opnd);
void markSpecialForbidden(INST_LIST_ITER inst_it);
void markReferencesInInst(INST_LIST_ITER inst_it);
void setLexicalID(bool includePseudo);
void markReferences(unsigned int &numRowsEOT, bool &lifetimeOpFound);
void calculateInputIntervals();
bool hasDstSrcOverlapPotential(G4_DstRegRegion *dst, G4_SrcRegRegion *src);
void calculateLiveIntervals(G4_BB *bb,
std::vector<LocalLiveRange *> &liveIntervals);
void printAddressTakenDecls();
void printLocalRACandidates();
void printLocalLiveIntervals(G4_BB *bb,
std::vector<LocalLiveRange *> &liveIntervals);
void printInputLiveIntervals();
bool countLiveIntervals();
// scratch fields used for parameter passing
G4_BB *curBB = nullptr;
public:
static void getRowInfo(int size, int &nrows, int &lastRowSize,
const IR_Builder &builder);
static unsigned int convertSubRegOffFromWords(G4_Declare *dcl,
int subregnuminwords);
static unsigned int convertSubRegOffToWords(G4_Declare *dcl, int subregnum);
static void
countLocalLiveIntervals(std::vector<LocalLiveRange *> &liveIntervals,
unsigned grfSize);
static void printLocalLiveIntervalDistribution(unsigned int numScalars,
unsigned int numHalfGRF,
unsigned int numOneGRF,
unsigned int numTwoOrMoreGRF,
unsigned int numTotal);
LocalRA(BankConflictPass &, GlobalRA &);
bool localRA();
void undoLocalRAAssignments(bool clearInterval);
bool doHybridBCR() const { return doBCR; }
bool hasHighInternalBC() const { return highInternalConflict; }
};
class LocalLiveRange {
private:
G4_Declare *topdcl;
G4_INST *firstRef;
G4_INST *lastRef;
unsigned int lrStartIdx, lrEndIdx;
bool isIndirectAccess;
G4_VarBase *preg;
// pregoff is stored in word here
// But subreg offset stored in regvar should be in units of dcl's element size
int pregoff;
unsigned int numRefsInFG;
G4_BB *prevBBRef;
bool eot;
bool assigned;
bool isSplit;
bool defined;
IR_Builder &builder;
std::unordered_set<unsigned int> forbiddenGRFs;
public:
LocalLiveRange(IR_Builder &b) : builder(b) {
topdcl = NULL;
firstRef = lastRef = NULL;
lrStartIdx = lrEndIdx = 0;
isIndirectAccess = false;
numRefsInFG = 0;
prevBBRef = NULL;
preg = NULL;
pregoff = 0;
assigned = false;
eot = false;
isSplit = false;
defined = false;
if (!builder.canWriteR0())
addForbidden(0);
if (builder.mustReserveR1())
addForbidden(1);
}
// A reference to this live range exists in bb basic block, record it
void recordRef(G4_BB *);
void markIndirectRef() { isIndirectAccess = true; }
// A live range is local if it is never accessed indirectly (via address taken
// register) and only a single basic block references the range
bool isLiveRangeLocal() const;
bool isLiveRangeGlobal();
bool isGRFRegAssigned();
void setTopDcl(G4_Declare *dcl) {
vISA_ASSERT(topdcl == NULL, "Redefining top dcl");
topdcl = dcl;
}
G4_Declare *getTopDcl() const { return topdcl; }
void *operator new(size_t sz, Mem_Manager &m) { return m.alloc(sz); }
bool hasIndirectAccess() const { return isIndirectAccess; }
void setFirstRef(G4_INST *inst, unsigned int idx) {
firstRef = inst;
lrStartIdx = idx;
}
G4_INST *getFirstRef(unsigned int &idx) const {
idx = lrStartIdx;
return firstRef;
}
void setLastRef(G4_INST *inst, unsigned int idx) {
lastRef = inst;
lrEndIdx = idx;
}
G4_INST *getLastRef(unsigned int &idx) {
idx = lrEndIdx;
return lastRef;
}
void setPhyReg(G4_VarBase *pr, int subreg) {
preg = pr;
pregoff = subreg;
}
G4_VarBase *getPhyReg(int &subreg) {
subreg = pregoff;
return preg;
}
unsigned int getSizeInWords();
void setAssigned(bool a) { assigned = a; }
bool getAssigned() const { return assigned; }
void markEOT() { eot = true; }
bool isEOT() const { return eot; }
void markSplit() { isSplit = true; }
bool getSplit() const { return isSplit; }
void markDefined() { defined = true; }
bool isDefined() const { return defined; }
void addForbidden(unsigned int f) { forbiddenGRFs.insert(f); }
std::unordered_set<unsigned int> &getForbidden() { return forbiddenGRFs; }
};
class InputLiveRange {
private:
unsigned int regWordIdx;
unsigned int lrEndIdx;
public:
InputLiveRange(unsigned int regId, unsigned int endId)
: regWordIdx(regId), lrEndIdx(endId) {}
unsigned int getRegWordIdx() { return regWordIdx; }
unsigned int getLrEndIdx() { return lrEndIdx; }
};
} // namespace vISA
#define SECOND_HALF_BANK_START_GRF 64
#define RR_HEURISTIC 0.66
enum {
WORD_FREE = 0,
WORD_BUSY = 1,
};
namespace vISA {
class PhyRegsLocalRA {
private:
const IR_Builder &builder;
unsigned int numRegs;
// nth bit represents whether the register's nth word is free/busy
// 1 - busy, 0 - free
// It is possible to use bit-vector in place of this array
// bitvector does not provide coarse grained access to mark
// entire grf busy/available
std::vector<uint32_t> regBusyVector;
std::vector<int32_t> regLastUse;
std::vector<bool> grfAvialable;
int lastUseSum1;
int lastUseSum2;
int bank1AvailableRegNum;
int bank2AvailableRegNum;
bool twoBanksRA;
bool simpleGRFAvailable;
bool r0Forbidden;
bool r1Forbidden;
int LraFFWindowSize;
public:
PhyRegsLocalRA(const IR_Builder &_builder, uint32_t nregs)
: builder(_builder), numRegs(nregs) {
uint32_t grfFree = 0;
regBusyVector.resize(numRegs);
regLastUse.resize(numRegs);
grfAvialable.resize(numRegs);
for (int i = 0; i < (int)nregs; i++) {
regBusyVector[i] = grfFree;
regLastUse[i] = 0;
grfAvialable[i] = true;
}
lastUseSum1 = 0;
lastUseSum2 = 0;
bank1AvailableRegNum = 0;
bank2AvailableRegNum = 0;
twoBanksRA = false;
simpleGRFAvailable = false;
r0Forbidden = false;
r1Forbidden = false;
LraFFWindowSize =
(int)builder.getOptions()->getuInt32Option(vISA_LraFFWindowSize);
}
void findRegisterCandiateWithAlignForward(int &i, BankAlign align,
bool evenAlign);
unsigned int get_bundle(unsigned int baseReg, int offset);
int findBundleConflictFreeRegister(int curReg, int endReg,
unsigned short occupiedBundles,
BankAlign align, bool evenAlign);
void findRegisterCandiateWithAlignBackward(int &i, BankAlign align,
bool evenAlign);
void setGRFBusy(int which);
void setGRFBusy(int which, bool isInput);
void setGRFBusy(int which, int howmany, bool isInput);
void setGRFBusy(int which, int howmany);
void setWordBusy(int whichgrf, int word, bool isInput);
void setGRFNotBusy(int which, int instID);
void setH1GRFBusy(int which);
void setH2GRFBusy(int which);
void setWordBusy(int whichgrf, int word);
void setWordBusy(int whichgrf, int word, int howmany, bool isInput);
void setWordBusy(int whichgrf, int word, int howmany);
void setWordNotBusy(int whichgrf, int word, int instID);
inline bool isGRFBusy(int which) const {
vISA_ASSERT(isGRFAvailable(which), "Invalid register");
return (regBusyVector[which] != 0);
}
inline bool isGRFBusy(int which, int howmany) const {
bool retval = false;
for (int i = 0; i < howmany && !retval; i++) {
retval |= isGRFBusy(which + i);
}
return retval;
}
bool isH1GRFBusy(int which);
bool isH2GRFBusy(int which);
inline bool isWordBusy(int whichgrf, int word);
bool isWordBusy(int whichgrf, int word, int howmany);
bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int &regnum,
int nrows, int lastRowSize, int endReg,
unsigned short occupiedBundles, int instID,
bool isHybridAlloc,
std::unordered_set<unsigned int> &forbidden);
void markPhyRegs(G4_Declare *topdcl);
// Available/unavailable is different from busy/free
// Unavailable GRFs are not available for allocation
void setGRFUnavailable(int which) { grfAvialable[which] = false; }
bool isGRFAvailable(int which) const {
if (simpleGRFAvailable) {
if (which > 1) {
return true;
} else {
if (r0Forbidden && which == 0) {
return false;
}
if (r1Forbidden && which <= 1) {
return false;
}
return true;
}
} else {
vISA_ASSERT(which < (int)numRegs, "invalid GRF");
return (grfAvialable[which] == true);
}
}
bool isGRFAvailable(int which, int howmany) const {
if (simpleGRFAvailable) {
if (which > 1) {
return true;
} else {
if (r0Forbidden && which == 0) {
return false;
}
if (r1Forbidden && which <= 1) {
return false;
}
return true;
}
} else {
for (int i = 0; i < howmany; i++) {
if (!isGRFAvailable(which + i)) {
return false;
}
}
}
return true;
}
void printBusyRegs();
int getRegLastUse(int reg) { return regLastUse[reg]; }
int getLastUseSum1() { return lastUseSum1; }
int getLastUseSum2() { return lastUseSum2; }
int getBank1AvailableRegNum() { return bank1AvailableRegNum; }
int getBank2AvailableRegNum() { return bank2AvailableRegNum; }
void setBank1AvailableRegNum(int avail1) { bank1AvailableRegNum = avail1; }
void setBank2AvailableRegNum(int avail2) { bank2AvailableRegNum = avail2; }
void setTwoBanksRA(bool twoBanks) { twoBanksRA = twoBanks; }
void setSimpleGRFAvailable(bool simple) { simpleGRFAvailable = simple; }
void setR0Forbidden() { r0Forbidden = true; }
void setR1Forbidden() { r1Forbidden = true; }
bool
findFreeMultipleRegsBackward(int regIdx, BankAlign align, int &regnum,
int nrows, int lastRowSize, int endReg,
int instID, bool isHybridAlloc,
std::unordered_set<unsigned int> &forbidden);
bool findFreeSingleReg(int regIdx, G4_SubReg_Align subalign, int &regnum,
int &subregnum, int size);
bool findFreeSingleReg(int regIdx, int size, BankAlign align,
G4_SubReg_Align subalign, int &regnum, int &subregnum,
int endReg, int instID, bool isHybridAlloc,
bool forward,
std::unordered_set<unsigned int> &forbidden);
bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int &regnum,
int nrows, int lastRowSize, int endReg,
int instID, const bool *forbidden);
bool findFreeSingleReg(int regIdx, int size, BankAlign align,
G4_SubReg_Align subalign, int &regnum, int &subregnum,
int endReg, const bool *forbidden);
};
class PhyRegsManager {
private:
const IR_Builder &builder;
PhyRegsLocalRA availableRegs;
bool twoBanksRA;
public:
PhyRegsManager(const IR_Builder &irb, const PhyRegsLocalRA& pregs, bool _twoBanksRA)
: builder(irb), availableRegs(pregs), twoBanksRA(_twoBanksRA) {
availableRegs.setTwoBanksRA(_twoBanksRA);
}
void freeRegs(int regnum, int subregnum, int numwords, int instID);
PhyRegsLocalRA *getAvailableRegs() { return &availableRegs; }
int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign,
int &regnum, int &subregnum, int startRegNum, int endRegNum,
unsigned short occupiedBundles, unsigned int instID,
bool isHybridAlloc,
std::unordered_set<unsigned int> &forbidden);
int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign,
int &regnum, int &subregnum, int startRegNum, int endRegNum,
unsigned int instID, const bool *forbidden);
};
class LinearScan {
private:
GlobalRA &gra;
IR_Builder &builder;
PhyRegsManager &pregManager;
PhyRegsLocalRA &initPregs;
std::vector<LocalLiveRange *> &liveIntervals;
std::list<InputLiveRange> &inputIntervals;
std::list<LocalLiveRange *> active;
PhyRegSummary *summary;
void expireRanges(unsigned int);
void expireInputRanges(unsigned int, unsigned int, unsigned int);
void expireAllActive();
bool allocateRegsFromBanks(LocalLiveRange *);
bool allocateRegs(LocalLiveRange *lr, G4_BB *bb, IR_Builder &builder,
LLR_USE_MAP &LLRUseMap);
void freeAllocedRegs(LocalLiveRange *, bool);
void updateActiveList(LocalLiveRange *);
BitSet pregs;
unsigned int simdSize;
unsigned int globalLRSize;
unsigned int *startGRFReg;
unsigned int numRegLRA;
unsigned int bank1StartGRFReg;
unsigned int bank2StartGRFReg;
unsigned int bank1_start;
unsigned int bank1_end;
unsigned int bank2_start;
unsigned int bank2_end;
bool useRoundRobin;
bool doBankConflict;
bool highInternalConflict;
bool doSplitLLR;
public:
LinearScan(GlobalRA &g, std::vector<LocalLiveRange *> &localLiveIntervals,
std::list<InputLiveRange> &inputLivelIntervals,
PhyRegsManager &pregMgr, PhyRegsLocalRA &pregs,
PhyRegSummary *s, unsigned int numReg,
unsigned int glrs, bool roundRobin, bool bankConflict,
bool internalConflict, bool splitLLR, unsigned int simdS);
void run(G4_BB *bb, IR_Builder &builder, LLR_USE_MAP &LLRUseMap);
};
class PhyRegSummary {
private:
const IR_Builder *builder = nullptr;
std::vector<bool> GRFUsage;
public:
PhyRegSummary() {}
PhyRegSummary(const IR_Builder *irb, uint32_t numGRF) : builder(irb) {
GRFUsage.resize(numGRF, false);
}
uint32_t getNumGRF() const { return GRFUsage.size(); }
void markPhyRegs(G4_VarBase *pr, unsigned int words);
bool isGRFBusy(int regnum) const { return GRFUsage[regnum]; }
void setGRFBusy(int regnum) { GRFUsage[regnum] = true; }
void printBusyRegs();
uint32_t getNumFreeGRF() const {
uint32_t numFreeGRF = 0;
for (int i = 0, size = getNumGRF(); i < size; ++i) {
if (!isGRFBusy(i)) {
numFreeGRF++;
}
}
return numFreeGRF;
}
};
} // namespace vISA
#endif // _INC_LOCALRA_H_