mirror of
https://github.com/intel/intel-graphics-compiler.git
synced 2025-10-30 08:18:26 +08:00
cmp.eq v5444, v5444 v5444 is undefined and will be assigned with any register. As a result there may be RAW dependence in the SIMD control. Current SWSB only track WAR WAW along the SIMD control.
556 lines
17 KiB
C++
556 lines
17 KiB
C++
/*========================== begin_copyright_notice ============================
|
|
|
|
Copyright (C) 2017-2021 Intel Corporation
|
|
|
|
SPDX-License-Identifier: MIT
|
|
|
|
============================= end_copyright_notice ===========================*/
|
|
|
|
#ifndef _INC_LOCALRA_H_
|
|
#define _INC_LOCALRA_H_
|
|
|
|
#include "BitSet.h"
|
|
#include "BuildIR.h"
|
|
#include "FlowGraph.h"
|
|
#include "G4_Opcode.h"
|
|
#include <list>
|
|
|
|
// Forward decls
|
|
namespace vISA {
|
|
class G4_Declare;
|
|
class G4_INST;
|
|
class G4_BB;
|
|
class LinearScan;
|
|
class LocalLiveRange;
|
|
class PhyRegLocalRA;
|
|
class PhyRegsManager;
|
|
class PhyRegSummary;
|
|
class BankConflictPass;
|
|
class GlobalRA;
|
|
} // namespace vISA
|
|
|
|
vISA::G4_Declare *GetTopDclFromRegRegion(vISA::G4_Operand *opnd);
|
|
|
|
typedef std::map<vISA::LocalLiveRange *,
|
|
std::vector<std::pair<INST_LIST_ITER, unsigned int>>>
|
|
LLR_USE_MAP;
|
|
typedef std::map<vISA::LocalLiveRange *,
|
|
std::vector<std::pair<INST_LIST_ITER, unsigned int>>>::iterator
|
|
LLR_USE_MAP_ITER;
|
|
|
|
// Each declaration will have a LocalLiveRange object allocated for it
|
|
namespace vISA {
|
|
class PhyRegsLocalRA;
|
|
class InputLiveRange;
|
|
|
|
class LocalRA {
|
|
private:
|
|
G4_Kernel &kernel;
|
|
IR_Builder &builder;
|
|
PhyRegsLocalRA *pregs = nullptr;
|
|
LLR_USE_MAP LLRUseMap;
|
|
unsigned int numRegLRA = 0;
|
|
unsigned int globalLRSize = 0;
|
|
bool doSplitLLR = false;
|
|
std::list<InputLiveRange> inputIntervals;
|
|
BankConflictPass &bc;
|
|
GlobalRA &gra;
|
|
bool doBCR = false;
|
|
bool highInternalConflict = false;
|
|
bool hasSplitInsts = false;
|
|
|
|
BankAlign getBankAlignForUniqueAssign(G4_Declare *dcl);
|
|
bool hasBackEdge();
|
|
void specialAlign();
|
|
void preLocalRAAnalysis();
|
|
void trivialAssignRA(bool &needGlobalRA, bool threeSourceCandidate);
|
|
bool localRAPass(bool doRoundRobin, bool doSplitLLR);
|
|
void resetMasks();
|
|
unsigned int getLargestInputGRF();
|
|
void blockOutputPhyRegs();
|
|
void removeUnrequiredLifetimeOps();
|
|
bool assignUniqueRegisters(bool twoBanksRA, bool twoDirectionsAssign,
|
|
bool hybridWithSpill);
|
|
bool unassignedRangeFound();
|
|
void updateRegUsage(PhyRegSummary *summary, unsigned int &numRegsUsed);
|
|
void localRAOptReport();
|
|
void markReferencesInOpnd(G4_Operand *opnd, bool isEOT,
|
|
INST_LIST_ITER inst_it, unsigned int pos);
|
|
void addOutOfBoundForbidden(G4_Declare *dcl, G4_Operand *opnd);
|
|
void markSpecialForbidden(INST_LIST_ITER inst_it);
|
|
void markReferencesInInst(INST_LIST_ITER inst_it);
|
|
void setLexicalID(bool includePseudo);
|
|
void markReferences(unsigned int &numRowsEOT, bool &lifetimeOpFound);
|
|
void calculateInputIntervals();
|
|
bool hasDstSrcOverlapPotential(G4_DstRegRegion *dst, G4_SrcRegRegion *src);
|
|
void calculateLiveIntervals(G4_BB *bb,
|
|
std::vector<LocalLiveRange *> &liveIntervals);
|
|
void printAddressTakenDecls();
|
|
void printLocalRACandidates();
|
|
void printLocalLiveIntervals(G4_BB *bb,
|
|
std::vector<LocalLiveRange *> &liveIntervals);
|
|
void printInputLiveIntervals();
|
|
bool countLiveIntervals();
|
|
|
|
// scratch fields used for parameter passing
|
|
G4_BB *curBB = nullptr;
|
|
|
|
public:
|
|
static void getRowInfo(int size, int &nrows, int &lastRowSize,
|
|
const IR_Builder &builder);
|
|
static unsigned int convertSubRegOffFromWords(G4_Declare *dcl,
|
|
int subregnuminwords);
|
|
static unsigned int convertSubRegOffToWords(G4_Declare *dcl, int subregnum);
|
|
static void
|
|
countLocalLiveIntervals(std::vector<LocalLiveRange *> &liveIntervals,
|
|
unsigned grfSize);
|
|
static void printLocalLiveIntervalDistribution(unsigned int numScalars,
|
|
unsigned int numHalfGRF,
|
|
unsigned int numOneGRF,
|
|
unsigned int numTwoOrMoreGRF,
|
|
unsigned int numTotal);
|
|
|
|
LocalRA(BankConflictPass &, GlobalRA &);
|
|
bool localRA();
|
|
void undoLocalRAAssignments(bool clearInterval);
|
|
bool doHybridBCR() const { return doBCR; }
|
|
bool hasHighInternalBC() const { return highInternalConflict; }
|
|
};
|
|
|
|
class LocalLiveRange {
|
|
private:
|
|
G4_Declare *topdcl;
|
|
G4_INST *firstRef;
|
|
G4_INST *lastRef;
|
|
unsigned int lrStartIdx, lrEndIdx;
|
|
bool isIndirectAccess;
|
|
G4_VarBase *preg;
|
|
// pregoff is stored in word here
|
|
// But subreg offset stored in regvar should be in units of dcl's element size
|
|
int pregoff;
|
|
|
|
unsigned int numRefsInFG;
|
|
G4_BB *prevBBRef;
|
|
bool eot;
|
|
|
|
bool assigned;
|
|
bool isSplit;
|
|
bool defined;
|
|
|
|
IR_Builder &builder;
|
|
|
|
std::unordered_set<unsigned int> forbiddenGRFs;
|
|
|
|
public:
|
|
LocalLiveRange(IR_Builder &b) : builder(b) {
|
|
topdcl = NULL;
|
|
firstRef = lastRef = NULL;
|
|
lrStartIdx = lrEndIdx = 0;
|
|
isIndirectAccess = false;
|
|
numRefsInFG = 0;
|
|
prevBBRef = NULL;
|
|
preg = NULL;
|
|
pregoff = 0;
|
|
assigned = false;
|
|
eot = false;
|
|
isSplit = false;
|
|
defined = false;
|
|
|
|
if (!builder.canWriteR0())
|
|
addForbidden(0);
|
|
if (builder.mustReserveR1())
|
|
addForbidden(1);
|
|
}
|
|
|
|
// A reference to this live range exists in bb basic block, record it
|
|
void recordRef(G4_BB *);
|
|
|
|
void markIndirectRef() { isIndirectAccess = true; }
|
|
|
|
// A live range is local if it is never accessed indirectly (via address taken
|
|
// register) and only a single basic block references the range
|
|
bool isLiveRangeLocal() const;
|
|
|
|
bool isLiveRangeGlobal();
|
|
|
|
bool isGRFRegAssigned();
|
|
|
|
void setTopDcl(G4_Declare *dcl) {
|
|
vISA_ASSERT(topdcl == NULL, "Redefining top dcl");
|
|
topdcl = dcl;
|
|
}
|
|
|
|
G4_Declare *getTopDcl() const { return topdcl; }
|
|
|
|
void *operator new(size_t sz, Mem_Manager &m) { return m.alloc(sz); }
|
|
|
|
bool hasIndirectAccess() const { return isIndirectAccess; }
|
|
|
|
void setFirstRef(G4_INST *inst, unsigned int idx) {
|
|
firstRef = inst;
|
|
lrStartIdx = idx;
|
|
}
|
|
|
|
G4_INST *getFirstRef(unsigned int &idx) const {
|
|
idx = lrStartIdx;
|
|
return firstRef;
|
|
}
|
|
|
|
void setLastRef(G4_INST *inst, unsigned int idx) {
|
|
lastRef = inst;
|
|
lrEndIdx = idx;
|
|
}
|
|
|
|
G4_INST *getLastRef(unsigned int &idx) {
|
|
idx = lrEndIdx;
|
|
return lastRef;
|
|
}
|
|
|
|
void setPhyReg(G4_VarBase *pr, int subreg) {
|
|
preg = pr;
|
|
pregoff = subreg;
|
|
}
|
|
G4_VarBase *getPhyReg(int &subreg) {
|
|
subreg = pregoff;
|
|
return preg;
|
|
}
|
|
|
|
unsigned int getSizeInWords();
|
|
|
|
void setAssigned(bool a) { assigned = a; }
|
|
bool getAssigned() const { return assigned; }
|
|
|
|
void markEOT() { eot = true; }
|
|
bool isEOT() const { return eot; }
|
|
|
|
void markSplit() { isSplit = true; }
|
|
bool getSplit() const { return isSplit; }
|
|
|
|
void markDefined() { defined = true; }
|
|
bool isDefined() const { return defined; }
|
|
|
|
void addForbidden(unsigned int f) { forbiddenGRFs.insert(f); }
|
|
std::unordered_set<unsigned int> &getForbidden() { return forbiddenGRFs; }
|
|
};
|
|
|
|
class InputLiveRange {
|
|
private:
|
|
unsigned int regWordIdx;
|
|
unsigned int lrEndIdx;
|
|
|
|
public:
|
|
InputLiveRange(unsigned int regId, unsigned int endId)
|
|
: regWordIdx(regId), lrEndIdx(endId) {}
|
|
|
|
unsigned int getRegWordIdx() { return regWordIdx; }
|
|
unsigned int getLrEndIdx() { return lrEndIdx; }
|
|
};
|
|
} // namespace vISA
|
|
#define SECOND_HALF_BANK_START_GRF 64
|
|
#define RR_HEURISTIC 0.66
|
|
enum {
|
|
WORD_FREE = 0,
|
|
WORD_BUSY = 1,
|
|
};
|
|
|
|
namespace vISA {
|
|
class PhyRegsLocalRA {
|
|
private:
|
|
const IR_Builder &builder;
|
|
unsigned int numRegs;
|
|
// nth bit represents whether the register's nth word is free/busy
|
|
// 1 - busy, 0 - free
|
|
// It is possible to use bit-vector in place of this array
|
|
// bitvector does not provide coarse grained access to mark
|
|
// entire grf busy/available
|
|
std::vector<uint32_t> regBusyVector;
|
|
std::vector<int32_t> regLastUse;
|
|
std::vector<bool> grfAvialable;
|
|
int lastUseSum1;
|
|
int lastUseSum2;
|
|
int bank1AvailableRegNum;
|
|
int bank2AvailableRegNum;
|
|
|
|
bool twoBanksRA;
|
|
bool simpleGRFAvailable;
|
|
bool r0Forbidden;
|
|
bool r1Forbidden;
|
|
|
|
int LraFFWindowSize;
|
|
|
|
public:
|
|
PhyRegsLocalRA(const IR_Builder &_builder, uint32_t nregs)
|
|
: builder(_builder), numRegs(nregs) {
|
|
uint32_t grfFree = 0;
|
|
|
|
regBusyVector.resize(numRegs);
|
|
regLastUse.resize(numRegs);
|
|
grfAvialable.resize(numRegs);
|
|
|
|
for (int i = 0; i < (int)nregs; i++) {
|
|
regBusyVector[i] = grfFree;
|
|
regLastUse[i] = 0;
|
|
grfAvialable[i] = true;
|
|
}
|
|
|
|
lastUseSum1 = 0;
|
|
lastUseSum2 = 0;
|
|
bank1AvailableRegNum = 0;
|
|
bank2AvailableRegNum = 0;
|
|
|
|
twoBanksRA = false;
|
|
simpleGRFAvailable = false;
|
|
r0Forbidden = false;
|
|
r1Forbidden = false;
|
|
LraFFWindowSize =
|
|
(int)builder.getOptions()->getuInt32Option(vISA_LraFFWindowSize);
|
|
}
|
|
|
|
void findRegisterCandiateWithAlignForward(int &i, BankAlign align,
|
|
bool evenAlign);
|
|
|
|
unsigned int get_bundle(unsigned int baseReg, int offset);
|
|
|
|
int findBundleConflictFreeRegister(int curReg, int endReg,
|
|
unsigned short occupiedBundles,
|
|
BankAlign align, bool evenAlign);
|
|
|
|
void findRegisterCandiateWithAlignBackward(int &i, BankAlign align,
|
|
bool evenAlign);
|
|
|
|
void setGRFBusy(int which);
|
|
void setGRFBusy(int which, bool isInput);
|
|
void setGRFBusy(int which, int howmany, bool isInput);
|
|
void setGRFBusy(int which, int howmany);
|
|
void setWordBusy(int whichgrf, int word, bool isInput);
|
|
void setGRFNotBusy(int which, int instID);
|
|
void setH1GRFBusy(int which);
|
|
void setH2GRFBusy(int which);
|
|
void setWordBusy(int whichgrf, int word);
|
|
void setWordBusy(int whichgrf, int word, int howmany, bool isInput);
|
|
void setWordBusy(int whichgrf, int word, int howmany);
|
|
void setWordNotBusy(int whichgrf, int word, int instID);
|
|
|
|
inline bool isGRFBusy(int which) const {
|
|
vISA_ASSERT(isGRFAvailable(which), "Invalid register");
|
|
return (regBusyVector[which] != 0);
|
|
}
|
|
|
|
inline bool isGRFBusy(int which, int howmany) const {
|
|
bool retval = false;
|
|
|
|
for (int i = 0; i < howmany && !retval; i++) {
|
|
retval |= isGRFBusy(which + i);
|
|
}
|
|
|
|
return retval;
|
|
}
|
|
|
|
bool isH1GRFBusy(int which);
|
|
bool isH2GRFBusy(int which);
|
|
inline bool isWordBusy(int whichgrf, int word);
|
|
bool isWordBusy(int whichgrf, int word, int howmany);
|
|
|
|
bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int ®num,
|
|
int nrows, int lastRowSize, int endReg,
|
|
unsigned short occupiedBundles, int instID,
|
|
bool isHybridAlloc,
|
|
std::unordered_set<unsigned int> &forbidden);
|
|
|
|
void markPhyRegs(G4_Declare *topdcl);
|
|
|
|
// Available/unavailable is different from busy/free
|
|
// Unavailable GRFs are not available for allocation
|
|
void setGRFUnavailable(int which) { grfAvialable[which] = false; }
|
|
bool isGRFAvailable(int which) const {
|
|
|
|
if (simpleGRFAvailable) {
|
|
if (which > 1) {
|
|
return true;
|
|
} else {
|
|
if (r0Forbidden && which == 0) {
|
|
return false;
|
|
}
|
|
|
|
if (r1Forbidden && which <= 1) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
} else {
|
|
vISA_ASSERT(which < (int)numRegs, "invalid GRF");
|
|
return (grfAvialable[which] == true);
|
|
}
|
|
}
|
|
|
|
bool isGRFAvailable(int which, int howmany) const {
|
|
if (simpleGRFAvailable) {
|
|
if (which > 1) {
|
|
return true;
|
|
} else {
|
|
if (r0Forbidden && which == 0) {
|
|
return false;
|
|
}
|
|
|
|
if (r1Forbidden && which <= 1) {
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
} else {
|
|
for (int i = 0; i < howmany; i++) {
|
|
if (!isGRFAvailable(which + i)) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
void printBusyRegs();
|
|
int getRegLastUse(int reg) { return regLastUse[reg]; }
|
|
|
|
int getLastUseSum1() { return lastUseSum1; }
|
|
int getLastUseSum2() { return lastUseSum2; }
|
|
int getBank1AvailableRegNum() { return bank1AvailableRegNum; }
|
|
int getBank2AvailableRegNum() { return bank2AvailableRegNum; }
|
|
|
|
void setBank1AvailableRegNum(int avail1) { bank1AvailableRegNum = avail1; }
|
|
void setBank2AvailableRegNum(int avail2) { bank2AvailableRegNum = avail2; }
|
|
|
|
void setTwoBanksRA(bool twoBanks) { twoBanksRA = twoBanks; }
|
|
void setSimpleGRFAvailable(bool simple) { simpleGRFAvailable = simple; }
|
|
void setR0Forbidden() { r0Forbidden = true; }
|
|
void setR1Forbidden() { r1Forbidden = true; }
|
|
bool
|
|
findFreeMultipleRegsBackward(int regIdx, BankAlign align, int ®num,
|
|
int nrows, int lastRowSize, int endReg,
|
|
int instID, bool isHybridAlloc,
|
|
std::unordered_set<unsigned int> &forbidden);
|
|
bool findFreeSingleReg(int regIdx, G4_SubReg_Align subalign, int ®num,
|
|
int &subregnum, int size);
|
|
bool findFreeSingleReg(int regIdx, int size, BankAlign align,
|
|
G4_SubReg_Align subalign, int ®num, int &subregnum,
|
|
int endReg, int instID, bool isHybridAlloc,
|
|
bool forward,
|
|
std::unordered_set<unsigned int> &forbidden);
|
|
|
|
bool findFreeMultipleRegsForward(int regIdx, BankAlign align, int ®num,
|
|
int nrows, int lastRowSize, int endReg,
|
|
int instID, const bool *forbidden);
|
|
|
|
bool findFreeSingleReg(int regIdx, int size, BankAlign align,
|
|
G4_SubReg_Align subalign, int ®num, int &subregnum,
|
|
int endReg, const bool *forbidden);
|
|
};
|
|
|
|
class PhyRegsManager {
|
|
private:
|
|
const IR_Builder &builder;
|
|
PhyRegsLocalRA availableRegs;
|
|
bool twoBanksRA;
|
|
|
|
public:
|
|
PhyRegsManager(const IR_Builder &irb, const PhyRegsLocalRA& pregs, bool _twoBanksRA)
|
|
: builder(irb), availableRegs(pregs), twoBanksRA(_twoBanksRA) {
|
|
availableRegs.setTwoBanksRA(_twoBanksRA);
|
|
}
|
|
|
|
void freeRegs(int regnum, int subregnum, int numwords, int instID);
|
|
PhyRegsLocalRA *getAvailableRegs() { return &availableRegs; }
|
|
int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign,
|
|
int ®num, int &subregnum, int startRegNum, int endRegNum,
|
|
unsigned short occupiedBundles, unsigned int instID,
|
|
bool isHybridAlloc,
|
|
std::unordered_set<unsigned int> &forbidden);
|
|
int findFreeRegs(int size, BankAlign align, G4_SubReg_Align subalign,
|
|
int ®num, int &subregnum, int startRegNum, int endRegNum,
|
|
unsigned int instID, const bool *forbidden);
|
|
};
|
|
|
|
class LinearScan {
|
|
private:
|
|
GlobalRA &gra;
|
|
IR_Builder &builder;
|
|
PhyRegsManager &pregManager;
|
|
PhyRegsLocalRA &initPregs;
|
|
std::vector<LocalLiveRange *> &liveIntervals;
|
|
std::list<InputLiveRange> &inputIntervals;
|
|
std::list<LocalLiveRange *> active;
|
|
PhyRegSummary *summary;
|
|
|
|
void expireRanges(unsigned int);
|
|
void expireInputRanges(unsigned int, unsigned int, unsigned int);
|
|
void expireAllActive();
|
|
bool allocateRegsFromBanks(LocalLiveRange *);
|
|
bool allocateRegs(LocalLiveRange *lr, G4_BB *bb, IR_Builder &builder,
|
|
LLR_USE_MAP &LLRUseMap);
|
|
void freeAllocedRegs(LocalLiveRange *, bool);
|
|
void updateActiveList(LocalLiveRange *);
|
|
|
|
BitSet pregs;
|
|
unsigned int simdSize;
|
|
|
|
unsigned int globalLRSize;
|
|
unsigned int *startGRFReg;
|
|
unsigned int numRegLRA;
|
|
|
|
unsigned int bank1StartGRFReg;
|
|
unsigned int bank2StartGRFReg;
|
|
unsigned int bank1_start;
|
|
unsigned int bank1_end;
|
|
unsigned int bank2_start;
|
|
unsigned int bank2_end;
|
|
|
|
bool useRoundRobin;
|
|
bool doBankConflict;
|
|
bool highInternalConflict;
|
|
bool doSplitLLR;
|
|
|
|
public:
|
|
LinearScan(GlobalRA &g, std::vector<LocalLiveRange *> &localLiveIntervals,
|
|
std::list<InputLiveRange> &inputLivelIntervals,
|
|
PhyRegsManager &pregMgr, PhyRegsLocalRA &pregs,
|
|
PhyRegSummary *s, unsigned int numReg,
|
|
unsigned int glrs, bool roundRobin, bool bankConflict,
|
|
bool internalConflict, bool splitLLR, unsigned int simdS);
|
|
|
|
void run(G4_BB *bb, IR_Builder &builder, LLR_USE_MAP &LLRUseMap);
|
|
};
|
|
|
|
class PhyRegSummary {
|
|
private:
|
|
const IR_Builder *builder = nullptr;
|
|
std::vector<bool> GRFUsage;
|
|
|
|
public:
|
|
PhyRegSummary() {}
|
|
|
|
PhyRegSummary(const IR_Builder *irb, uint32_t numGRF) : builder(irb) {
|
|
GRFUsage.resize(numGRF, false);
|
|
}
|
|
|
|
uint32_t getNumGRF() const { return GRFUsage.size(); }
|
|
|
|
void markPhyRegs(G4_VarBase *pr, unsigned int words);
|
|
|
|
bool isGRFBusy(int regnum) const { return GRFUsage[regnum]; }
|
|
|
|
void setGRFBusy(int regnum) { GRFUsage[regnum] = true; }
|
|
|
|
void printBusyRegs();
|
|
|
|
uint32_t getNumFreeGRF() const {
|
|
uint32_t numFreeGRF = 0;
|
|
for (int i = 0, size = getNumGRF(); i < size; ++i) {
|
|
if (!isGRFBusy(i)) {
|
|
numFreeGRF++;
|
|
}
|
|
}
|
|
return numFreeGRF;
|
|
}
|
|
};
|
|
} // namespace vISA
|
|
#endif // _INC_LOCALRA_H_
|