mirror of
https://github.com/intel/intel-graphics-compiler.git
synced 2025-11-04 08:21:06 +08:00
1182 lines
44 KiB
C++
1182 lines
44 KiB
C++
/*===================== begin_copyright_notice ==================================
|
|
|
|
Copyright (c) 2017 Intel Corporation
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a
|
|
copy of this software and associated documentation files (the
|
|
"Software"), to deal in the Software without restriction, including
|
|
without limitation the rights to use, copy, modify, merge, publish,
|
|
distribute, sublicense, and/or sell copies of the Software, and to
|
|
permit persons to whom the Software is furnished to do so, subject to
|
|
the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included
|
|
in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
|
======================= end_copyright_notice ==================================*/
|
|
|
|
#ifndef __GRAPHCOLOR_H__
|
|
#define __GRAPHCOLOR_H__
|
|
|
|
#include "RegAlloc.h"
|
|
#include "Gen4_IR.hpp"
|
|
#include "SpillManagerGMRF.h"
|
|
#include <list>
|
|
#include <unordered_set>
|
|
#include <limits>
|
|
#include "RPE.h"
|
|
|
|
#include "BitSet.h"
|
|
|
|
#define BITS_DWORD 32
|
|
#define SCRATCH_MSG_LIMIT (128 * 1024)
|
|
|
|
extern unsigned int BitMask[BITS_DWORD];
|
|
namespace vISA
|
|
{
|
|
// Largest finite float and its negation, named as the upper and lower
// spill-cost bounds.
constexpr float MAXSPILLCOST = std::numeric_limits<float>::max();
constexpr float MINSPILLCOST = -MAXSPILLCOST;
|
|
|
|
class BankConflictPass
|
|
{
|
|
private:
|
|
GlobalRA& gra;
|
|
|
|
BankConflict setupBankAccordingToSiblingOperand(BankConflict assignedBank, unsigned int offset, bool oneGRFBank);
|
|
bool hasInternalConflict2Srcs(BankConflict*srcBC);
|
|
void setupBankConflictsForDecls(G4_Declare* dcl_1, G4_Declare* dcl_2, unsigned int offset1, unsigned int offset2,
|
|
BankConflict &srcBC1, BankConflict &srcBC2, int &bank1RegNum, int &bank2RegNum, float GRFRatio, bool oneGRFBank);
|
|
void setupEvenOddBankConflictsForDecls(G4_Declare * dcl_1, G4_Declare * dcl_2, unsigned int offset1, unsigned int offset2,
|
|
BankConflict &srcBC1, BankConflict &srcBC2);
|
|
void setupBankConflictsOneGRFOld(G4_INST* inst, int &bank1RegNum, int &bank2RegNum, float GRFRatio, unsigned int &internalConflict);
|
|
void setupBankConflictsforTwoGRFs(G4_INST* inst);
|
|
void setupBankConflictsForBB(G4_BB* bb, unsigned int &threeSourceInstNum, unsigned int &sendInstNum, unsigned int numRegLRA,
|
|
unsigned int & internalConflict);
|
|
bool hasInternalConflict3Srcs(BankConflict *srcBC);
|
|
void setupBankForSrc0(G4_INST* inst, G4_INST* prevInst);
|
|
void setupToEvenBank(G4_INST* inst);
|
|
void getBanks(G4_INST* inst, BankConflict *srcBC, G4_Declare **dcls, G4_Declare **opndDcls, unsigned int *offset);
|
|
void getPrevBanks(G4_INST* inst, BankConflict *srcBC, G4_Declare **dcls, G4_Declare **opndDcls, unsigned int *offset);
|
|
|
|
|
|
|
|
public:
|
|
bool setupBankConflictsForKernel(G4_Kernel& kernel, bool doLocalRR, bool &threeSourceCandidate, unsigned int numRegLRA, bool &highInternalConflict);
|
|
|
|
BankConflictPass(GlobalRA& g) : gra(g)
|
|
{
|
|
|
|
}
|
|
};
|
|
|
|
class LiveRange final : public VarBasis
|
|
{
|
|
unsigned numRegNeeded;
|
|
unsigned degree;
|
|
unsigned refCount;
|
|
AssignedReg reg;
|
|
float spillCost;
|
|
bool active;
|
|
bool isInfiniteCost;
|
|
bool isCandidate;
|
|
bool isPseudoNode;
|
|
GlobalRA& gra;
|
|
|
|
public:
|
|
|
|
LiveRange(G4_RegVar* v, GlobalRA&, const Options *opt);
|
|
|
|
void* operator new(size_t sz, vISA::Mem_Manager& m){ return m.alloc(sz); }
|
|
|
|
void setDegree(unsigned d) {degree = d;}
|
|
unsigned getDegree() {return degree;}
|
|
|
|
unsigned getNumRegNeeded() {return numRegNeeded;}
|
|
|
|
void subtractDegree(unsigned d)
|
|
{
|
|
MUST_BE_TRUE(d <= degree, ERROR_INTERNAL_ARGUMENT);
|
|
degree -= d;
|
|
}
|
|
|
|
void setActive(bool v) {active = v;}
|
|
bool getActive() {return active;}
|
|
|
|
void dump()
|
|
{
|
|
VarBasis::dump();
|
|
}
|
|
|
|
virtual void emit(std::ostream& output, bool symbolreg=false)
|
|
{
|
|
output << getVar ()->getDeclare ()->getName();
|
|
if (reg.phyReg != NULL)
|
|
{
|
|
output << "(";
|
|
reg.phyReg->emit(output);
|
|
output << '.' << reg.subRegOff << ':';
|
|
output << G4_Type_Table[getVar ()->getDeclare()->getElemType()].str << ")";
|
|
}
|
|
output << "(size = " << getVar()->getDeclare()->getByteSize() <<
|
|
", spill cost = " << getSpillCost() << ", degree = " << getDegree() << ")";
|
|
}
|
|
|
|
unsigned getRefCount() {return refCount;}
|
|
void setRefCount(unsigned count) {refCount = count;}
|
|
|
|
float getSpillCost() {return spillCost;}
|
|
void setSpillCost(float cost) {spillCost = cost;}
|
|
|
|
bool getIsInfiniteSpillCost() { return isInfiniteCost; }
|
|
void checkForInfiniteSpillCost(INST_LIST& instList, std::list<G4_INST*>::reverse_iterator& it);
|
|
|
|
G4_VarBase* getPhyReg()
|
|
{
|
|
return reg.phyReg;
|
|
}
|
|
|
|
virtual unsigned getPhyRegOff()
|
|
{
|
|
return reg.subRegOff;
|
|
}
|
|
|
|
void setPhyReg(G4_VarBase* pr, unsigned off)
|
|
{
|
|
MUST_BE_TRUE(pr->isPhyReg(), ERROR_UNKNOWN);
|
|
reg.phyReg = pr;
|
|
reg.subRegOff = off;
|
|
}
|
|
|
|
virtual void resetPhyReg()
|
|
{
|
|
reg.phyReg = NULL;
|
|
reg.subRegOff = 0;
|
|
}
|
|
|
|
bool getIsPseudoNode() const { return isPseudoNode; }
|
|
void setIsPseudoNode() { isPseudoNode = true; }
|
|
};
|
|
}
|
|
typedef std::list<vISA::LiveRange*> LIVERANGE_LIST;
|
|
typedef std::list<vISA::LiveRange*>::iterator LIVERANGE_LIST_ITER;
|
|
|
|
// A mapping from the pseudo decl created for caller save/restore, to the ret val
|
|
// This is used in augmentIntfGraph to prune interference edges for fcall ret val
|
|
typedef std::map<vISA::G4_Declare*, vISA::G4_Declare*> FCALL_RET_MAP;
|
|
typedef std::map<vISA::G4_Declare*, vISA::G4_Declare*>::iterator FCALL_RET_MAP_ITER;
|
|
|
|
typedef std::map<vISA::G4_Declare*, std::pair<vISA::G4_INST*, unsigned int>> CALL_DECL_MAP;
|
|
typedef std::map<vISA::G4_Declare*, std::pair<vISA::G4_INST*, unsigned int>>::iterator CALL_DECL_MAP_ITER;
|
|
|
|
//
|
|
// A bit array records all interference information.
|
|
// (2D matrix is flattened to 1D array)
|
|
// Since the interference information is symmetric, we can use only
|
|
// half of the size. To simplify the implementation, we use the full
|
|
// size of the bit array.
|
|
//
|
|
namespace vISA
|
|
{
|
|
class Augmentation
|
|
{
|
|
private:
|
|
G4_Kernel& kernel;
|
|
Interference& intf;
|
|
GlobalRA& gra;
|
|
LivenessAnalysis& liveAnalysis;
|
|
LiveRange** lrs;
|
|
FCALL_RET_MAP& fcallRetMap;
|
|
CALL_DECL_MAP callDclMap;
|
|
std::vector<G4_Declare*> sortedIntervals;
|
|
std::list<G4_Declare*> defaultMask;
|
|
std::list<G4_Declare*> nonDefaultMask;
|
|
Mem_Manager& m;
|
|
|
|
bool updateDstMaskForScatter(G4_INST* inst, unsigned char* mask);
|
|
void updateDstMask(G4_INST* inst, bool checkCmodOnly);
|
|
static unsigned int getByteSizeFromMask(AugmentationMasks type);
|
|
bool isDefaultMaskDcl(G4_Declare* dcl, unsigned int simdSize, AugmentationMasks type);
|
|
bool isDefaultMaskSubDeclare(unsigned char* mask, unsigned int lb, unsigned int rb, G4_Declare* dcl, unsigned int simdSize);
|
|
void markNonDefaultMaskForSubDcl(G4_Declare *dcl, unsigned lb, unsigned rb, unsigned int simdSize);
|
|
bool verifyMaskIfInit(G4_Declare* dcl, AugmentationMasks mask);
|
|
bool checkGRFPattern3(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
|
|
unsigned int lb, unsigned int rb, unsigned int execSize);
|
|
bool checkGRFPattern2(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
|
|
unsigned int lb, unsigned int rb, unsigned int execSize);
|
|
bool checkGRFPattern1(G4_Declare* dcl, G4_DstRegRegion* dst, unsigned maskOff,
|
|
unsigned int lb, unsigned int rb, unsigned int execSize);
|
|
void markNonDefaultDstRgn(G4_INST* inst, G4_Operand* opnd);
|
|
bool markNonDefaultMaskDef();
|
|
G4_BB* getTopmostBBDst(G4_BB* src, G4_BB* end, G4_BB* origSrc, unsigned int traversal);
|
|
void updateStartIntervalForSubDcl(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
|
|
void updateEndIntervalForSubDcl(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
|
|
void updateStartInterval(G4_Declare* dcl, G4_INST* curInst);
|
|
void updateEndInterval(G4_Declare* dcl, G4_INST* curInst);
|
|
void updateStartIntervalForLocal(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
|
|
void updateEndIntervalForLocal(G4_Declare* dcl, G4_INST* curInst, G4_Operand *opnd);
|
|
void buildLiveIntervals();
|
|
void clearIntervalInfo();
|
|
void sortLiveIntervals();
|
|
unsigned int getEnd(G4_Declare*& dcl);
|
|
bool isNoMask(G4_Declare* dcl, unsigned int size);
|
|
bool isConsecutiveBits(G4_Declare* dcl, unsigned int size);
|
|
bool isCompatible(G4_Declare* testDcl, G4_Declare* biggerDcl);
|
|
void buildInterferenceIncompatibleMask();
|
|
void expireIntervals(unsigned int startIdx);
|
|
void buildSIMDIntfDcl(G4_Declare* newDcl, bool isCall);
|
|
void buildSIMDIntfAll(G4_Declare* newDcl);
|
|
void handleSIMDIntf(G4_Declare* firstDcl, G4_Declare* secondDcl, bool isCall);
|
|
|
|
public:
|
|
Augmentation(G4_Kernel& k, Interference& i, LivenessAnalysis& l, LiveRange* ranges[], GlobalRA& g);
|
|
|
|
void augmentIntfGraph();
|
|
};
|
|
|
|
class Interference
|
|
{
|
|
friend class Augmentation;
|
|
|
|
protected:
|
|
// This stores compatible ranges for each variable. Such
|
|
// compatible ranges will not be present in sparseIntf set.
|
|
// We store G4_Declare* instead of id is because variables
|
|
// allocated by LRA will not have a valid id.
|
|
std::map<G4_Declare*, std::vector<G4_Declare*>> compatibleSparseIntf;
|
|
|
|
// list of fake declares for physical GRF
|
|
std::vector<G4_Declare*> localRADcls;
|
|
|
|
private:
|
|
GlobalRA& gra;
|
|
G4_Kernel& kernel;
|
|
LiveRange**& lrs;
|
|
IR_Builder& builder;
|
|
unsigned maxId;
|
|
unsigned splitStartId;
|
|
unsigned splitNum;
|
|
unsigned int* matrix;
|
|
LivenessAnalysis* liveAnalysis = nullptr;
|
|
|
|
std::vector<std::vector<unsigned int>> sparseIntf;
|
|
|
|
// sparse intefernece matrix.
|
|
// we don't directly update spraseIntf to ensure uniqueness
|
|
// like dense matrix, interference is not symmetric (that is, if v1 and v2 interfere and v1 < v2,
|
|
// we insert (v1, v2) but not (v2, v1)) for better cache behavior
|
|
std::vector<std::unordered_set<uint32_t> > sparseMatrix;
|
|
const uint32_t denseMatrixLimit = 32768;
|
|
|
|
void updateLiveness(BitSet& live, uint32_t id, bool val)
|
|
{
|
|
live.set(id, val);
|
|
}
|
|
|
|
// used by hybrid RA, this marks the first decl in kernel.Declares
|
|
// that is not a fake declare used to model physical GRF liveness
|
|
G4_Declare* firstOrigDcl = nullptr;
|
|
|
|
void getLocalRADcls();
|
|
|
|
public:
|
|
Interference(LivenessAnalysis* l, LiveRange**& lr, unsigned n, unsigned ns, unsigned nm,
|
|
G4_Declare* firstOrigDcl, GlobalRA& g);
|
|
|
|
~Interference()
|
|
{
|
|
if (useDenseMatrix())
|
|
{
|
|
delete[] matrix;
|
|
}
|
|
}
|
|
|
|
std::vector<G4_Declare*>* getCompatibleSparseIntf(G4_Declare* d)
|
|
{
|
|
if (compatibleSparseIntf.size() > 0)
|
|
{
|
|
auto it = compatibleSparseIntf.find(d);
|
|
if (it == compatibleSparseIntf.end())
|
|
{
|
|
return nullptr;
|
|
}
|
|
return &((*it).second);
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
void init(vISA::Mem_Manager& m)
|
|
{
|
|
if (useDenseMatrix())
|
|
{
|
|
unsigned N = getRowSize() * maxId;
|
|
matrix = new uint32_t[N];
|
|
memset(matrix, 0, N * sizeof(int));
|
|
}
|
|
else
|
|
{
|
|
sparseMatrix.resize(maxId);
|
|
}
|
|
}
|
|
|
|
bool useDenseMatrix() const
|
|
{
|
|
return maxId < denseMatrixLimit;
|
|
}
|
|
|
|
// Clean data filled while computing interference.
|
|
void clear()
|
|
{
|
|
sparseIntf.clear();
|
|
if (useDenseMatrix())
|
|
{
|
|
unsigned N = getRowSize() * maxId;
|
|
std::memset(matrix, 0, N * sizeof(int));
|
|
}
|
|
else
|
|
{
|
|
for (auto &I : sparseMatrix)
|
|
{
|
|
I.clear();
|
|
}
|
|
}
|
|
}
|
|
|
|
void computeInterference();
|
|
bool interfereBetween(unsigned v1, unsigned v2) const;
|
|
inline unsigned int getInterferenceBlk(unsigned idx) const
|
|
{
|
|
assert(useDenseMatrix() && "matrix is not initialized");
|
|
return matrix != nullptr ? matrix[idx] : 0;
|
|
}
|
|
inline unsigned int getRowSize() const
|
|
{
|
|
return maxId / BITS_DWORD + 1;
|
|
}
|
|
|
|
std::vector<unsigned int>& getSparseIntfForVar(unsigned int id) { return sparseIntf[id]; }
|
|
|
|
// Only upper-half matrix is now used in intf graph.
|
|
inline void safeSetInterference(unsigned v1, unsigned v2)
|
|
{
|
|
// Assume v1 < v2
|
|
if (useDenseMatrix())
|
|
{
|
|
unsigned col = v2 / BITS_DWORD;
|
|
matrix[v1 * getRowSize() + col] |= BitMask[v2 - col * BITS_DWORD];
|
|
}
|
|
else
|
|
{
|
|
sparseMatrix[v1].emplace(v2);
|
|
}
|
|
}
|
|
|
|
inline void setBlockInterferencesOneWay(unsigned v1, unsigned col, unsigned block)
|
|
{
|
|
if (useDenseMatrix())
|
|
{
|
|
#ifdef _DEBUG
|
|
MUST_BE_TRUE(sparseIntf.size() == 0, "Updating intf graph matrix after populating sparse intf graph");
|
|
#endif
|
|
|
|
matrix[v1 * getRowSize() + col] |= block;
|
|
}
|
|
else
|
|
{
|
|
auto&& intfSet = sparseMatrix[v1];
|
|
for (int i = 0; i < BITS_DWORD; ++i)
|
|
{
|
|
if (block & BitMask[i])
|
|
{
|
|
uint32_t v2 = col * BITS_DWORD + i;
|
|
intfSet.emplace(v2);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
bool varSplitCheckBeforeIntf(unsigned v1, unsigned v2);
|
|
|
|
void checkAndSetIntf(unsigned v1, unsigned v2)
|
|
{
|
|
if (v1 < v2)
|
|
{
|
|
safeSetInterference(v1, v2);
|
|
}
|
|
else if (v1 > v2)
|
|
{
|
|
safeSetInterference(v2, v1);
|
|
}
|
|
}
|
|
|
|
void addCalleeSaveBias(BitSet& live);
|
|
void buildInterferenceAtBBExit(G4_BB* bb, BitSet& live);
|
|
void buildInterferenceWithinBB(G4_BB* bb, BitSet& live, G4_Declare* arg, G4_Declare* ret);
|
|
void buildInterferenceForDst(G4_BB* bb, BitSet& live, G4_INST* inst, std::list<G4_INST*>::reverse_iterator i, G4_DstRegRegion* dst);
|
|
void buildInterferenceForFcall(G4_BB* bb, BitSet& live, G4_INST* inst, std::list<G4_INST*>::reverse_iterator i, G4_VarBase* regVar);
|
|
|
|
inline void filterSplitDclares(unsigned startIdx, unsigned endIdx, unsigned n, unsigned col, unsigned &elt, bool is_split);
|
|
|
|
void buildInterferenceWithLive(BitSet& live, unsigned i);
|
|
void buildInterferenceWithSubDcl(unsigned lr_id, G4_Operand *opnd, BitSet& live, bool setLive, bool setIntf);
|
|
void buildInterferenceWithAllSubDcl(unsigned v1, unsigned v2);
|
|
|
|
void markInterferenceForSend(G4_BB* bb, G4_INST* inst, G4_DstRegRegion* dst);
|
|
|
|
void dumpInterference() const;
|
|
void interferenceVerificationForSplit() const;
|
|
|
|
void buildInterferenceWithLocalRA(G4_BB* bb);
|
|
|
|
void buildInterferenceAmongLiveIns();
|
|
|
|
void generateSparseIntfGraph();
|
|
bool isStrongEdgeBetween(G4_Declare*, G4_Declare*);
|
|
};
|
|
|
|
class GraphColor
|
|
{
|
|
GlobalRA& gra;
|
|
|
|
unsigned totalGRFRegCount; // .reg_count_total
|
|
unsigned numVar;
|
|
unsigned numSplitStartID;
|
|
unsigned numSplitVar;
|
|
unsigned *spAddrRegSig;
|
|
Interference intf;
|
|
PhyRegPool& regPool;
|
|
IR_Builder& builder;
|
|
LiveRange** lrs;
|
|
bool isHybrid;
|
|
LIVERANGE_LIST spilledLRs;
|
|
bool requireCallerSaveRestoreCode;
|
|
bool requireCalleeSaveRestoreCode;
|
|
bool requireA0CallerSaveRestoreCode;
|
|
bool requireFlagCallerSaveRestoreCode;
|
|
bool forceSpill;
|
|
vISA::Mem_Manager mem;
|
|
const Options *m_options;
|
|
|
|
unsigned evenTotalDegree;
|
|
unsigned oddTotalDegree;
|
|
unsigned evenTotalRegNum;
|
|
unsigned oddTotalRegNum;
|
|
unsigned evenMaxRegNum;
|
|
unsigned oddMaxRegNum;
|
|
|
|
G4_Kernel& kernel;
|
|
LivenessAnalysis& liveAnalysis;
|
|
|
|
std::vector<LiveRange*> colorOrder;
|
|
LIVERANGE_LIST unconstrainedWorklist;
|
|
LIVERANGE_LIST constrainedWorklist;
|
|
unsigned int numColor = 0;
|
|
|
|
#define GRAPH_COLOR_MEM_SIZE 16*1024
|
|
|
|
// This function returns the weight of interference edge lr1--lr2,
|
|
// which is used for computing the degree of lr1.
|
|
//
|
|
// When there is no alignment restriction, we should use the normal weight,
|
|
// which is lr1_nreg + lr2_nreg - 1.
|
|
//
|
|
// Otherewise, we need to take into account additional space that may be
|
|
// required because of the alignment restriction. For example,
|
|
// if lr1 has even alignment and lr2 has no alignment restriction,
|
|
// we need to consider the following cases that would require the
|
|
// maximal available GRF space for successful allocation:
|
|
// 1) lr1's size is odd, lr2's size is odd and lr2's start position is even,
|
|
// the total space required would be (lr1_nreg + lr2_nreg + 1)
|
|
// 2) lr1's size is odd, lr2's size is even and lr2's start position is even,
|
|
// the total space required would be (lr1_nreg + lr2_nreg)
|
|
// 3) lr1's size is even, lr2's size is odd and lr2's start position is odd,
|
|
// the total space required would be (lr1_nreg + lr2_nreg)
|
|
// 4) lr1's size is even, lr2's size is even and lr2's start position is odd,
|
|
// the total space required would be (lr1_nreg + lr2_nreg + 1)
|
|
// The above logic can be simplified to the following formula:
|
|
// lr1_nreg + lr2_nreg + 1 - ((lr1_nreg + lr2_nreg) % 2)
|
|
//
|
|
// If both lr1 and lr2 have even alignment restriction,
|
|
// we need to consider the following cases that would require the
|
|
// maximal available GRF space for successful allocation:
|
|
// 1) lr1's size is odd, lr2's size is odd and lr2's start position is even,
|
|
// the total space required would be (lr1_nreg + lr2_nreg + 1)
|
|
// 2) lr1's size is odd, lr2's size is even and lr2's start position is even,
|
|
// the total space required would be (lr1_nreg + lr2_nreg)
|
|
// 3) lr1's size is even, lr2's size is odd and lr2's start position is even,
|
|
// the total space required would be (lr1_nreg + lr2_nreg)
|
|
// 4) lr1's size is even, lr2's size is even and lr2's start position is even,
|
|
// the total space required would be (lr1_nreg + lr2_nreg - 1)
|
|
// The above logic can be simplified to the following formula:
|
|
// lr1_nreg + lr2_nreg - 1 + (lr1_nreg % 2) + (lr2_nreg % 2)
|
|
//
|
|
unsigned edgeWeightGRF(LiveRange* lr1, LiveRange* lr2)
|
|
{
|
|
G4_Align lr1_align = lr1->getVar()->getAlignment();
|
|
G4_Align lr2_align = lr2->getVar()->getAlignment();
|
|
unsigned lr1_nreg = lr1->getNumRegNeeded();
|
|
unsigned lr2_nreg = lr2->getNumRegNeeded();
|
|
|
|
MUST_BE_TRUE(lr1_align == Either ||
|
|
lr1_align == Even ||
|
|
lr2_align == Either ||
|
|
lr2_align == Even, "Found unsupported GRF alignment in register allocation!");
|
|
|
|
if (lr1_align == Either)
|
|
{
|
|
return lr1_nreg + lr2_nreg - 1;
|
|
}
|
|
else if (lr2_align == Either)
|
|
{
|
|
unsigned sum = lr1_nreg + lr2_nreg;
|
|
return sum + 1 - ((sum) % 2);
|
|
}
|
|
else if (lr2_align == Even)
|
|
{
|
|
return lr1_nreg + lr2_nreg - 1 + (lr1_nreg % 2) + (lr2_nreg % 2);
|
|
}
|
|
else
|
|
{
|
|
MUST_BE_TRUE(false, "Found unsupported alignment in register allocation!");
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
unsigned edgeWeightARF(LiveRange* lr1, LiveRange* lr2)
|
|
{
|
|
if (lr1->getRegKind() == G4_FLAG)
|
|
{
|
|
G4_SubReg_Align lr1_align = lr1->getVar()->getSubRegAlignment();
|
|
G4_SubReg_Align lr2_align = lr2->getVar()->getSubRegAlignment();
|
|
unsigned lr1_nreg = lr1->getNumRegNeeded();
|
|
unsigned lr2_nreg = lr2->getNumRegNeeded();
|
|
|
|
if (lr1_align == Any)
|
|
{
|
|
return lr1_nreg + lr2_nreg - 1;
|
|
}
|
|
else if (lr1_align == Even_Word && lr2_align == Any)
|
|
{
|
|
return lr1_nreg + lr2_nreg + 1 - ((lr1_nreg + lr2_nreg) % 2);
|
|
}
|
|
else if (lr1_align == Even_Word && lr2_align == Even_Word)
|
|
{
|
|
return lr1_nreg + lr2_nreg - 1 + (lr1_nreg % 2) + (lr2_nreg % 2);
|
|
}
|
|
else
|
|
{
|
|
MUST_BE_TRUE(false, "Found unsupported subRegAlignment in flag register allocation!");
|
|
return 0;
|
|
}
|
|
}
|
|
else if (lr1->getRegKind() == G4_ADDRESS)
|
|
{
|
|
G4_SubReg_Align lr1_align = lr1->getVar()->getSubRegAlignment();
|
|
G4_SubReg_Align lr2_align = lr2->getVar()->getSubRegAlignment();
|
|
unsigned lr1_nreg = lr1->getNumRegNeeded();
|
|
unsigned lr2_nreg = lr2->getNumRegNeeded();
|
|
|
|
if (lr1_align == Any)
|
|
{
|
|
return lr1_nreg + lr2_nreg - 1;
|
|
}
|
|
else if (lr1_align == Four_Word && lr2_align == Any)
|
|
{
|
|
return lr1_nreg + lr2_nreg + 3 - (lr1_nreg + lr2_nreg) % 4;
|
|
}
|
|
else if (lr1_align == Four_Word && lr2_align == Four_Word)
|
|
{
|
|
return lr1_nreg + lr2_nreg - 1 + (4 - lr1_nreg % 4) % 4 + (4 - lr2_nreg % 4) % 4;
|
|
}
|
|
else if (lr1_align == Eight_Word && lr2_align == Any)
|
|
{
|
|
return lr1_nreg + lr2_nreg + 7 - (lr1_nreg + lr2_nreg) % 8;
|
|
}
|
|
else if (lr1_align == Eight_Word && lr2_align == Four_Word)
|
|
{
|
|
if (((8 - lr1_nreg % 8) % 8) >= 4)
|
|
return lr1_nreg + lr2_nreg - 1 + (8 - lr1_nreg % 8) % 8 - 4;
|
|
return lr1_nreg + lr2_nreg - 1 + (8 - lr1_nreg % 8) % 8 +
|
|
(4 - lr2_nreg % 4) % 4;
|
|
}
|
|
else if (lr1_align == Eight_Word && lr2_align == Eight_Word)
|
|
{
|
|
return lr1_nreg + lr2_nreg - 1 + (8 - lr1_nreg % 8) % 8 +
|
|
(8 - lr2_nreg % 8) % 8;
|
|
}
|
|
else
|
|
{
|
|
MUST_BE_TRUE(false, "Found unsupported subRegAlignment in address register allocation!");
|
|
return 0;
|
|
}
|
|
}
|
|
MUST_BE_TRUE(false, "Found unsupported ARF reg type in register allocation!");
|
|
return 0;
|
|
}
|
|
|
|
void computeDegreeForGRF();
|
|
void computeDegreeForARF();
|
|
void computeSpillCosts(bool useSplitLLRHeuristic);
|
|
void determineColorOrdering();
|
|
void removeConstrained();
|
|
void relaxNeighborDegreeGRF(LiveRange* lr);
|
|
void relaxNeighborDegreeARF(LiveRange* lr);
|
|
bool assignColors(ColorHeuristic heuristicGRF, bool doBankConflict, bool highInternalConflict);
|
|
|
|
void clearSpillAddrLocSignature()
|
|
{
|
|
memset(spAddrRegSig, 0, getNumAddrRegisters() * sizeof(unsigned));
|
|
}
|
|
void pruneActiveSpillAddrLocs(G4_DstRegRegion*, unsigned, G4_Type);
|
|
void updateActiveSpillAddrLocs(G4_DstRegRegion*, G4_SrcRegRegion*, unsigned execSize);
|
|
bool redundantAddrFill(G4_DstRegRegion*, G4_SrcRegRegion*, unsigned execSize);
|
|
|
|
public:
|
|
GraphColor(LivenessAnalysis& live, unsigned totalGRF, bool hybrid, bool forceSpill_, G4_Declare* firstOrigDcl = nullptr);
|
|
|
|
static const char* StackCallStr;
|
|
|
|
const Options * getOptions() { return m_options; }
|
|
|
|
bool regAlloc(
|
|
bool doBankConflictReduction,
|
|
bool highInternalConflict,
|
|
bool reserveSpillReg, unsigned& spillRegSize, unsigned& indrSpillRegSize, RPE* rpe);
|
|
bool requireSpillCode() { return !spilledLRs.empty(); }
|
|
Interference * getIntf() { return &intf; }
|
|
void createLiveRanges(unsigned reserveSpillSize = 0);
|
|
LiveRange ** getLiveRanges() const { return lrs; }
|
|
const LIVERANGE_LIST & getSpilledLiveRanges() const { return spilledLRs; }
|
|
void confirmRegisterAssignments();
|
|
void resetTemporaryRegisterAssignments();
|
|
void cleanupRedundantARFFillCode();
|
|
void addA0SaveRestoreCode();
|
|
void addFlagSaveRestoreCode();
|
|
void addSaveRestoreCode(unsigned);
|
|
void addCallerSaveRestoreCode();
|
|
void addCalleeSaveRestoreCode();
|
|
void addFileScopeSaveRestoreCode();
|
|
void addGenxMainStackSetupCode();
|
|
void addCalleeStackSetupCode();
|
|
void saveSubRegs(
|
|
unsigned startReg, unsigned startSubReg, unsigned size, G4_Declare* scratchRegDcl, G4_Declare* framePtr,
|
|
unsigned frameOffset, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void saveRegs(
|
|
unsigned startReg, unsigned owordSize, G4_Declare* scratchRegDcl, G4_Declare* framePtr,
|
|
unsigned frameOffset, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void saveActiveRegs(
|
|
std::vector<bool>& saveRegs, unsigned startReg,
|
|
unsigned frameOffset, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void restoreSubRegs(
|
|
unsigned startReg, unsigned startSubReg, unsigned size, G4_Declare* scratchRegDcl, G4_Declare* framePtr,
|
|
unsigned frameOffset, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void restoreRegs(
|
|
unsigned startReg, unsigned owordSize, G4_Declare* scratchRegDcl, G4_Declare* framePtr,
|
|
unsigned frameOffset, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void restoreActiveRegs(
|
|
std::vector<bool>& restoreRegs, unsigned startReg,
|
|
unsigned frameOffset, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs);
|
|
void OptimizeActiveRegsFootprint(std::vector<bool>& saveRegs, std::vector<bool>& retRegs);
|
|
void saveFileScopeVar(G4_RegVar* filescopeVar, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void restoreFileScopeVar(G4_RegVar* filescopeVar, INST_LIST& instList, INST_LIST_ITER insertIt);
|
|
void dumpRegisterPressure();
|
|
};
|
|
|
|
class RAVarInfo
|
|
{
|
|
public:
|
|
unsigned numSplit = 0;
|
|
unsigned int bb_id = UINT_MAX; // block local variable's block id.
|
|
G4_Declare* splittedDCL = nullptr;
|
|
LocalLiveRange* localLR = nullptr;
|
|
unsigned int numRefs = 0;
|
|
BankConflict conflict = BANK_CONFLICT_NONE; // used to indicate bank that should be assigned to dcl if possible
|
|
G4_INST* startInterval = nullptr;
|
|
G4_INST* endInterval = nullptr;
|
|
unsigned char* mask = nullptr;
|
|
AugmentationMasks maskType = AugmentationMasks::Undetermined;
|
|
std::vector<G4_Declare*> subDclList;
|
|
unsigned int subOff = 0;
|
|
};
|
|
|
|
class GlobalRA
|
|
{
|
|
private:
|
|
template <class REGION_TYPE> static unsigned getRegionDisp(REGION_TYPE * region);
|
|
unsigned getRegionByteSize(G4_DstRegRegion * region, unsigned execSize);
|
|
static unsigned owordMask();
|
|
static bool owordAligned(unsigned offset);
|
|
template <class REGION_TYPE> bool isUnalignedRegion(REGION_TYPE * region, unsigned execSize);
|
|
bool shouldPreloadDst(G4_DstRegRegion* spilledRangeRegion, uint8_t execSize, G4_INST* instContext, G4_BB* curBB);
|
|
static unsigned sendBlockSizeCode(unsigned owordSize);
|
|
void updateDefSet(std::set<G4_Declare*>& defs, G4_Declare* referencedDcl);
|
|
void detectUndefinedUses(LivenessAnalysis& liveAnalysis, G4_Kernel& kernel);
|
|
void markBlockLocalVar(G4_RegVar* var, unsigned bbId);
|
|
void markBlockLocalVars(G4_BB* bb, Mem_Manager& mem, bool doLocalRA);
|
|
void computePhyReg();
|
|
void fixAlignment();
|
|
|
|
RAVarInfo defaultValues;
|
|
std::vector<RAVarInfo> vars;
|
|
|
|
void resize(unsigned int id)
|
|
{
|
|
if (id >= vars.size())
|
|
vars.resize(id + 1);
|
|
}
|
|
|
|
public:
|
|
G4_Kernel& kernel;
|
|
IR_Builder& builder;
|
|
PhyRegPool& regPool;
|
|
PointsToAnalysis& pointsToAnalysis;
|
|
FCALL_RET_MAP fcallRetMap;
|
|
|
|
// RA specific fields
|
|
// Returns numSplit stored for dcl's id, or defaultValues.numSplit when the id
// is outside vars.
unsigned int getSplitVarNum(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].numSplit : defaultValues.numSplit;
}
|
|
|
|
// Stores val as numSplit for dcl's id, growing vars first if required.
void setSplitVarNum(G4_Declare* dcl, unsigned int val)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].numSplit = val;
}
|
|
|
|
// Returns bb_id stored for dcl's id, or defaultValues.bb_id when the id is
// outside vars.
unsigned int getBBId(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].bb_id : defaultValues.bb_id;
}
|
|
// Stores id as bb_id for dcl, growing vars first if required.
void setBBId(G4_Declare* dcl, unsigned int id)
{
    const auto slot = dcl->getDeclId();
    resize(slot);
    vars[slot].bb_id = id;
}
|
|
|
|
// True when dcl's recorded block id is below UINT_MAX - 1.
bool isBlockLocal(G4_Declare* dcl)
{
    const unsigned int bbId = getBBId(dcl);
    return bbId < (UINT_MAX - 1);
}
|
|
|
|
// Returns splittedDCL stored for dcl's id, or defaultValues.splittedDCL when
// the id is outside vars.
G4_Declare* getSplittedDeclare(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].splittedDCL : defaultValues.splittedDCL;
}
|
|
|
|
// Stores sd as splittedDCL for dcl's id, growing vars first if required.
void setSplittedDeclare(G4_Declare* dcl, G4_Declare* sd)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].splittedDCL = sd;
}
|
|
|
|
// Returns localLR stored for dcl's id, or defaultValues.localLR when the id is
// outside vars.
LocalLiveRange* getLocalLR(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].localLR : defaultValues.localLR;
}
|
|
|
|
// Stores lr as the local live range of dcl. Asserts that no local live range
// has been recorded for dcl yet.
void setLocalLR(G4_Declare* dcl, LocalLiveRange* lr)
{
    const auto id = dcl->getDeclId();
    resize(id);
    MUST_BE_TRUE(vars[id].localLR == nullptr, "Local live range already allocated for declaration");
    vars[id].localLR = lr;
}
|
|
|
|
// Clears the local live range recorded for dcl (sets it to null), growing
// vars first if required.
void resetLocalLR(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].localLR = nullptr;
}
|
|
|
|
// Increments numRefs for dcl's id by one, growing vars first if required.
void recordRef(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    resize(id);
    ++vars[id].numRefs;
}
|
|
|
|
// Returns numRefs stored for dcl's id, or defaultValues.numRefs when the id is
// outside vars.
unsigned int getNumRefs(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].numRefs : defaultValues.numRefs;
}
|
|
|
|
// Overwrites numRefs for dcl's id with refs, growing vars first if required.
void setNumRefs(G4_Declare* dcl, unsigned int refs)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].numRefs = refs;
}
|
|
|
|
// Returns conflict stored for dcl's id, or defaultValues.conflict when the id
// is outside vars.
BankConflict getBankConflict(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].conflict : defaultValues.conflict;
}
|
|
|
|
// Stores c as the bank-conflict value for dcl's id, growing vars first if
// required.
void setBankConflict(G4_Declare* dcl, BankConflict c)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].conflict = c;
}
|
|
|
|
// Returns startInterval stored for dcl's id, or defaultValues.startInterval
// when the id is outside vars.
G4_INST* getStartInterval(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].startInterval : defaultValues.startInterval;
}
|
|
|
|
// Stores inst as startInterval for dcl's id, growing vars first if required.
void setStartInterval(G4_Declare* dcl, G4_INST* inst)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].startInterval = inst;
}
|
|
|
|
// Returns endInterval stored for dcl's id, or defaultValues.endInterval when
// the id is outside vars.
G4_INST* getEndInterval(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].endInterval : defaultValues.endInterval;
}
|
|
|
|
// Stores inst as endInterval for dcl's id, growing vars first if required.
void setEndInterval(G4_Declare* dcl, G4_INST* inst)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].endInterval = inst;
}
|
|
|
|
// Returns the mask pointer stored for dcl's id, or defaultValues.mask when the
// id is outside vars.
unsigned char* getMask(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].mask : defaultValues.mask;
}
|
|
|
|
// Stores the pointer m as the mask for dcl's id, growing vars first if
// required. Only the pointer is stored; nothing is copied.
void setMask(G4_Declare* dcl, unsigned char* m)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].mask = m;
}
|
|
|
|
// Returns maskType stored for dcl's id, or defaultValues.maskType when the id
// is outside vars.
AugmentationMasks getAugmentationMask(G4_Declare* dcl) const
{
    const auto id = dcl->getDeclId();
    return (id < vars.size()) ? vars[id].maskType : defaultValues.maskType;
}
|
|
|
|
// Stores m as maskType for dcl and, when dcl reports getIsSplittedDcl(),
// applies the same mask recursively to each of its recorded sub-declares.
void setAugmentationMask(G4_Declare* dcl, AugmentationMasks m)
{
    const auto id = dcl->getDeclId();
    resize(id);
    vars[id].maskType = m;

    if (!dcl->getIsSplittedDcl())
    {
        return;
    }

    // The count is read once, before the sub-declares are visited.
    const auto numSubDcls = getSubDclSize(dcl);
    for (unsigned idx = 0; idx < numSubDcls; idx++)
    {
        setAugmentationMask(getSubDcl(dcl, idx), m);
    }
}
|
|
|
|
// True when the mask type recorded for dcl is AugmentationMasks::NonDefault.
bool getHasNonDefaultMaskDef(G4_Declare* dcl) const
{
    const AugmentationMasks recorded = getAugmentationMask(dcl);
    return recorded == AugmentationMasks::NonDefault;
}
|
|
|
|
// Appends subDcl to the sub-declare list of dcl, growing vars first if
// required.
void addSubDcl(G4_Declare *dcl, G4_Declare* subDcl)
{
    const auto id = dcl->getDeclId();
    resize(id);
    auto& subDcls = vars[id].subDclList;
    subDcls.push_back(subDcl);
}
|
|
|
|
// Empties the sub-declare list of dcl, growing vars first if required.
void clearSubDcl(G4_Declare* dcl)
{
    const auto id = dcl->getDeclId();
    resize(id);
    auto& subDcls = vars[id].subDclList;
    subDcls.clear();
}
|
|
|
|
// Returns the i-th sub-declare of dcl. Note that this accessor may grow vars,
// and i is not range-checked against the list size.
G4_Declare* getSubDcl(G4_Declare* dcl, unsigned i)
{
    const auto id = dcl->getDeclId();
    resize(id);
    auto& subDcls = vars[id].subDclList;
    return subDcls[i];
}
|
|
|
|
// Returns how many sub-declares are registered for `dcl`, narrowed to `unsigned`.
// Unlike the plain getters, this lookup calls resize() before indexing `vars`.
unsigned getSubDclSize(G4_Declare* dcl)
{
    auto id = dcl->getDeclId();
    resize(id);

    auto& subDcls = vars[id].subDclList;
    return static_cast<unsigned>(subDcls.size());
}
|
|
|
|
// Returns the sub-offset stored for `dcl`, or `defaultValues.subOff` when the
// declare id is not covered by `vars`. The lookup never grows `vars`.
unsigned int getSubOffset(G4_Declare* dcl)
{
    auto id = dcl->getDeclId();
    if (id < vars.size())
    {
        return vars[id].subOff;
    }

    return defaultValues.subOff;
}
|
|
|
|
// Stores `offset` as the sub-offset of `dcl`.
// resize() is invoked before the write so the slot for this declare id is addressable.
void setSubOffset(G4_Declare* dcl, unsigned int offset)
{
    auto id = dcl->getDeclId();
    resize(id);

    auto& entry = vars[id];
    entry.subOff = offset;
}
|
|
|
|
G4_Align getBankAlign(G4_Declare*);
|
|
bool areAllDefsNoMask(G4_Declare*);
|
|
void removeUnreferencedDcls();
|
|
LocalLiveRange* GetOrCreateLocalLiveRange(G4_Declare* topdcl, Mem_Manager& mem);
|
|
|
|
// Binds the allocator to kernel `k`, the builder reachable through `k.fg.builder`,
// the physical register pool `r` and the points-to analysis `p2a`, then sizes the
// per-declare table `vars` to the number of entries currently in `k.Declares`.
GlobalRA(G4_Kernel& k, PhyRegPool& r, PointsToAnalysis& p2a)
    : kernel(k),
      builder(*k.fg.builder),
      regPool(r),
      pointsToAnalysis(p2a)
{
    auto declareCount = k.Declares.size();
    vars.resize(declareCount);
}
|
|
|
|
void emitFGWithLiveness(LivenessAnalysis& liveAnalysis);
|
|
void reportSpillInfo(LivenessAnalysis& liveness, GraphColor& coloring);
|
|
static uint32_t getRefCount(int loopNestLevel);
|
|
bool isReRAPass();
|
|
void updateSubRegAlignment(unsigned char regFile, G4_SubReg_Align subAlign);
|
|
void updateAlignment(unsigned char regFile, G4_Align align);
|
|
void getBankAlignment(LiveRange* lr, G4_Align &align);
|
|
void printLiveIntervals();
|
|
void reportUndefinedUses(LivenessAnalysis& liveAnalysis, G4_BB* bb, G4_INST* inst, G4_Declare* referencedDcl, std::set<G4_Declare*>& defs, std::ofstream& optreport, Gen4_Operand_Number opndNum);
|
|
void detectNeverDefinedUses();
|
|
void emitVarLiveIntervals();
|
|
|
|
void determineSpillRegSize(unsigned& spillRegSize, unsigned& indrSpillRegSize);
|
|
G4_Imm* createMsgDesc(unsigned owordSize, bool writeType, bool isSplitSend);
|
|
void addrRegAlloc();
|
|
void flagRegAlloc();
|
|
bool hybridRA(bool doBankConflictReduction, bool highInternalConflict, DECLARE_LIST_ITER firstDclIter, LocalRA& lra);
|
|
void assignRegForAliasDcl();
|
|
void removeSplitDecl();
|
|
int coloringRegAlloc();
|
|
void addCallerSavePseudoCode();
|
|
void addCalleeSavePseudoCode();
|
|
void addStoreRestoreForFP();
|
|
void setABIForStackCallFunctionCalls();
|
|
void markGraphBlockLocalVars();
|
|
void verifyRA(LivenessAnalysis & liveAnalysis);
|
|
};
|
|
|
|
class VarSplit
|
|
{
|
|
private:
|
|
G4_Kernel& kernel;
|
|
GlobalRA& gra;
|
|
|
|
VarRange* splitVarRange(VarRange *src1, VarRange *src2, std::stack<VarRange*> *toDelete);
|
|
void rangeListSpliting(VAR_RANGE_LIST *rangeList, G4_Operand *opnd, std::stack<VarRange*> *toDelete);
|
|
static void getHeightWidth(G4_Type type, unsigned int numberElements, unsigned short &dclWidth, unsigned short &dclHeight, int &totalByteSize);
|
|
void createSubDcls(G4_Kernel& kernel, G4_Declare* oldDcl, std::vector<G4_Declare*> &splitDclList);
|
|
void insertMovesToTemp(IR_Builder& builder, G4_Declare* oldDcl, G4_Operand *dstOpnd, INST_LIST &instList, INST_LIST_ITER instIter, std::vector<G4_Declare*> &splitDclList);
|
|
void insertMovesFromTemp(G4_Kernel& kernel, G4_Declare* oldDcl, int index, G4_Operand *srcOpnd, int pos, INST_LIST &instList, INST_LIST_ITER instIter, std::vector<G4_Declare*> &splitDclList);
|
|
|
|
public:
|
|
bool didLocalSplit = false;
|
|
bool didGlobalSplit = false;
|
|
|
|
void localSplit(IR_Builder& builder, G4_BB* bb);
|
|
void globalSplit(IR_Builder& builder, G4_Kernel &kernel);
|
|
bool canDoGlobalSplit(IR_Builder& builder, G4_Kernel &kernel, uint32_t instNum, uint32_t spillRefCount, uint32_t sendSpillRefCount);
|
|
|
|
VarSplit(GlobalRA& g) : kernel(g.kernel), gra(g)
|
|
{
|
|
|
|
}
|
|
};
|
|
|
|
//
|
|
// Spill code clean up
|
|
//
|
|
// Two fixed-size tables of ten unsigned counters each, one named for spills and one
// for fills. What each of the ten slots stands for is not visible in this chunk.
struct _CLEAN_NUM_PROFILE
{
    unsigned spill_clean_num[10];
    unsigned fill_clean_num[10];
};
using CLEAN_NUM_PROFILE = _CLEAN_NUM_PROFILE;
|
|
|
|
// A pair of unsigned offsets, `leftOff` followed by `rightOff`.
// Member order is part of the interface (aggregate initialization).
struct _SCRATCH_RANGE
{
    unsigned int leftOff;
    unsigned int rightOff;
};
using SCRATCH_RANGE = _SCRATCH_RANGE;
|
|
|
|
typedef std::vector<SCRATCH_RANGE > SCRATCH_RANGE_VEC;
|
|
typedef std::vector<SCRATCH_RANGE >::iterator SCRATCH_RANGE_VEC_ITER;
|
|
|
|
// A linearized [start, end] pair plus a `predicate` flag.
// Member order is part of the interface (aggregate initialization).
struct _RANGE
{
    unsigned int linearizedStart;
    unsigned int linearizedEnd;
    bool predicate;
};
using REG_RANGE = _RANGE;
|
|
|
|
typedef std::vector<REG_RANGE > REG_RANGE_VEC;
|
|
typedef std::vector<REG_RANGE >::iterator REG_RANGE_VEC_ITER;
|
|
|
|
typedef std::pair<vISA::G4_INST *, int > RENAME_OPND;
|
|
typedef std::vector<RENAME_OPND> RANAME_VEC;
|
|
|
|
typedef struct _SCRATCH_ACCESS
|
|
{
|
|
//Basic info
|
|
#ifdef _DEBUG
|
|
int regNum;
|
|
#endif
|
|
vISA::G4_Declare* dcl;
|
|
vISA::G4_Operand* tgtOpnd;
|
|
INST_LIST_ITER inst_it;
|
|
|
|
unsigned int linearizedStart; //linearized start regsiter address
|
|
unsigned int linearizedEnd; //linearized end regsiter address
|
|
unsigned int leftOff; //left offset in scratch space
|
|
unsigned int rightOff; //right offset in the scratch space
|
|
unsigned int useCount;
|
|
|
|
bool isSpill;
|
|
bool isBlockLocal;
|
|
bool directKill;
|
|
|
|
bool regKilled;
|
|
bool regPartialKilled;
|
|
bool regOverKilled;
|
|
bool inRangePartialKilled;
|
|
bool regInUse;
|
|
|
|
bool fillInUse;
|
|
bool removeable;
|
|
bool instKilled;
|
|
bool evicted;
|
|
|
|
unsigned int maskFlag;
|
|
|
|
RANAME_VEC renameOperandVec;
|
|
SCRATCH_RANGE_VEC killedScratchRange;
|
|
REG_RANGE_VEC killedRegRange;
|
|
struct _SCRATCH_ACCESS* preScratchAccess;
|
|
struct _SCRATCH_ACCESS* prePreScratchAccess;
|
|
struct _SCRATCH_ACCESS* preFillAccess;
|
|
|
|
} SCRATCH_ACCESS;
|
|
|
|
typedef std::vector< SCRATCH_ACCESS *> SCRATCH_PTR_VEC;
|
|
|
|
typedef vISA::std_arena_based_allocator<SCRATCH_ACCESS*> SCRATCH_PTR_ALLOCATOR;
|
|
typedef std::list<SCRATCH_ACCESS*, SCRATCH_PTR_ALLOCATOR> SCRATCH_PTR_LIST;
|
|
typedef std::list<SCRATCH_ACCESS*, SCRATCH_PTR_ALLOCATOR>::iterator SCRATCH_PTR_LIST_ITER;
|
|
|
|
class FlagSpillCleanup
|
|
{
|
|
private:
|
|
GlobalRA& gra;
|
|
|
|
void FlagLineraizedStartAndEnd(G4_Declare* topdcl, unsigned int& linearizedStart, unsigned int& linearizedEnd);
|
|
bool replaceWithPreDcl(IR_Builder& builder, SCRATCH_ACCESS* scratchAccess, SCRATCH_ACCESS* preScratchAccess,
|
|
G4_RegFileKind regKind);
|
|
bool scratchKilledByPartial(SCRATCH_ACCESS* scratchAccess, SCRATCH_ACCESS* preScratchAccess);
|
|
bool addKilledGRFRanges(unsigned int linearizedStart, unsigned int linearizedEnd, SCRATCH_ACCESS* scratchAccess,
|
|
G4_RegFileKind regKind, G4_Predicate* predicate);
|
|
bool regFullyKilled(SCRATCH_ACCESS* scratchAccess, unsigned int linearizedStart, unsigned int linearizedEnd,
|
|
unsigned short maskFlag, G4_RegFileKind regKind, G4_Predicate* predicate);
|
|
bool inRangePartialKilled(SCRATCH_ACCESS* scratchAccess, unsigned int linearizedStart, unsigned int linearizedEnd,
|
|
unsigned short maskFlag);
|
|
bool regDefineAnalysis(SCRATCH_ACCESS* scratchAccess, unsigned int linearizedStart, unsigned int linearizedEnd,
|
|
unsigned short maskFlag, G4_RegFileKind regKind, G4_Predicate* predicate);
|
|
void regDefineFlag(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd, G4_RegFileKind regKind);
|
|
bool regUseAnalysis(SCRATCH_ACCESS* scratchAccess, unsigned int linearizedStart, unsigned int linearizedEnd);
|
|
void regUseFlag(SCRATCH_PTR_LIST* scratchTraceList, G4_INST* inst, G4_Operand* opnd, int opndIndex, G4_RegFileKind regKind);
|
|
void initializeScratchAccess(SCRATCH_ACCESS *scratchAccess, INST_LIST_ITER inst_it);
|
|
bool initializeFlagScratchAccess(SCRATCH_PTR_VEC* scratchAccessList, SCRATCH_ACCESS*& scratchAccess, INST_LIST_ITER inst_it);
|
|
void freeScratchAccess(SCRATCH_PTR_VEC *scratchAccessList);
|
|
void flagDefine(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
|
|
void flagUse(SCRATCH_PTR_LIST& scratchTraceList, G4_INST* inst);
|
|
bool flagScratchDefineUse(G4_BB* bb, SCRATCH_PTR_LIST* scratchTraceList, SCRATCH_PTR_VEC* candidateList,
|
|
SCRATCH_ACCESS* scratchAccess, CLEAN_NUM_PROFILE* clean_num_profile);
|
|
void flagSpillFillClean(G4_BB* bb, INST_LIST_ITER inst_it, SCRATCH_PTR_VEC& scratchAccessList,
|
|
SCRATCH_PTR_LIST& scratchTraceList, SCRATCH_PTR_VEC& candidateList, CLEAN_NUM_PROFILE* clean_num_profile);
|
|
void regFillClean(IR_Builder& builder, G4_BB* bb, SCRATCH_PTR_VEC& candidateList, G4_RegFileKind regKind,
|
|
CLEAN_NUM_PROFILE* clean_num_profile);
|
|
void regSpillClean(IR_Builder& builder, G4_BB* bb, SCRATCH_PTR_VEC& candidateList, CLEAN_NUM_PROFILE* clean_num_profile);
|
|
|
|
public:
|
|
void spillFillCodeCleanFlag(IR_Builder& builder, G4_Kernel& kernel, G4_RegFileKind regKind, CLEAN_NUM_PROFILE* clean_num_profile);
|
|
FlagSpillCleanup(GlobalRA& g) : gra(g)
|
|
{
|
|
|
|
}
|
|
|
|
};
|
|
}
|
|
|
|
#endif // __GRAPHCOLOR_H__
|