mirror of
				https://github.com/intel/intel-graphics-compiler.git
				synced 2025-11-04 08:21:06 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			730 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			730 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
/*========================== begin_copyright_notice ============================
 | 
						|
 | 
						|
Copyright (C) 2017-2021 Intel Corporation
 | 
						|
 | 
						|
SPDX-License-Identifier: MIT
 | 
						|
 | 
						|
============================= end_copyright_notice ===========================*/
 | 
						|
 | 
						|
#ifndef __SPILLMANAGERGMRF_H__
 | 
						|
#define __SPILLMANAGERGMRF_H__
 | 
						|
 | 
						|
#include "Assertions.h"
 | 
						|
#include "BuildIR.h"
 | 
						|
#include "G4_Opcode.h"
 | 
						|
 | 
						|
#include <list>
 | 
						|
#include <utility>
 | 
						|
#include <vector>
 | 
						|
 | 
						|
// Forward declarations
 | 
						|
namespace vISA {
 | 
						|
class G4_Kernel;
 | 
						|
class G4_Declare;
 | 
						|
class G4_Operand;
 | 
						|
class G4_RegVar;
 | 
						|
class G4_RegVarTransient;
 | 
						|
class G4_DstRegRegion;
 | 
						|
class G4_SrcRegRegion;
 | 
						|
class G4_Imm;
 | 
						|
class G4_Predicate;
 | 
						|
class G4_INST;
 | 
						|
class IR_Builder;
 | 
						|
class LivenessAnalysis;
 | 
						|
class Interference;
 | 
						|
class LiveRange;
 | 
						|
class LSLiveRange;
 | 
						|
class PointsToAnalysis;
 | 
						|
class GlobalRA;
 | 
						|
class GraphColor;
 | 
						|
class G4_INST;
 | 
						|
} // namespace vISA
 | 
						|
struct RegionDesc;
 | 
						|
using LiveRangeVec = std::vector<vISA::LiveRange*>;
 | 
						|
 | 
						|
// Class definitions
 | 
						|
namespace vISA {
 | 
						|
// New fail-safe RA implementation
 | 
						|
class BoundedRA {
 | 
						|
public:
 | 
						|
  // "Push" is inserted after prev iter
 | 
						|
  // "Pop" is inserted before next iter
 | 
						|
  INST_LIST_ITER prev, next;
 | 
						|
 | 
						|
  static const unsigned int NOT_FOUND = 0xffffffff;
 | 
						|
 | 
						|
  // Upper threshold of # variables spilled in current iteration
 | 
						|
  // to convert current regular (ie, non-fail safe) RA iteration
 | 
						|
  // into a fail-safe RA iteration.
 | 
						|
  static const unsigned int MaxSpillNumVars = 3;
 | 
						|
 | 
						|
  // Lower threshold # variables to convert current regular RA
 | 
						|
  // iteration into a fail-safe RA iteration.
 | 
						|
  static const unsigned int LargeProgramSize = 20000;
 | 
						|
 | 
						|
  // Returns the kernel's total register count multiplied by its per-GRF
  // element count for Type_UB.
  static unsigned int getNumPhyVarSlots(G4_Kernel &kernel) {
    const auto grfCount = kernel.getNumRegTotal();
    const auto slotsPerGRF = kernel.numEltPerGRF<Type_UB>();
    return grfCount * slotsPerGRF;
  }
 | 
						|
 | 
						|
  void setInst(const G4_INST *i, G4_BB *bb) {
 | 
						|
    curInst = i;
 | 
						|
    curBB = bb;
 | 
						|
    computeBusy();
 | 
						|
  }
 | 
						|
 | 
						|
  // Instruction most recently passed to setInst(), or nullptr if none was.
  const G4_INST *getInst() const {
    return curInst;
  }
 | 
						|
 | 
						|
  bool isFreeGRF(unsigned int reg) {
 | 
						|
    auto &entry = busyGRF[curInst];
 | 
						|
    return !entry.test(reg);
 | 
						|
  }
 | 
						|
 | 
						|
  bool isFreeGRFOtherInst(unsigned int reg, const G4_INST *inst) {
 | 
						|
    // Used only when looking up free GRFs in non-curInst
 | 
						|
    auto &entry = busyGRF[inst];
 | 
						|
    return !entry.test(reg);
 | 
						|
  }
 | 
						|
 | 
						|
  void markGRF(unsigned int reg) {
 | 
						|
    auto &entry = busyGRF[curInst];
 | 
						|
    entry.set(reg, true);
 | 
						|
  }
 | 
						|
 | 
						|
  void markGRFs(unsigned int reg, unsigned int num);
 | 
						|
 | 
						|
  unsigned int getConsecutiveFree(unsigned int num,
 | 
						|
                                  unsigned int forceStart = NOT_FOUND,
 | 
						|
                                  bool isIndirect = false) {
 | 
						|
    unsigned int start = forceStart, sizeFound = 0;
 | 
						|
    unsigned int lastReg = kernel.getNumRegTotal();
 | 
						|
 | 
						|
    if (start > kernel.getNumRegTotal())
 | 
						|
      start = 0;
 | 
						|
 | 
						|
    bool scannedOnce = false;
 | 
						|
    for (unsigned int i = start;; ++i) {
 | 
						|
      // No block found in entire GRF file search
 | 
						|
      if (scannedOnce && i == forceStart) {
 | 
						|
        vISA_ASSERT(false, "no free GRF found in fail safe");
 | 
						|
        return NOT_FOUND;
 | 
						|
      }
 | 
						|
 | 
						|
      if (i == lastReg) {
 | 
						|
        // Wrap around
 | 
						|
        i = 0;
 | 
						|
        sizeFound = 0;
 | 
						|
        start = i + 1;
 | 
						|
        scannedOnce = true;
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
 | 
						|
      if (!isFreeGRF(i) || (isIndirect && !isFreeIndir(i))) {
 | 
						|
        sizeFound = 0;
 | 
						|
        start = i + 1;
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      ++sizeFound;
 | 
						|
 | 
						|
      if (sizeFound == num) {
 | 
						|
        markGRFs(start, num);
 | 
						|
        if (isIndirect) {
 | 
						|
          vISA_ASSERT(addrDcl, "expecting non-nullptr addrDcl");
 | 
						|
          markIndirBusy(start, num);
 | 
						|
        }
 | 
						|
        return start;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    return NOT_FOUND;
 | 
						|
  }
 | 
						|
 | 
						|
  unsigned int getFreeGRFIndir(unsigned int num,
 | 
						|
                               unsigned int forceStart = NOT_FOUND) {
 | 
						|
    return getFreeGRF(num, forceStart, true);
 | 
						|
  }
 | 
						|
 | 
						|
  unsigned int getFreeGRF(unsigned int num, unsigned int forceStart = NOT_FOUND,
 | 
						|
                          bool isIndirect = false) {
 | 
						|
    // EOT inst requires max GRF - 16 or higher allocation as per HW
 | 
						|
    if (!curInst->isEOT())
 | 
						|
      return getConsecutiveFree(
 | 
						|
          num, forceStart == NOT_FOUND ? reservedGRFStart : forceStart,
 | 
						|
          isIndirect);
 | 
						|
    else {
 | 
						|
      auto freeGRFStart =
 | 
						|
          getConsecutiveFree(num, kernel.getNumRegTotal() - 16, isIndirect);
 | 
						|
      vISA_ASSERT(freeGRFStart >= (kernel.getNumRegTotal() - 16) &&
 | 
						|
                       (freeGRFStart + num) < kernel.getNumRegTotal(),
 | 
						|
                   "unexpected EOT allocation");
 | 
						|
      return freeGRFStart;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  // Store the spill offset. `off` is the scratch offset after considering
  // private storage used by IGC.
  void setSpillOff(unsigned int off) {
    spillOffset = off;
  }
 | 
						|
 | 
						|
  // Set the register index at which getFreeGRF() begins its search when the
  // caller passes no explicit start.
  void setReservedStart(unsigned int s) {
    reservedGRFStart = s;
  }
 | 
						|
 | 
						|
  void insertPushPop(bool useLSCMsg);
 | 
						|
 | 
						|
  void markIndirIntfs();
 | 
						|
  // Select the address declare used by indirect allocations
  // (see markIndirBusy()).
  void setAddrDcl(G4_Declare *a) {
    addrDcl = a;
  }
 | 
						|
  // Clear the address declare previously selected with setAddrDcl().
  void resetAddrDcl() {
    addrDcl = nullptr;
  }
 | 
						|
 | 
						|
  void computeAllBusy() {
 | 
						|
    for (auto bb : kernel.fg.getBBList())
 | 
						|
      for (auto inst : bb->getInstList())
 | 
						|
        setInst(inst, bb);
 | 
						|
  }
 | 
						|
 | 
						|
  void markClobbered(unsigned int reg, unsigned int numRegs) {
 | 
						|
    // Mark registers as being clobbered in current inst
 | 
						|
    auto &entry = clobberedGRFs[curInst];
 | 
						|
    for (unsigned int busyReg = reg; busyReg != (reg + numRegs); ++busyReg) {
 | 
						|
      entry.insert(busyReg);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  BoundedRA(GlobalRA &ra, const LiveRangeVec *l = nullptr);
 | 
						|
 | 
						|
private:
 | 
						|
  GlobalRA &gra;
 | 
						|
  G4_Kernel &kernel;
 | 
						|
  const LiveRangeVec *lrs;
 | 
						|
  static const unsigned int bitsetSz = 256;
 | 
						|
  // Map each inst -> list of busy GRFs
 | 
						|
  std::unordered_map<const G4_INST *, std::bitset<bitsetSz>> busyGRF;
 | 
						|
  std::unordered_map<const G4_INST *, std::set<unsigned short>> clobberedGRFs;
 | 
						|
  std::unordered_map<G4_Declare *, std::list<const G4_INST *>> busyIndir;
 | 
						|
 | 
						|
  const G4_INST *curInst = nullptr;
 | 
						|
  G4_BB *curBB = nullptr;
 | 
						|
  G4_Declare *addrDcl = nullptr;
 | 
						|
  unsigned int lastGRF = 1;
 | 
						|
  unsigned int spillOffset = NOT_FOUND;
 | 
						|
  unsigned int reservedGRFStart = NOT_FOUND;
 | 
						|
 | 
						|
  void markBusyGRFs();
 | 
						|
  void computeBusy() {
 | 
						|
    if (!curInst)
 | 
						|
      return;
 | 
						|
    markBusyGRFs();
 | 
						|
  }
 | 
						|
 | 
						|
  // Mark forbidden registers that are universal (eg, r0)
 | 
						|
  void markUniversalForbidden();
 | 
						|
  // Forbidden is marked per LR in GRA. In this case, we
 | 
						|
  // club forbidden for all operands in a instruction and
 | 
						|
  // apply it to the instruction for simplicity.
 | 
						|
  void markForbidden(LiveRange *lr);
 | 
						|
 | 
						|
  void markIndirBusy(unsigned int start, unsigned int num) {
 | 
						|
    vISA_ASSERT(busyIndir.find(addrDcl) != busyIndir.end(),
 | 
						|
                 "no inst found referencing addrDcl");
 | 
						|
 | 
						|
    auto &refs = busyIndir[addrDcl];
 | 
						|
 | 
						|
    // Mark GRFs as busy in all instruction where current
 | 
						|
    // address register is used as indirect.
 | 
						|
    for (auto inst : refs) {
 | 
						|
      for (unsigned int reg = start; reg != (start + num); ++reg)
 | 
						|
        busyGRF[inst].set(reg);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  bool isFreeIndir(unsigned int r);
 | 
						|
};
 | 
						|
 | 
						|
class SpillManagerGRF {
 | 
						|
public:
 | 
						|
  using LR_LIST = std::list<LiveRange *>;
 | 
						|
  using LSLR_LIST = std::list<LSLiveRange *>;
 | 
						|
 | 
						|
  // Constructor for GCRA.
 | 
						|
  SpillManagerGRF(GlobalRA &g, unsigned spillAreaOffset,
 | 
						|
                  const LivenessAnalysis *lvInfo, const Interference *intf,
 | 
						|
                  const LR_LIST *spilledLRs, bool useSpillReg,
 | 
						|
                  unsigned spillRegSize, unsigned indrSpillRegSize,
 | 
						|
                  bool enableSpillSpaceCompression, bool useScratchMsg);
 | 
						|
 | 
						|
  // Constructor for linear scan RA.
 | 
						|
  SpillManagerGRF(GlobalRA &g, unsigned spillAreaOffset,
 | 
						|
                  const LivenessAnalysis *lvInfo, LSLR_LIST *spilledLSLRs,
 | 
						|
                  bool enableSpillSpaceCompression, bool useScratchMsg);
 | 
						|
 | 
						|
  bool insertSpillFillCode(G4_Kernel *kernel,
 | 
						|
                           PointsToAnalysis &pointsToAnalysis);
 | 
						|
 | 
						|
  void expireRanges(unsigned int idx, std::list<LSLiveRange *> *liveList);
 | 
						|
 | 
						|
  void updateActiveList(LSLiveRange *lr, std::list<LSLiveRange *> *liveList);
 | 
						|
 | 
						|
  bool spillLiveRanges(G4_Kernel *kernel);
 | 
						|
 | 
						|
  // return the next cumulative logical offset. This does not non-spilled stuff
 | 
						|
  // like private variables placed by IGC (marked by spill_mem_offset) this
 | 
						|
  // should only be called after insertSpillFillCode()
 | 
						|
  uint32_t getNextOffset() const { return nextSpillOffset_; }
 | 
						|
  // return the cumulative scratch space offset for the next spilled variable.
 | 
						|
  // This adjusts for scratch space reserved for file scope vars and IGC/GT-pin
 | 
						|
  uint32_t getNextScratchOffset() const {
 | 
						|
    int offset = nextSpillOffset_;
 | 
						|
    getSpillOffset(offset);
 | 
						|
    return offset;
 | 
						|
  }
 | 
						|
 | 
						|
  // convert zero-based logical offset into the scratch space offset.
 | 
						|
  void getSpillOffset(int &logicalOffset) const {
 | 
						|
    logicalOffset += globalScratchOffset;
 | 
						|
  }
 | 
						|
 | 
						|
  static std::tuple<uint32_t, G4_ExecSize>
 | 
						|
  createSpillSendMsgDescOWord(const IR_Builder &builder, unsigned int height);
 | 
						|
 | 
						|
private:
 | 
						|
  G4_Declare *getOrCreateAddrSpillFillDcl(G4_RegVar *addrDcl,
 | 
						|
                                          G4_Declare *spilledAddrTakenDcl,
 | 
						|
                                          G4_Kernel *kernel);
 | 
						|
  bool handleAddrTakenSpills(G4_Kernel *kernel,
 | 
						|
                             PointsToAnalysis &pointsToAnalysis);
 | 
						|
  unsigned int handleAddrTakenLSSpills(G4_Kernel *kernel,
 | 
						|
                                       PointsToAnalysis &pointsToAnalysis);
 | 
						|
  void insertAddrTakenSpillFill(G4_Kernel *kernel,
 | 
						|
                                PointsToAnalysis &pointsToAnalysis);
 | 
						|
  void insertAddrTakenLSSpillFill(G4_Kernel *kernel,
 | 
						|
                                  PointsToAnalysis &pointsToAnalysis);
 | 
						|
  void insertAddrTakenSpillAndFillCode(G4_Kernel *kernel, G4_BB *bb,
 | 
						|
                                       INST_LIST::iterator inst_it,
 | 
						|
                                       G4_Operand *opnd,
 | 
						|
                                       PointsToAnalysis &pointsToAnalysis,
 | 
						|
                                       bool spill, unsigned int bbid);
 | 
						|
  void insertAddrTakenLSSpillAndFillCode(G4_Kernel *kernel, G4_BB *bb,
 | 
						|
                                         INST_LIST::iterator inst_it,
 | 
						|
                                         G4_Operand *opnd,
 | 
						|
                                         PointsToAnalysis &pointsToAnalysis,
 | 
						|
                                         bool spill, unsigned int bbid);
 | 
						|
  void prunePointsTo(G4_Kernel *kernel, PointsToAnalysis &pointsToAnalysis);
 | 
						|
 | 
						|
  void prunePointsToLS(G4_Kernel *kernel, PointsToAnalysis &pointsToAnalysis);
 | 
						|
 | 
						|
  bool isComprInst(G4_INST *inst) const;
 | 
						|
 | 
						|
  bool isMultiRegComprSource(G4_SrcRegRegion *src, G4_INST *inst) const;
 | 
						|
 | 
						|
  unsigned getSendMaxResponseLength() const;
 | 
						|
 | 
						|
  unsigned getSendMaxMessageLength() const;
 | 
						|
 | 
						|
  static unsigned getSendDescDataSizeBitOffset();
 | 
						|
 | 
						|
  unsigned getSendReadTypeBitOffset() const;
 | 
						|
 | 
						|
  static unsigned getSendWriteTypeBitOffset();
 | 
						|
 | 
						|
  unsigned getSendScReadType() const;
 | 
						|
 | 
						|
  unsigned getSendScWriteType() const;
 | 
						|
 | 
						|
  unsigned getSendOwordReadType() const;
 | 
						|
  static unsigned getSendOwordWriteType();
 | 
						|
  unsigned getSendExDesc(bool isWrite, bool isScatter) const;
 | 
						|
 | 
						|
  unsigned getSpillIndex();
 | 
						|
 | 
						|
  unsigned getFillIndex();
 | 
						|
 | 
						|
  unsigned getTmpIndex();
 | 
						|
 | 
						|
  unsigned getMsgSpillIndex();
 | 
						|
 | 
						|
  unsigned getMsgFillIndex();
 | 
						|
 | 
						|
  unsigned getAddrSpillFillIndex();
 | 
						|
 | 
						|
  template <class REGION_TYPE> G4_RegVar *getRegVar(REGION_TYPE *region) const;
 | 
						|
 | 
						|
  G4_RegFileKind getRFType(G4_RegVar *regvar) const;
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  G4_RegFileKind getRFType(REGION_TYPE *region) const;
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  unsigned getRegionOriginOffset(REGION_TYPE *region) const;
 | 
						|
 | 
						|
  unsigned grfMask() const;
 | 
						|
 | 
						|
  unsigned hwordMask() const;
 | 
						|
 | 
						|
  unsigned owordMask() const;
 | 
						|
 | 
						|
  bool owordAligned(unsigned offset) const;
 | 
						|
 | 
						|
  static unsigned cdiv(unsigned dvd, unsigned dvr);
 | 
						|
 | 
						|
  G4_RegVar *getRegVar(unsigned id) const;
 | 
						|
 | 
						|
  bool shouldSpillRegister(G4_RegVar *regVar) const;
 | 
						|
 | 
						|
  unsigned getByteSize(G4_RegVar *regVar) const;
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  unsigned getSegmentDisp(REGION_TYPE *region, G4_ExecSize execSize);
 | 
						|
 | 
						|
  unsigned getDisp(G4_RegVar *lRange);
 | 
						|
 | 
						|
  template <class REGION_TYPE> unsigned getRegionDisp(REGION_TYPE *region);
 | 
						|
 | 
						|
  unsigned calculateSpillDisp(G4_RegVar *lRange) const;
 | 
						|
 | 
						|
  unsigned calculateSpillDispForLS(G4_RegVar *regVar) const;
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  unsigned getMsgType(REGION_TYPE *region, G4_ExecSize execSize);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  bool isUnalignedRegion(REGION_TYPE *region, G4_ExecSize execSize);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  void calculateEncAlignedSegment(REGION_TYPE *region, G4_ExecSize execSize,
 | 
						|
                                  unsigned &start, unsigned &end,
 | 
						|
                                  unsigned &type);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  unsigned getEncAlignedSegmentByteSize(REGION_TYPE *region,
 | 
						|
                                        G4_ExecSize execSize);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  unsigned getEncAlignedSegmentDisp(REGION_TYPE *region, G4_ExecSize execSize);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  unsigned getEncAlignedSegmentMsgType(REGION_TYPE *region,
 | 
						|
                                       G4_ExecSize execSize);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  unsigned getSegmentByteSize(REGION_TYPE *region, G4_ExecSize execSize);
 | 
						|
 | 
						|
  unsigned getRegionByteSize(G4_DstRegRegion *region,
 | 
						|
                             G4_ExecSize execSize) const;
 | 
						|
 | 
						|
  unsigned getRegionByteSize(G4_SrcRegRegion *region,
 | 
						|
                             G4_ExecSize execSize) const;
 | 
						|
 | 
						|
  G4_Declare *createRangeDeclare(const char *name, G4_RegFileKind regFile,
 | 
						|
                                 unsigned short nElems, unsigned short nRows,
 | 
						|
                                 G4_Type type, DeclareType kind,
 | 
						|
                                 G4_RegVar *base, G4_Operand *repRegion,
 | 
						|
                                 G4_ExecSize execSize);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  G4_Declare *createTransientGRFRangeDeclare(REGION_TYPE *region, bool isFill,
 | 
						|
                                             unsigned index,
 | 
						|
                                             G4_ExecSize execSize,
 | 
						|
                                             G4_INST *inst);
 | 
						|
 | 
						|
  G4_Declare *createPostDstSpillRangeDeclare(G4_INST *sendOut);
 | 
						|
 | 
						|
  G4_Declare *createSpillRangeDeclare(G4_DstRegRegion *spillRegion,
 | 
						|
                                      G4_ExecSize execSize, G4_INST *inst);
 | 
						|
 | 
						|
  G4_Declare *createGRFFillRangeDeclare(G4_SrcRegRegion *fillRegion,
 | 
						|
                                        G4_ExecSize execSize, G4_INST *inst);
 | 
						|
 | 
						|
  G4_Declare *createSendFillRangeDeclare(G4_SrcRegRegion *filledRegion,
 | 
						|
                                         G4_INST *sendInst);
 | 
						|
 | 
						|
  G4_Declare *createTemporaryRangeDeclare(G4_DstRegRegion *fillRegion,
 | 
						|
                                          G4_ExecSize execSize,
 | 
						|
                                          bool forceSegmentAlignment = false);
 | 
						|
 | 
						|
  G4_DstRegRegion *createSpillRangeDstRegion(G4_RegVar *spillRangeRegVar,
 | 
						|
                                             G4_DstRegRegion *spilledRegion,
 | 
						|
                                             G4_ExecSize execSize,
 | 
						|
                                             unsigned regOff = 0);
 | 
						|
 | 
						|
  G4_SrcRegRegion *createFillRangeSrcRegion(G4_RegVar *fillRangeRegVar,
 | 
						|
                                            G4_SrcRegRegion *filledRegion,
 | 
						|
                                            G4_ExecSize execSize);
 | 
						|
 | 
						|
  G4_SrcRegRegion *createTemporaryRangeSrcRegion(G4_RegVar *tmpRangeRegVar,
 | 
						|
                                                 G4_DstRegRegion *spilledRegion,
 | 
						|
                                                 G4_ExecSize execSize,
 | 
						|
                                                 unsigned regOff = 0);
 | 
						|
 | 
						|
  G4_SrcRegRegion *createBlockSpillRangeSrcRegion(G4_RegVar *spillRangeRegVar,
 | 
						|
                                                  unsigned regOff = 0,
 | 
						|
                                                  unsigned subregOff = 0);
 | 
						|
 | 
						|
  std::optional<G4_Declare *> getPreDefinedMRangeDeclare() const;
 | 
						|
 | 
						|
  G4_Declare *createMRangeDeclare(G4_RegVar *regVar);
 | 
						|
 | 
						|
  G4_Declare *createMRangeDeclare(G4_DstRegRegion *region,
 | 
						|
                                  G4_ExecSize execSize);
 | 
						|
 | 
						|
  G4_Declare *createMRangeDeclare(G4_SrcRegRegion *region,
 | 
						|
                                  G4_ExecSize execSize);
 | 
						|
 | 
						|
  G4_DstRegRegion *createMPayloadBlockWriteDstRegion(G4_RegVar *grfRange,
 | 
						|
                                                     unsigned regOff = 0,
 | 
						|
                                                     unsigned subregOff = 0);
 | 
						|
 | 
						|
  G4_DstRegRegion *createMHeaderInputDstRegion(G4_RegVar *grfRange,
 | 
						|
                                               unsigned subregOff = 0);
 | 
						|
 | 
						|
  G4_DstRegRegion *createMHeaderBlockOffsetDstRegion(G4_RegVar *grfRange);
 | 
						|
 | 
						|
  G4_SrcRegRegion *createInputPayloadSrcRegion();
 | 
						|
 | 
						|
  G4_Declare *initMHeader(G4_Declare *mRangeDcl);
 | 
						|
 | 
						|
  G4_Declare *createAndInitMHeader(G4_RegVar *regVar);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  G4_Declare *initMHeader(G4_Declare *mRangeDcl, REGION_TYPE *region,
 | 
						|
                          G4_ExecSize execSize);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  G4_Declare *createAndInitMHeader(REGION_TYPE *region, G4_ExecSize execSize);
 | 
						|
 | 
						|
  void sendInSpilledRegVarPortions(G4_Declare *fillRangeDcl,
 | 
						|
                                   G4_Declare *mRangeDcl, unsigned regOff,
 | 
						|
                                   unsigned height, unsigned srcRegOff = 0);
 | 
						|
 | 
						|
  void sendOutSpilledRegVarPortions(G4_Declare *spillRangeDcl,
 | 
						|
                                    G4_Declare *mRangeDcl, unsigned regOff,
 | 
						|
                                    unsigned height, unsigned srcRegOff = 0);
 | 
						|
 | 
						|
  void initMWritePayload(G4_Declare *spillRangeDcl, G4_Declare *mRangeDcl,
 | 
						|
                         unsigned regOff, unsigned height);
 | 
						|
 | 
						|
  void initMWritePayload(G4_Declare *spillRangeDcl, G4_Declare *mRangeDcl,
 | 
						|
                         G4_DstRegRegion *spilledRangeRegion,
 | 
						|
                         G4_ExecSize execSize, unsigned regOff = 0);
 | 
						|
 | 
						|
  static unsigned blockSendBlockSizeCode(unsigned regOff);
 | 
						|
 | 
						|
  unsigned scatterSendBlockSizeCode(unsigned regOff) const;
 | 
						|
 | 
						|
  G4_Imm *createSpillSendMsgDesc(unsigned regOff, unsigned height,
 | 
						|
                                 G4_ExecSize &execSize, G4_RegVar *base = NULL);
 | 
						|
 | 
						|
  std::tuple<G4_Imm *, G4_ExecSize>
 | 
						|
  createSpillSendMsgDesc(G4_DstRegRegion *spilledRangeRegion,
 | 
						|
                         G4_ExecSize execSize);
 | 
						|
 | 
						|
  G4_INST *createAddFPInst(G4_ExecSize execSize, G4_DstRegRegion *dst,
 | 
						|
                           G4_Operand *src);
 | 
						|
 | 
						|
  G4_INST *createMovInst(G4_ExecSize execSize, G4_DstRegRegion *dst,
 | 
						|
                         G4_Operand *src, G4_Predicate *predicate = NULL,
 | 
						|
                         G4_InstOpts options = InstOpt_WriteEnable);
 | 
						|
 | 
						|
  G4_INST *createSendInst(G4_ExecSize execSize, G4_DstRegRegion *postDst,
 | 
						|
                          G4_SrcRegRegion *payload, G4_Imm *desc, SFID funcID,
 | 
						|
                          bool isWrite, G4_InstOpts option);
 | 
						|
 | 
						|
  bool usesOWOrLSC(G4_DstRegRegion *spilledRegion);
 | 
						|
 | 
						|
  bool shouldPreloadSpillRange(G4_INST *instContext, G4_BB *parentBB);
 | 
						|
 | 
						|
  void preloadSpillRange(G4_Declare *spillRangeDcl, G4_Declare *mRangeDcl,
 | 
						|
                         G4_DstRegRegion *spilledRangeRegion,
 | 
						|
                         G4_ExecSize execSize);
 | 
						|
 | 
						|
  G4_INST *createSpillSendInstr(G4_Declare *spillRangeDcl,
 | 
						|
                                G4_Declare *mRangeDcl, unsigned regOff,
 | 
						|
                                unsigned height, unsigned spillOff);
 | 
						|
 | 
						|
  G4_INST *createSpillSendInstr(G4_Declare *spillRangeDcl,
 | 
						|
                                G4_Declare *mRangeDcl,
 | 
						|
                                G4_DstRegRegion *spilledRangeRegion,
 | 
						|
                                G4_ExecSize execSize, unsigned option);
 | 
						|
 | 
						|
  G4_Imm *createFillSendMsgDesc(unsigned regOff, unsigned height,
 | 
						|
                                G4_ExecSize &execSize, G4_RegVar *base = NULL);
 | 
						|
 | 
						|
  template <class REGION_TYPE>
 | 
						|
  G4_Imm *createFillSendMsgDesc(REGION_TYPE *filledRangeRegion,
 | 
						|
                                G4_ExecSize execSize);
 | 
						|
 | 
						|
  G4_INST *createFillSendInstr(G4_Declare *fillRangeDcl, G4_Declare *mRangeDcl,
 | 
						|
                               unsigned regOff, unsigned height,
 | 
						|
                               unsigned spillOff);
 | 
						|
 | 
						|
  G4_INST *createFillSendInstr(G4_Declare *fillRangeDcl, G4_Declare *mRangeDcl,
 | 
						|
                               G4_SrcRegRegion *filledRangeRegion,
 | 
						|
                               G4_ExecSize execSize);
 | 
						|
 | 
						|
  G4_SrcRegRegion *getLSCSpillFillHeader(const G4_Declare *fp, int offset);
 | 
						|
 | 
						|
  G4_INST *createLSCSpill(G4_Declare *spillRangeDcl, unsigned regOff,
 | 
						|
                          unsigned height, unsigned spillOff);
 | 
						|
 | 
						|
  G4_INST *createLSCSpill(G4_Declare *spillRangeDcl,
 | 
						|
                          G4_DstRegRegion *spilledRangeRegion,
 | 
						|
                          G4_ExecSize execSize, unsigned option,
 | 
						|
                          bool isScatter = false);
 | 
						|
 | 
						|
  G4_INST *createLSCFill(G4_Declare *fillRangeDcl, unsigned regOff,
 | 
						|
                         unsigned height, unsigned spillOff);
 | 
						|
 | 
						|
  G4_INST *createLSCFill(G4_Declare *fillRangeDcl,
 | 
						|
                         G4_SrcRegRegion *filledRangeRegion,
 | 
						|
                         G4_ExecSize execSize);
 | 
						|
 | 
						|
  void replaceSpilledRange(G4_Declare *spillRangeDcl,
 | 
						|
                           G4_DstRegRegion *spilledRegion, G4_INST *spilledInst,
 | 
						|
                           uint32_t subregOff);
 | 
						|
 | 
						|
  void replaceFilledRange(G4_Declare *fillRangeDcl,
 | 
						|
                          G4_SrcRegRegion *filledRegion, G4_INST *filledInst);
 | 
						|
 | 
						|
  void insertSpillRangeCode(INST_LIST::iterator spilledInstIter, G4_BB *bb);
 | 
						|
 | 
						|
  void insertSendFillRangeCode(G4_SrcRegRegion *filledRegion,
 | 
						|
                               INST_LIST::iterator filledInstIter, G4_BB *bb);
 | 
						|
 | 
						|
  void insertFillGRFRangeCode(G4_SrcRegRegion *filledRegion,
 | 
						|
                              INST_LIST::iterator filledInstIter, G4_BB *bb);
 | 
						|
 | 
						|
  bool useSplitSend() const;
 | 
						|
 | 
						|
  void getOverlappingIntervals(G4_Declare *dcl,
 | 
						|
                               std::vector<G4_Declare *> &intervals) const;
 | 
						|
 | 
						|
  // Data
 | 
						|
  GlobalRA &gra;
 | 
						|
  IR_Builder *builder_;
 | 
						|
  const unsigned varIdCount_;
 | 
						|
  unsigned latestImplicitVarIdCount_;
 | 
						|
  const LivenessAnalysis *lvInfo_;
 | 
						|
  const LiveRangeVec *lrInfo_;
 | 
						|
  const LR_LIST *spilledLRs_ = nullptr;
 | 
						|
  LSLR_LIST *spilledLSLRs_;
 | 
						|
  unsigned spillRangeCount_ = 0;
 | 
						|
  unsigned fillRangeCount_ = 0;
 | 
						|
  unsigned tmpRangeCount_ = 0;
 | 
						|
  unsigned msgSpillRangeCount_ = 0;
 | 
						|
  unsigned msgFillRangeCount_ = 0;
 | 
						|
  unsigned addrSpillFillRangeCount_ = 0;
 | 
						|
  unsigned nextSpillOffset_;
 | 
						|
  unsigned bbId_ = UINT_MAX;
 | 
						|
  unsigned spillAreaOffset_;
 | 
						|
  bool doSpillSpaceCompression;
 | 
						|
 | 
						|
  bool failSafeSpill_;
 | 
						|
  unsigned spillRegStart_ = 0;
 | 
						|
  unsigned indrSpillRegStart_ = 0;
 | 
						|
  unsigned spillRegOffset_ = 0;
 | 
						|
  std::unordered_set<G4_DstRegRegion *> noRMWNeeded;
 | 
						|
 | 
						|
  const Interference *spillIntf_ = nullptr;
 | 
						|
 | 
						|
  // Current instruction (held as a G4_INST pointer, not a CISA id)
 | 
						|
  G4_INST *curInst;
 | 
						|
 | 
						|
  int globalScratchOffset;
 | 
						|
 | 
						|
  const bool useScratchMsg_;
 | 
						|
  // spilled declares that represent a scalar immediate (created due to encoding
 | 
						|
  // restrictions) We rematerialize the immediate value instead of spill/fill
 | 
						|
  // them. Map stores type used on dst that defined the immediate.
 | 
						|
  std::unordered_map<G4_Declare *, std::pair<G4_Type, G4_Imm *>> scalarImmSpill;
 | 
						|
  // distance to reuse filled scalar imm
 | 
						|
  const unsigned int scalarImmReuseDistance = 10;
 | 
						|
  // use cache to reuse filled scalar immediates for nearby uses
 | 
						|
  // map spilled declare -> fill bb, fill inst, lex id for distance heuristic
 | 
						|
  std::unordered_map<G4_Declare*, std::tuple<G4_BB*, G4_INST*, unsigned int>> scalarImmFillCache;
 | 
						|
 | 
						|
  VarReferences refs;
 | 
						|
 | 
						|
  // sorted list of all spilling intervals
 | 
						|
  std::vector<G4_Declare *> spillingIntervals;
 | 
						|
 | 
						|
  // analysis pass to assist in spill/fill code gen
 | 
						|
  // currently it identifies scalar imm variables that should be re-mat
 | 
						|
  // later on we can add detection to avoid unnecessary read-modify-write for
 | 
						|
  // spills
 | 
						|
  void immMovSpillAnalysis();
 | 
						|
 | 
						|
  // Return true if spillDcl is a re-spill of a rematerialized imm
 | 
						|
  bool isScalarImmRespill(G4_Declare *spillDcl) const;
 | 
						|
  bool immFill(G4_SrcRegRegion *filledRegion,
 | 
						|
               INST_LIST::iterator filledInstIter, G4_BB *bb,
 | 
						|
               G4_Declare *spillDcl);
 | 
						|
 | 
						|
  bool checkUniqueDefAligned(G4_DstRegRegion *dst, G4_BB *defBB);
 | 
						|
  bool checkDefUseDomRel(G4_DstRegRegion *dst, G4_BB *bb);
 | 
						|
  bool isFirstLexicalDef(G4_DstRegRegion *dst);
 | 
						|
  void updateRMWNeeded();
 | 
						|
 | 
						|
  // Used for new fail safe RA mechanism.
 | 
						|
  BoundedRA context;
 | 
						|
 | 
						|
  // Used if an address-taken variable is spilled.
 | 
						|
  // Maps the old AddrExp operand on the spilled variable to the new AddrExp on
 | 
						|
  // the temp variable. The old AddrExp will be replaced by the new one when
 | 
						|
  // this round of spilling is done.
 | 
						|
  std::unordered_map<G4_AddrExp *, G4_AddrExp *> addrTakenSpillFill;
 | 
						|
 | 
						|
  // Record (or overwrite) the replacement AddrExp `newOp` for the original
  // AddrExp `origOp` in addrTakenSpillFill.
  void setAddrTakenSpillFill(G4_AddrExp *origOp, G4_AddrExp *newOp) {
    auto &replacement = addrTakenSpillFill[origOp];
    replacement = newOp;
  }
 | 
						|
  // Look up the replacement AddrExp recorded for `addrOp`; returns nullptr
  // when none has been recorded. Does not modify the map.
  G4_AddrExp *getAddrTakenSpillFill(G4_AddrExp *addrOp) {
    const auto found = addrTakenSpillFill.find(addrOp);
    if (found != addrTakenSpillFill.end()) {
      return found->second;
    }
    return nullptr;
  }
 | 
						|
 | 
						|
  bool headerNeeded() const;
 | 
						|
 | 
						|
  // return true if offset for spill/fill message needs to be GRF-aligned
 | 
						|
  bool needGRFAlignedOffset() const { return useScratchMsg_ || useSplitSend(); }
 | 
						|
}; // class SpillManagerGRF
 | 
						|
 | 
						|
// Check if the destination region is discontiguous or not.
 | 
						|
// A destination region is discontiguous if there are portions of the
 | 
						|
// region that are not written and unaffected.
 | 
						|
static inline bool isDisContRegion(G4_DstRegRegion *region, unsigned execSize) {
 | 
						|
  // If the horizontal stride is greater than 1, then it has gaps.
 | 
						|
  // NOTE: Horizontal stride of 0 is not allowed for destination regions.
 | 
						|
  return region->getHorzStride() != 1;
 | 
						|
}
 | 
						|
 | 
						|
// Check if the source region is discontiguous or not.
 | 
						|
// A source region is discontiguous in there are portions of the region
 | 
						|
// that are not read.
 | 
						|
static inline bool isDisContRegion(G4_SrcRegRegion *region, unsigned execSize) {
 | 
						|
  return region->getRegion()->isContiguous(execSize);
 | 
						|
}
 | 
						|
 | 
						|
// Check whether the region is partial, i.e. whether it reads/writes less
// than the whole segment. A region is reported as partial exactly when the
// matching isDisContRegion() overload reports it as discontiguous.
template <class REGION_TYPE>
static inline bool isPartialRegion(REGION_TYPE *region, unsigned execSize) {
  return isDisContRegion(region, execSize);
}
 | 
						|
 | 
						|
// Return true if inst is a simple mov with exec size == 1 and imm as src0.
 | 
						|
// Def of such instructions are trivially fillable.
 | 
						|
[[maybe_unused]] static bool immFillCandidate(G4_INST *inst) {
 | 
						|
  auto src0 = inst->getSrc(0);
 | 
						|
  return inst->opcode() == G4_mov && inst->getExecSize() == g4::SIMD1 &&
 | 
						|
         src0->isImm() && inst->isWriteEnableInst() && !inst->getPredicate() &&
 | 
						|
         !inst->getCondMod() && !inst->getSaturate() && !src0->isRelocImm();
 | 
						|
}
 | 
						|
 | 
						|
G4_SrcRegRegion *getSpillFillHeader(IR_Builder &builder, G4_Declare *decl);
 | 
						|
 | 
						|
bool isEOTSpillWithFailSafeRA(const IR_Builder &builder, const LiveRange *lr,
 | 
						|
                              bool isFailSafeIter);
 | 
						|
 | 
						|
} // namespace vISA
 | 
						|
 | 
						|
#endif // __SPILLMANAGERGMRF_H__
 |