Files
intel-graphics-compiler/visa/SpillManagerGMRF.cpp
2018-01-30 10:00:45 -08:00

4622 lines
143 KiB
C++

/*===================== begin_copyright_notice ==================================
Copyright (c) 2017 Intel Corporation
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
======================= end_copyright_notice ==================================*/
#include "SpillManagerGMRF.h"
#include "Gen4_IR.hpp"
#include "Mem_Manager.h"
#include "FlowGraph.h"
#include "GraphColor.h"
#include "BuildIR.h"
#include "DebugInfo.h"
#include <math.h>
#include <sstream>
#include <fstream>
using namespace std;
using namespace vISA;
// Configurations
#define ADDRESS_SENSITIVE_SPILLS_IMPLEMENTED
//#define DISABLE_SPILL_MEMORY_COMPRESSION
//#define VERIFY_SPILL_ASSIGNMENTS
// Constant declarations
// Byte sizes of the dword / oword / hword units used by the alignment and
// segment helpers in this file.
static const unsigned DWORD_BYTE_SIZE = 4;
static const unsigned OWORD_BYTE_SIZE = 16;
static const unsigned HWORD_BYTE_SIZE = 32;
// Register / sub-register offsets inside the spill and fill message payloads.
// NOTE(review): the exact payload layout these offsets describe is consumed by
// code outside this chunk; confirm against the message-building routines.
static const unsigned PAYLOAD_INPUT_REG_OFFSET = 0;
static const unsigned PAYLOAD_INPUT_SUBREG_OFFSET = 0;
static const unsigned OWORD_PAYLOAD_SPOFFSET_REG_OFFSET = 0;
static const unsigned OWORD_PAYLOAD_SPOFFSET_SUBREG_OFFSET = 2;
static const unsigned DWORD_PAYLOAD_SPOFFSET_REG_OFFSET = 1;
static const unsigned DWORD_PAYLOAD_SPOFFSET_SUBREG_OFFSET = 0;
static const unsigned OWORD_PAYLOAD_WRITE_REG_OFFSET = 1;
static const unsigned OWORD_PAYLOAD_WRITE_SUBREG_OFFSET = 0;
// dword scatter is always in SIMD8 mode
static const unsigned DWORD_PAYLOAD_WRITE_REG_OFFSET = 2;
static const unsigned DWORD_PAYLOAD_WRITE_SUBREG_OFFSET = 0;
// Minimum / maximum header heights (in rows) for oword and dword payloads.
static const unsigned OWORD_PAYLOAD_HEADER_MIN_HEIGHT = 1;
static const unsigned DWORD_PAYLOAD_HEADER_MIN_HEIGHT = 2;
static const unsigned OWORD_PAYLOAD_HEADER_MAX_HEIGHT = 1;
static const unsigned DWORD_PAYLOAD_HEADER_MAX_HEIGHT = 3;
// Size of one register row in dwords and in bytes (8 * 4 == 32).
static const unsigned REG_DWORD_SIZE = 8;
static const unsigned REG_BYTE_SIZE = 32;
// Defaults used when building operands and regions.
static const unsigned SCALAR_EXEC_SIZE = 1;
static const unsigned DEF_HORIZ_STRIDE = 1;
static const unsigned REG_ORIGIN = 0;
static const unsigned SUBREG_ORIGIN = 0;
// "GT" send descriptor encodings.
// NOTE(review): the query helpers visible in this chunk return the IVB values
// below rather than these; confirm whether the GT set is still used elsewhere.
static const unsigned SEND_GT_READ_TYPE_BIT_OFFSET = 13;
static const unsigned SEND_GT_WRITE_TYPE_BIT_OFFSET = 13;
static const unsigned SEND_GT_DESC_DATA_SIZE_BIT_OFFSET = 8;
static const unsigned SEND_GT_OW_READ_TYPE = 0;
static const unsigned SEND_GT_OW_WRITE_TYPE = 8;
static const unsigned SEND_GT_SC_READ_TYPE = 6;
static const unsigned SEND_GT_SC_WRITE_TYPE = 11;
static const unsigned SEND_GT_DP_RD_EX_DESC_IMM = 5;
static const unsigned SEND_GT_DP_SC_RD_EX_DESC_IMM = 4; //scatter reads go to sampler cache
static const unsigned SEND_GT_DP_WR_EX_DESC_IMM = 5;
// "IVB" send descriptor encodings; these are the values returned by the
// getSend*Type / getSend*TypeBitOffset / getSendExDesc helpers below.
static const unsigned SEND_IVB_MSG_TYPE_BIT_OFFSET = 14;
static const unsigned SEND_IVB_OW_READ_TYPE = 0;
static const unsigned SEND_IVB_OW_WRITE_TYPE = 8;
static const unsigned SEND_IVB_SC_READ_TYPE = 3;
static const unsigned SEND_IVB_SC_WRITE_TYPE = 11;
static const unsigned SEND_IVB_DP_RD_EX_DESC_IMM = 10; //data cache
static const unsigned SEND_IVB_DP_WR_EX_DESC_IMM = 10; //data cache
// Scratch msg
// NOTE(review): the values below look like bit positions inside the scratch
// message descriptor - confirm against the code that assembles it.
// "CATEORY" (sic) is kept as spelled because the identifier may be referenced
// elsewhere in the file.
static const unsigned SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT = 1;
static const unsigned SCRATCH_MSG_DESC_CATEORY = 18;
static const unsigned SCRATCH_MSG_DESC_OPERATION_MODE = 17;
static const unsigned SCRATCH_MSG_DESC_CHANNEL_MODE = 16;
static const unsigned SCRATCH_MSG_INVALIDATE_AFTER_READ = 15;
static const unsigned SCRATCH_MSG_DESC_BLOCK_SIZE = 12;
// Clears the low five bits of a value, i.e. rounds down to a multiple of 32
// (== REG_BYTE_SIZE).
static const uint32_t GRF_ALIGN_MASK = 0xFFFFFFE0;
// Macros
// Clamp an execution size to 16.
#define LIMIT_SEND_EXEC_SIZE(EXEC_SIZE) (((EXEC_SIZE) > 16) ? 16 : (EXEC_SIZE))
// Round x up to the next multiple of y (x is returned unchanged when it is
// already a multiple). Every use of an argument is parenthesized so that
// expression arguments such as ROUND(a + b, c) expand with the intended
// precedence. Both arguments are evaluated more than once, so they must not
// have side effects.
#define ROUND(x,y) ((x) + (((y) - (x) % (y)) % (y)))
#define SPILL_PAYLOAD_HEIGHT_LIMIT 4
extern unsigned int getStackCallRegSize(bool reserveStackCallRegs);
// Alignment policy for spill/fill temporaries: the sub-register alignment is
// always forced to Sixteen_Word, and the declare's alignment is copied from
// the original declare unless that alignment is Either (in which case the
// new declare's alignment is left untouched).
static void setNewDclAlignment(G4_Declare* newDcl, G4_Align origAlign)
{
    newDcl->setSubRegAlign(Sixteen_Word);

    if (origAlign == Either)
    {
        // Nothing to inherit from the original declare.
        return;
    }
    newDcl->setAlign(origAlign);
}
// Constructor
// Captures the allocator state (liveness, live ranges, interference), sets up
// zeroed per-variable counters, optionally computes the spill-memory
// interference, and derives the register window and scratch offset used by
// the spill code generated later.
//   g                - global RA object; the IR builder is taken from g.kernel.fg.
//   spillAreaOffset  - initial value of both nextSpillOffset_ and spillAreaOffset_.
//   varIdCount       - number of variable ids; sizes the counter arrays.
//   failSafeSpill    - when set, a register window below the top of the GRF
//                      file is carved out (see below).
//   spillRegSize / indrSpillRegSize - sizes used to place that window.
//   enableSpillSpaceCompression - when set, computeSpillIntf() is run here.
SpillManagerGMRF::SpillManagerGMRF (
GlobalRA& g,
unsigned spillAreaOffset,
unsigned varIdCount,
const LivenessAnalysis * lvInfo,
LiveRange ** lrInfo,
Interference * intf,
std::vector<EDGE> & prevIntfEdges,
LR_LIST & spilledLRs,
unsigned iterationNo,
bool failSafeSpill,
unsigned spillRegSize,
unsigned indrSpillRegSize,
bool enableSpillSpaceCompression,
bool useScratchMsg
) : builder_ (g.kernel.fg.builder), varIdCount_ (varIdCount), latestImplicitVarIdCount_ (0),
lvInfo_ (lvInfo), lrInfo_ (lrInfo), prevIntfEdges_ (prevIntfEdges), spilledLRs_ (spilledLRs),
nextSpillOffset_ (spillAreaOffset), iterationNo_ (iterationNo), failSafeSpill_ (failSafeSpill),
doSpillSpaceCompression(enableSpillSpaceCompression), useScratchMsg_(useScratchMsg), bbId_(UINT_MAX), inSIMDCFContext_(false), mem_(1024),
spillIntf_(intf), numGRFSpill(0), numGRFFill(0), numGRFMove(0), gra(g)
{
// One zero-initialised counter per variable id for each kind of implicit
// range (spill, fill, tmp, msg-spill, msg-fill). The arrays hold exactly
// varIdCount entries.
const unsigned size = sizeof (unsigned) * varIdCount;
spillRangeCount_ = (unsigned *) allocMem (size);
memset (spillRangeCount_, 0, size);
fillRangeCount_ = (unsigned *) allocMem (size);
memset (fillRangeCount_, 0, size);
tmpRangeCount_ = (unsigned *) allocMem (size);
memset (tmpRangeCount_, 0, size);
msgSpillRangeCount_ = (unsigned *) allocMem (size);
memset (msgSpillRangeCount_, 0, size);
msgFillRangeCount_ = (unsigned *) allocMem (size);
memset (msgFillRangeCount_, 0, size);
spillAreaOffset_ = spillAreaOffset;
if (enableSpillSpaceCompression)
{
computeSpillIntf ();
}
// Start from an empty builder instruction list.
builder_->instList.clear();
// All three register markers start at the total GRF count.
spillRegStart_ = builder_->getOptions()->getuInt32Option(vISA_TotalGRFNum);
indrSpillRegStart_ = spillRegStart_;
spillRegOffset_ = spillRegStart_;
if(failSafeSpill)
{
// Move the indirect-spill start down by the stack-call reservation plus
// indrSpillRegSize, then place the spill start spillRegSize below that.
// Note that spillRegOffset_ is not adjusted here.
unsigned int stackCallRegSize = getStackCallRegSize(builder_->kernel.fg.getHasStackCalls() || builder_->kernel.fg.getIsStackCallFunc());
indrSpillRegStart_ -= (stackCallRegSize + indrSpillRegSize);
spillRegStart_ = indrSpillRegStart_ - spillRegSize;
}
curInst = NULL;
globalScratchOffset = builder_->getOptions()->getuInt32Option(vISA_SpillMemOffset);
if (builder_->getIsKernel())
{
// reserve space for file scope variables
globalScratchOffset += (builder_->kernel.fg.fileScopeSaveAreaSize * 16);
}
if (canDoSLMSpill())
{
// Initialise the builtin SLM spill address only when blocked SLM messages
// are unavailable and it has not been set up already.
if (!builder_->hasBlockedSLMMessage() && !builder_->getBuiltinSLMSpillAddr())
{
builder_->initBuiltinSLMSpillAddr(maxSLMScratchSize);
}
}
}
// Compute the interference graph for intereference of the memory segments
// occupied by the spilled live ranges.
void
SpillManagerGMRF::computeSpillIntf (
)
{
// Apply previous interferences that are relevant for this iteration.
for (auto& edge : prevIntfEdges_)
{
if (shouldSpillRegister (getRegVar (edge.first)) ||
shouldSpillRegister (getRegVar (edge.second))) {
spillIntf_->checkAndSetIntf(edge.first, edge.second);
}
}
LR_LIST::const_iterator ltEnd = spilledLRs_.end();
for (LR_LIST::const_iterator lt = spilledLRs_.begin();
lt != ltEnd; ++lt)
{
LiveRange* lr = (*lt);
unsigned int i = lr->getVar()->getId();
std::vector<unsigned int>& intfs = spillIntf_->getSparseIntfForVar(i);
for (auto it : intfs)
{
EDGE tempEdge;
tempEdge.first = it;
tempEdge.second = i;
prevIntfEdges_.push_back(tempEdge);
}
}
}
// Return the base of a source or destination region, viewed as a regvar.
template <class REGION_TYPE>
inline G4_RegVar *
SpillManagerGMRF::getRegVar (REGION_TYPE * region) const
{
    return (G4_RegVar *) region->getBase();
}
// Get the representative regvar that will be assigned a unique spill
// disp and not a relative spill disp: take the absolute base regvar and, as
// long as that base is itself aliased, hop to the alias target and repeat.
inline G4_RegVar *
SpillManagerGMRF::getReprRegVar (G4_RegVar * regVar) const
{
    G4_RegVar * repr = regVar->getAbsBaseRegVar ();
    while (repr->isAliased ())
    {
        G4_RegVar * aliasTarget =
            repr->getDeclare ()->getAliasDeclare ()->getRegVar ();
        repr = aliasTarget->getAbsBaseRegVar ();
    }
    return repr;
}
// Register file kind recorded on the regvar's declare.
inline G4_RegFileKind
SpillManagerGMRF::getRFType (G4_RegVar * regvar) const
{
    G4_Declare * dcl = regvar->getDeclare ();
    return dcl->getRegFile ();
}
// Register file kind of a region: the declare's register file when the base
// is a regvar, G4_GRF when the base is a physical general register, and
// G4_ADDRESS for anything else.
template <class REGION_TYPE>
inline G4_RegFileKind
SpillManagerGMRF::getRFType (REGION_TYPE * region) const
{
    if (region->getBase ()->isRegVar ())
    {
        return getRFType (region->getBase ()->asRegVar ());
    }
    return region->getBase ()->isGreg () ? G4_GRF : G4_ADDRESS;
}
// Byte offset of the region's origin relative to the start of its variable:
// the register offset contributes whole rows of REG_BYTE_SIZE bytes and the
// sub-register offset contributes whole elements.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getRegionOriginOffset (REGION_TYPE * region) const
{
    const unsigned rowBytes = REG_BYTE_SIZE * region->getRegOff ();
    const unsigned columnBytes =
        region->getSubRegOff () * region->getElemSize ();
    return rowBytes + columnBytes;
}
// A destination region is discontiguous when parts of the covered span are
// left unwritten, which is the case exactly when its horizontal stride is
// not 1 (a stride of 0 is not allowed for destinations). execSize is unused.
bool isDisContRegion (
    G4_DstRegRegion * region,
    unsigned execSize
)
{
    const bool unitStride = (region->getHorzStride() == 1);
    return !unitStride;
}
// Check if the source region is discontiguous or not.
// A source region is discontiguous if there are portions of the region
// that are not read, i.e. when its region descriptor is NOT contiguous for
// the given execution size.
// The previous implementation returned isContiguous(execSize) directly, which
// is the opposite of what the name and the destination overload report.
bool isDisContRegion (
    G4_SrcRegRegion * region,
    unsigned execSize
)
{
    RegionDesc * regionDesc = region->getRegion ();
    return !regionDesc->isContiguous(execSize);
}
// Mask with all bits set except the low 5 (32-byte / hword granularity).
inline unsigned
SpillManagerGMRF::hwordMask () const
{
    return ~0u << 5;
}
// Mask with all bits set except the low 4 (16-byte / oword granularity).
inline unsigned
SpillManagerGMRF::owordMask () const
{
    return ~0u << 4;
}
// Mask with all bits set except the low 2 (4-byte / dword granularity).
inline unsigned
SpillManagerGMRF::dwordMask () const
{
    return ~0u << 2;
}
// Test if the offset is oword aligned, i.e. none of the bits cleared by
// owordMask() are set in it.
inline bool
SpillManagerGMRF::owordAligned (unsigned offset) const
{
    const unsigned lowBits = offset & ~owordMask ();
    return lowBits == 0;
}
// Test if the offset is dword aligned, i.e. none of the bits cleared by
// dwordMask() are set in it.
inline bool
SpillManagerGMRF::dwordAligned (unsigned offset) const
{
    const unsigned lowBits = offset & ~dwordMask ();
    return lowBits == 0;
}
// Ceiling of dvd / dvr: the truncated quotient, plus one when there is a
// non-zero remainder. dvr must be non-zero.
inline unsigned
SpillManagerGMRF::cdiv (unsigned dvd, unsigned dvr) const
{
    const unsigned quotient = dvd / dvr;
    const bool hasRemainder = (dvd % dvr) != 0;
    return hasRemainder ? quotient + 1 : quotient;
}
// Decide whether the regvar has to be spilled.
// Returns false for address-file variables, for variables whose (alias
// resolved) id is UNDEFINED_VAL, for transient/tmp regvars, and for the
// pseudo VCA/VCE declares. Otherwise the variable is spilled exactly when its
// live range has no physical register assigned.
inline bool
SpillManagerGMRF::shouldSpillRegister (G4_RegVar * regVar) const
{
    if (getRFType (regVar) == G4_ADDRESS)
    {
        return false;
    }

    // Look through one level of aliasing to find the variable that actually
    // owns the live range.
    G4_RegVar * actualRegVar = regVar;
    if (regVar->getDeclare ()->getAliasDeclare ())
    {
        actualRegVar = regVar->getDeclare ()->getAliasDeclare ()->getRegVar ();
    }

    if (actualRegVar->getId () == UNDEFINED_VAL)
    {
        return false;
    }
    if (regVar->isRegVarTransient () || regVar->isRegVarTmp ())
    {
        return false;
    }
#ifndef ADDRESS_SENSITIVE_SPILLS_IMPLEMENTED
    if (lvInfo_->isAddressSensitive (regVar->getId ()))
    {
        return false;
    }
#endif
    if (builder_->kernel.fg.isPseudoVCADcl(actualRegVar->getDeclare()) ||
        builder_->kernel.fg.isPseudoVCEDcl(actualRegVar->getDeclare()))
    {
        return false;
    }

    return lrInfo_ [actualRegVar->getId ()]->getPhyReg () == NULL;
}
// Look up the regvar with the given id in the liveness info's variable table.
inline G4_RegVar *
SpillManagerGMRF::getRegVar (unsigned id) const
{
    G4_RegVar * var = (lvInfo_->vars)[id];
    return var;
}
// Byte size of the regvar's declare: multi-row declares are counted as full
// rows of REG_BYTE_SIZE bytes each, single-row declares as
// element count * element size.
inline unsigned
SpillManagerGMRF::getByteSize (G4_RegVar * regVar) const
{
    unsigned bytesPerRow;
    if (regVar->getDeclare ()->getNumRows () > 1)
    {
        bytesPerRow = REG_BYTE_SIZE;
    }
    else
    {
        bytesPerRow =
            regVar->getDeclare ()->getNumElems () *
            regVar->getDeclare ()->getElemSize ();
    }
    return bytesPerRow * regVar->getDeclare ()->getNumRows ();
}
// Check if the lifetime of the spill/fill memory of live range i interferes
// with the lifetime of the spill/fill memory of live range j.
// Tmp regvars (directly, or reached through the non-transient base) are
// resolved through their base regvar: they interfere when that base is the
// other side's representative, otherwise the question is re-asked for the
// base. For ordinary variables the representatives interfere when the spill
// interference graph says so or when they live in different register files.
bool
SpillManagerGMRF::spillMemLifetimeInterfere (unsigned i, unsigned j) const
{
    G4_RegVar * ireg = getRegVar (i);
    G4_RegVar * jreg = getRegVar (j);
    G4_RegVar * irep = getReprRegVar (ireg);
    G4_RegVar * jrep = getReprRegVar (jreg);
    G4_RegVar * inont = ireg->getNonTransientBaseRegVar ();
    G4_RegVar * jnont = jreg->getNonTransientBaseRegVar ();

    if (ireg->isRegVarTmp ())
    {
        return
            ireg->getBaseRegVar () == jrep ||
            spillMemLifetimeInterfere (ireg->getBaseRegVar ()->getId (), j);
    }
    if (jreg->isRegVarTmp ())
    {
        return
            jreg->getBaseRegVar () == irep ||
            spillMemLifetimeInterfere (jreg->getBaseRegVar ()->getId (), i);
    }
    if (inont->isRegVarTmp ())
    {
        return
            inont->getBaseRegVar () == jrep ||
            spillMemLifetimeInterfere (inont->getBaseRegVar ()->getId (), j);
    }
    if (jnont->isRegVarTmp ())
    {
        return
            jnont->getBaseRegVar () == irep ||
            spillMemLifetimeInterfere (jnont->getBaseRegVar ()->getId (), i);
    }

    if (spillIntf_->interfereBetween (irep->getId (), jrep->getId ()))
    {
        return true;
    }
    if (getRFType (irep) != getRFType (jrep))
    {
        return true;
    }
#ifdef DISABLE_SPILL_MEMORY_COMPRESSION
    return irep != jrep;
#else
    return false;
#endif
}
// Calculate the spill memory displacement for the regvar (first fit).
// Every interfering variable that already owns a displacement blocks its
// slot; the blocked slots are kept sorted by displacement and the first gap
// large enough for regVar is returned. Slot ends are rounded up to
// G4_GRF_REG_NBYTES when scratch messages are used and to an oword otherwise.
unsigned
SpillManagerGMRF::calculateSpillDisp (G4_RegVar * regVar) const
{
    assert (regVar->getDisp () == UINT_MAX);

    typedef std::list < G4_RegVar * > LocList;
    LocList occupied;

    // Implicit variables (id beyond the original id space) are represented by
    // their base regvar.
    unsigned lrId = regVar->getId ();
    if (lrId >= varIdCount_)
    {
        lrId = regVar->getBaseRegVar ()->getId ();
    }
    assert (lrId < varIdCount_);

    for (unsigned i = 0; i < varIdCount_; i++)
    {
        if (!spillMemLifetimeInterfere (lrId, i))
        {
            continue;
        }
        G4_RegVar * intfRegVar = getRegVar (i);
        assert (getRegVar (i)->isAliased () == false);
        if (intfRegVar->isRegVarTransient ())
        {
            continue;
        }
        const unsigned iDisp = intfRegVar->getDisp ();
        if (iDisp == UINT_MAX)
        {
            continue;
        }

        // Sorted insert: skip everything that starts strictly before iDisp.
        // Inserting at end() appends.
        LocList::iterator pos = occupied.begin ();
        while (pos != occupied.end () && (*pos)->getDisp () < iDisp)
        {
            ++pos;
        }
        occupied.insert (pos, intfRegVar);
    }

    // we always start searching from 0 to facilitate cross-iteration reuse
    unsigned candidate = 0;
    const unsigned regVarSize = getByteSize (regVar);
    for (LocList::iterator taken = occupied.begin ();
         taken != occupied.end (); ++taken)
    {
        const unsigned takenStart = (*taken)->getDisp ();
        if (candidate < takenStart && candidate + regVarSize <= takenStart)
        {
            // The gap in front of this slot is large enough.
            break;
        }

        unsigned takenEnd = takenStart + getByteSize (*taken);
        if (useScratchMsg_)
        {
            if (takenEnd % G4_GRF_REG_NBYTES != 0)
            {
                takenEnd =
                    (takenEnd & (owordMask () << 1)) + G4_GRF_REG_NBYTES;
            }
        }
        else if (owordAligned (takenEnd) == false)
        {
            takenEnd = (takenEnd & owordMask ()) + OWORD_BYTE_SIZE;
        }

        if (takenEnd > candidate)
        {
            candidate = takenEnd;
        }
    }
    return candidate;
}
// Get the spill/fill displacement of the segment containing the region.
// For an unaligned region this is the start of the enclosing aligned segment;
// otherwise it is the displacement of the region itself.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getSegmentDisp (REGION_TYPE * region, unsigned execSize)
{
    assert (region->getElemSize () && execSize);
    return isUnalignedRegion (region, execSize)
        ? getEncAlignedSegmentDisp (region, execSize)
        : getRegionDisp (region);
}
// Get the spill/fill displacement of the regvar.
// UINT_MAX is the "not assigned yet" marker. The result is cached on the
// regvar through setDisp() in every case except the aliased one, where it is
// recomputed from the alias target on each call.
unsigned
SpillManagerGMRF::getDisp(
G4_RegVar * regVar
)
{
// Already calculated spill memory disp
if (regVar->getDisp() != UINT_MAX) {
// Do nothing.
}
// If it is an aliased regvar then calculate the disp for the
// actual regvar and then calculate the disp of the aliased regvar
// based on it.
else if (regVar->isAliased()) {
G4_Declare * regVarDcl = regVar->getDeclare();
return
getDisp(regVarDcl->getAliasDeclare()->getRegVar()) +
regVarDcl->getAliasOffset();
}
// If its base regvar has been assigned a disp, then the spill memory
// has already been allocated for it, simply calculate the disp based
// on the enclosing segment disp.
// Note: the getDisp() call in the condition may itself allocate a disp for
// the base regvar as a side effect.
else if (regVar->isRegVarTransient() &&
getDisp(regVar->getBaseRegVar()) != UINT_MAX) {
assert(regVar->getBaseRegVar() != regVar);
unsigned itsDisp;
if (regVar->isRegVarSpill()) {
// Spill transient: the segment is derived from the destination region.
G4_RegVarTransient * tRegVar = static_cast <G4_RegVarTransient*> (regVar);
assert(
getSegmentByteSize(
tRegVar->getDstRepRegion(), tRegVar->getExecSize()) <=
getByteSize(tRegVar));
itsDisp =
getSegmentDisp(
tRegVar->getDstRepRegion(), tRegVar->getExecSize());
}
else if (regVar->isRegVarFill()) {
// Fill transient: the segment is derived from the source region.
G4_RegVarTransient * tRegVar = static_cast <G4_RegVarTransient*> (regVar);
assert(
getSegmentByteSize(
tRegVar->getSrcRepRegion(), tRegVar->getExecSize()) <=
getByteSize(tRegVar));
itsDisp =
getSegmentDisp(
tRegVar->getSrcRepRegion(), tRegVar->getExecSize());
}
else {
MUST_BE_TRUE(false, "Incorrect spill/fill ranges.");
itsDisp = 0;
}
regVar->setDisp(itsDisp);
}
// Allocate the spill and evaluate its disp
else {
if (doSpillSpaceCompression)
{
assert(regVar->isRegVarTransient() == false);
regVar->setDisp(calculateSpillDisp(regVar));
}
else
{
assert(regVar->isRegVarTransient() == false);
// A variable created by the spill manager (id outside the original id
// space) reuses the disp of its base regvar when that one has a disp.
if (regVar->getId() >= varIdCount_)
{
if (regVar->getBaseRegVar()->getDisp() != UINT_MAX)
{
regVar->setDisp(regVar->getBaseRegVar()->getDisp());
return regVar->getDisp();
}
}
// Round the running offset up to a multiple of G4_GRF_REG_NBYTES.
// owordMask() << 1 clears the low five bits.
if ((spillAreaOffset_) % G4_GRF_REG_NBYTES != 0)
{
(spillAreaOffset_) = ((spillAreaOffset_)&(owordMask() << 1)) + G4_GRF_REG_NBYTES;
}
if (canDoSLMSpill())
{
// don't have variables that cross the SLM/scratch boundary, makes our life a bit easier
// FIXME: may want to consider spill costs and put the important variables in SLM
if (spillAreaOffset_ < maxSLMScratchSize &&
spillAreaOffset_ + getByteSize(regVar) > maxSLMScratchSize)
{
spillAreaOffset_ = maxSLMScratchSize;
}
}
// Bump allocation: hand out the current offset and advance past the variable.
regVar->setDisp(spillAreaOffset_);
spillAreaOffset_ += getByteSize(regVar);
}
}
return regVar->getDisp();
}
// Spill/fill displacement of the region: the displacement of its base regvar
// plus the byte offset of the region origin inside that variable.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getRegionDisp (REGION_TYPE * region)
{
    const unsigned varDisp = getDisp (getRegVar (region));
    const unsigned originOffset = getRegionOriginOffset (region);
    return varDisp + originOffset;
}
// Get the type of send message to use to spill/fill the region.
// When both the region displacement and the region byte size are oword
// aligned the result is owordMask(); otherwise it is the message type of the
// enclosing aligned segment.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getMsgType (REGION_TYPE * region, unsigned execSize)
{
    const unsigned disp = getRegionDisp (region);
    const unsigned byteSize = getRegionByteSize (region, execSize);

    if (!owordAligned (disp) || !owordAligned (byteSize))
    {
        return getEncAlignedSegmentMsgType (region, execSize);
    }
    return owordMask ();
}
// Determine if the region is unaligned w.r.t spill/fill memory read/writes.
// The block unit is G4_GRF_REG_NBYTES with scratch messages and an oword
// otherwise. A region is aligned only if its displacement and byte size are
// both multiples of the unit and it spans exactly 1, 2 or 4 units.
template <class REGION_TYPE>
inline bool
SpillManagerGMRF::isUnalignedRegion (REGION_TYPE * region, unsigned execSize)
{
    const unsigned disp = getRegionDisp (region);
    const unsigned byteSize = getRegionByteSize (region, execSize);
    const unsigned unit =
        useScratchMsg_ ? G4_GRF_REG_NBYTES : OWORD_BYTE_SIZE;

    if (disp % unit != 0 || byteSize % unit != 0)
    {
        return true;
    }

    const unsigned unitCount = byteSize / unit;
    return unitCount != 1 && unitCount != 2 && unitCount != 4;
}
// Calculate the aligned segment [start, end) enclosing the region and the
// matching message type.
// With scratch messages the segment starts on an hword boundary and grows by
// one hword at a time. Otherwise it starts on an oword boundary and grows by
// 16, 32, 64, ... bytes per step, so its total size is 1, 2, 4 or 8 owords.
template <class REGION_TYPE>
void
SpillManagerGMRF::calculateEncAlignedSegment (
    REGION_TYPE * region,
    unsigned execSize,
    unsigned & start,
    unsigned & end,
    unsigned & type
)
{
    const unsigned regionDisp = getRegionDisp (region);
    const unsigned regionByteSize = getRegionByteSize (region, execSize);

    if (useScratchMsg_)
    {
        const unsigned lowerBound = regionDisp & hwordMask ();
        unsigned upperBound = lowerBound + HWORD_BYTE_SIZE;
        while (regionDisp + regionByteSize > upperBound)
        {
            upperBound += HWORD_BYTE_SIZE;
        }
        assert ((upperBound - lowerBound)/ REG_BYTE_SIZE <= 4);
        start = lowerBound;
        end = upperBound;
        type = hwordMask ();
        return;
    }

    const unsigned lowerBound = regionDisp & owordMask ();
    unsigned upperBound = lowerBound + OWORD_BYTE_SIZE;
    for (unsigned step = OWORD_BYTE_SIZE;
         regionDisp + regionByteSize > upperBound;
         step *= 2)
    {
        upperBound += step;
    }
    assert ((upperBound - lowerBound)/ REG_BYTE_SIZE <= 4);
    start = lowerBound;
    end = upperBound;
    type = owordMask ();
}
// Byte size (end - start) of the aligned segment enclosing the region.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getEncAlignedSegmentByteSize (
    REGION_TYPE * region, unsigned execSize)
{
    unsigned segStart = 0;
    unsigned segEnd = 0;
    unsigned msgType = 0;
    calculateEncAlignedSegment (region, execSize, segStart, segEnd, msgType);
    return segEnd - segStart;
}
// Start offset of the aligned segment enclosing the region.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getEncAlignedSegmentDisp (
    REGION_TYPE * region, unsigned execSize)
{
    unsigned segStart = 0;
    unsigned segEnd = 0;
    unsigned msgType = 0;
    calculateEncAlignedSegment (region, execSize, segStart, segEnd, msgType);
    return segStart;
}
// Message type to be used to read/write the aligned segment enclosing the
// region.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getEncAlignedSegmentMsgType (
    REGION_TYPE * region, unsigned execSize)
{
    unsigned segStart = 0;
    unsigned segEnd = 0;
    unsigned msgType = 0;
    calculateEncAlignedSegment (region, execSize, segStart, segEnd, msgType);
    return msgType;
}
// Byte size of the segment for the region: the size of the enclosing aligned
// segment for an unaligned region, the region's own byte size otherwise.
template <class REGION_TYPE>
inline unsigned
SpillManagerGMRF::getSegmentByteSize (REGION_TYPE * region, unsigned execSize)
{
    assert (region->getElemSize () && execSize);
    return isUnalignedRegion (region, execSize)
        ? getEncAlignedSegmentByteSize (region, execSize)
        : getRegionByteSize (region, execSize);
}
// Byte span of a destination region: (execSize - 1) strides of
// horzStride * elemSize bytes, plus one final element.
inline unsigned
SpillManagerGMRF::getRegionByteSize (
    G4_DstRegRegion * region, unsigned execSize) const
{
    return region->getHorzStride() * region->getElemSize() * (execSize - 1) +
        region->getElemSize();
}
// Byte span of a source region.
// The region is read as execSize / width rows: every row but the last
// advances by vertStride elements, and the last row covers (width - 1)
// horizontal strides plus one final element.
// The row contribution is computed with one multiplication instead of a loop
// that added the same loop-invariant product nRows - 1 times; the value is
// the same in unsigned arithmetic, and a zero row count no longer iterates
// ~2^32 times.
inline unsigned
SpillManagerGMRF::getRegionByteSize (
    G4_SrcRegRegion * region,
    unsigned execSize
) const
{
    assert (execSize % region->getRegion ()->width == 0);
    const unsigned elemBytes = region->getElemSize ();
    const unsigned nRows = execSize / region->getRegion ()->width;

    // Bytes skipped by the vertical stride for every row except the last.
    unsigned size = (nRows - 1) * (region->getRegion ()->vertStride * elemBytes);

    // Bytes covered by the last row.
    size +=
        region->getRegion ()->horzStride * elemBytes *
        (region->getRegion ()->width - 1) + elemBytes;
    return size;
}
// Get the max exec size on a 256 bit vector for the input operand, looked up
// by the operand's type enumerator. Only the first nine enumerators have an
// explicit entry; any further slots up to Type_UNDEF are zero-initialised.
inline unsigned
SpillManagerGMRF::getMaxExecSize (G4_Operand * operand) const
{
    static unsigned maxExecSize [Type_UNDEF + 1] =
        {8, 8, 16, 16, 16, 16, 8, 8, 0};
    return maxExecSize [operand->getType ()];
}
// Check if the instruction is a SIMD 16 or 32 instruction that is logically
// equivalent to two instructions, the second of which uses register operands
// at the following row with the same sub-register index.
// Thin forwarder to G4_INST::isComprInst().
inline bool
SpillManagerGMRF::isComprInst (G4_INST * inst) const
{
    const bool compressed = inst->isComprInst ();
    return compressed;
}
// Check if the source in a compressed instruction operand occupies a second
// register.
// False when the instruction is not compressed, the source is a replicated
// scalar, the exec size is at most 8, or the source does not cross a GRF
// boundary. For a SIMD16 instruction with a packed 4-byte destination the
// source counts as single-register when it is a natively packed 2-byte
// region. Everything else is multi-register.
bool
SpillManagerGMRF::isMultiRegComprSource (
    G4_SrcRegRegion* src,
    G4_INST * inst
) const
{
    if (inst->isComprInst () == false)
    {
        return false;
    }
    if (isScalarReplication(src))
    {
        return false;
    }
    if (inst->getExecSize() <= 8)
    {
        return false;
    }
    if (!src->asSrcRegRegion()->crossGRF())
    {
        return false;
    }

    const bool simd16PackedDwordDst =
        inst->getExecSize () == 16 &&
        inst->getDst () &&
        G4_Type_Table[inst->getDst ()->getType ()].byteSize == 4 &&
        inst->getDst()->getHorzStride () == 1;
    if (!simd16PackedDwordDst)
    {
        return true;
    }

    const bool packedWordSrc =
        G4_Type_Table[src->getType()].byteSize == 2 &&
        src->isNativePackedRegion();
    return !packedWordSrc;
}
// Send message query: bit offset of the response length, as given by
// SEND_GT_RSP_LENGTH_BIT_OFFSET.
inline unsigned SpillManagerGMRF::getSendRspLengthBitOffset () const
{
    const unsigned bitOffset = SEND_GT_RSP_LENGTH_BIT_OFFSET;
    return bitOffset;
}
// Send message query: maximum response length. Hard-coded to 8 (the
// SEND_GT_MAX_RESPONSE_LENGTH constant is intentionally not used here).
inline unsigned SpillManagerGMRF::getSendMaxResponseLength () const
{
    //return SEND_GT_MAX_RESPONSE_LENGTH;
    const unsigned maxResponseLength = 8;
    return maxResponseLength;
}
// Send message query: bit offset of the message length, as given by
// SEND_GT_MSG_LENGTH_BIT_OFFSET.
inline unsigned SpillManagerGMRF::getSendMsgLengthBitOffset () const
{
    const unsigned bitOffset = SEND_GT_MSG_LENGTH_BIT_OFFSET;
    return bitOffset;
}
// Send message query: maximum message length, as given by
// SEND_GT_MAX_MESSAGE_LENGTH.
inline unsigned SpillManagerGMRF::getSendMaxMessageLength () const
{
    const unsigned maxMessageLength = SEND_GT_MAX_MESSAGE_LENGTH;
    return maxMessageLength;
}
// Send message query: bit offset of the data size in the descriptor, as
// given by SEND_GT_DESC_DATA_SIZE_BIT_OFFSET.
inline unsigned SpillManagerGMRF::getSendDescDataSizeBitOffset () const
{
    const unsigned bitOffset = SEND_GT_DESC_DATA_SIZE_BIT_OFFSET;
    return bitOffset;
}
// Send message query: bit offset of the read message type. Reads and writes
// share SEND_IVB_MSG_TYPE_BIT_OFFSET.
inline unsigned SpillManagerGMRF::getSendReadTypeBitOffset () const
{
    const unsigned bitOffset = SEND_IVB_MSG_TYPE_BIT_OFFSET;
    return bitOffset;
}
// Send message query: bit offset of the write message type. Reads and writes
// share SEND_IVB_MSG_TYPE_BIT_OFFSET.
inline unsigned SpillManagerGMRF::getSendWriteTypeBitOffset () const
{
    const unsigned bitOffset = SEND_IVB_MSG_TYPE_BIT_OFFSET;
    return bitOffset;
}
// Send message query: message type value for a scatter read
// (SEND_IVB_SC_READ_TYPE).
inline unsigned SpillManagerGMRF::getSendScReadType () const
{
    const unsigned msgType = SEND_IVB_SC_READ_TYPE;
    return msgType;
}
// Send message query: message type value for a scatter write
// (SEND_IVB_SC_WRITE_TYPE).
inline unsigned SpillManagerGMRF::getSendScWriteType () const
{
    const unsigned msgType = SEND_IVB_SC_WRITE_TYPE;
    return msgType;
}
// Send message query: message type value for an oword read
// (SEND_IVB_OW_READ_TYPE).
inline unsigned SpillManagerGMRF::getSendOwordReadType () const
{
    const unsigned msgType = SEND_IVB_OW_READ_TYPE;
    return msgType;
}
// Send message query: message type value for an oword write
// (SEND_IVB_OW_WRITE_TYPE).
inline unsigned SpillManagerGMRF::getSendOwordWriteType () const
{
    const unsigned msgType = SEND_IVB_OW_WRITE_TYPE;
    return msgType;
}
// Extended descriptor immediate for a data port message: the write constant
// for writes, the read constant for reads. isScatter does not influence the
// result.
inline unsigned
SpillManagerGMRF::getSendExDesc( bool isWrite, bool isScatter ) const
{
    if (isWrite)
    {
        return SEND_IVB_DP_WR_EX_DESC_IMM;
    }
    return SEND_IVB_DP_RD_EX_DESC_IMM;
}
// Allocate size bytes from the IR builder's memory manager.
inline void *
SpillManagerGMRF::allocMem (unsigned size) const
{
    void * block = builder_->mem.alloc (size);
    return block;
}
// Whether split sends are used; forwards the builder's useSends() answer.
bool SpillManagerGMRF::useSplitSend() const
{
    const bool splitSend = builder_->useSends();
    return splitSend;
}
// Return the next unused spill range index for the regvar and advance its
// per-variable counter.
inline unsigned
SpillManagerGMRF::getSpillIndex (G4_RegVar * spilledRegVar)
{
    unsigned & counter = spillRangeCount_ [spilledRegVar->getId ()];
    return counter++;
}
// Return the next unused fill range index for the regvar and advance its
// per-variable counter.
inline unsigned
SpillManagerGMRF::getFillIndex (G4_RegVar * spilledRegVar)
{
    unsigned & counter = fillRangeCount_ [spilledRegVar->getId ()];
    return counter++;
}
// Return the next unused tmp index for the spilled regvar and advance its
// per-variable counter.
inline unsigned
SpillManagerGMRF::getTmpIndex (G4_RegVar * spilledRegVar)
{
    unsigned & counter = tmpRangeCount_ [spilledRegVar->getId ()];
    return counter++;
}
// Return the next unused spill-message index for the regvar and advance its
// per-variable counter.
inline unsigned
SpillManagerGMRF::getMsgSpillIndex (G4_RegVar * spilledRegVar)
{
    unsigned & counter = msgSpillRangeCount_ [spilledRegVar->getId ()];
    return counter++;
}
// Return the next unused fill-message index for the regvar and advance its
// per-variable counter.
inline unsigned
SpillManagerGMRF::getMsgFillIndex (G4_RegVar * spilledRegVar)
{
    unsigned & counter = msgFillRangeCount_ [spilledRegVar->getId ()];
    return counter++;
}
// Create a unique name for a regvar representing a spill/fill/msg live range.
// The name has the form "<baseName>_<spilled var name>_<index>" and is copied
// into storage obtained from allocMem(), so the returned pointer is not tied
// to any local object.
// The formatted string is materialised once. std::ends is no longer streamed:
// it embedded a NUL in the string, which inflated length() and therefore the
// allocation by one byte without changing the resulting C string.
inline const char *
SpillManagerGMRF::createImplicitRangeName (
    const char * baseName,
    G4_RegVar * spilledRegVar,
    unsigned index
)
{
    std::stringstream nameStrm;
    nameStrm << baseName << "_" << spilledRegVar->getName ()
             << "_" << index;
    const std::string nameStr = nameStrm.str ();

    // +1 for the terminating NUL written by strcpy_s.
    const unsigned nameLen = unsigned(nameStr.length ()) + 1;
    char * name = (char *) allocMem (nameLen);
    strcpy_s(name, nameLen, nameStr.c_str ());
    return name;
}
// Check if the region is a scalar replication region; forwards to
// G4_SrcRegRegion::isScalar().
inline bool
SpillManagerGMRF::isScalarReplication (G4_SrcRegRegion * region) const
{
    const bool scalar = region->isScalar();
    return scalar;
}
// Check if we have to repeat the simd16 source in the simd8 equivalents.
// The BSPEC mentions that if a replicated scalar appears in a simd16
// instruction, the source region used by the first simd8 half is logically
// used again by the second half (i.e. the register number is not incremented
// for the second half). This is therefore true exactly for scalar
// replication regions.
inline bool
SpillManagerGMRF::repeatSIMD16or32Source (G4_SrcRegRegion * region) const
{
    const bool replicated = isScalarReplication (region);
    return replicated;
}
// Create a declare directive for a new live range (spill/fill/msg)
// introduced as part of the spill code generation.
// The declare is created without lookup, its regvar receives the next
// implicit id (varIdCount_ plus the running implicit counter, which is then
// advanced), and the current basic block id is recorded for it.
// srcRgn and dstRgn are accepted but not used.
G4_Declare *
SpillManagerGMRF::createRangeDeclare (
    const char* name,
    G4_RegFileKind regFile,
    unsigned short nElems,
    unsigned short nRows,
    G4_Type type,
    RegionDesc * srcRgn,
    unsigned short dstRgn,
    DeclareType kind,
    G4_RegVar * base,
    G4_Operand * repRegion,
    unsigned execSize
)
{
    G4_Declare * newDcl = builder_->createDeclareNoLookup (
        name, regFile, nElems, nRows, type, kind, base, repRegion, execSize);

    const auto implicitId = varIdCount_ + latestImplicitVarIdCount_;
    latestImplicitVarIdCount_++;
    newDcl->getRegVar ()->setId (implicitId);

    gra.setBBId(newDcl, bbId_);
    return newDcl;
}
// Create a GRF regvar and its declare directive to represent the spill/fill
// live range of a region.
// Shape of the new declare:
//  - two full rows when the segment is larger than one register or the
//    region crosses a GRF boundary;
//  - otherwise one row that is just wide enough for the segment, widened to
//    a full register row when scratch messages are used.
// Destination regions produce a Spill declare, all others a Fill declare.
// In fail-safe mode the declare is pinned to the next reserved physical
// register. inst is not used.
template <class REGION_TYPE>
G4_Declare *
SpillManagerGMRF::createTransientGRFRangeDeclare (
    REGION_TYPE * region,
    const char * baseName,
    unsigned index,
    unsigned execSize,
    G4_INST * inst
)
{
    const char * name =
        createImplicitRangeName (baseName, getRegVar (region), index);
    G4_Type type = region->getType ();
    unsigned segmentByteSize = getSegmentByteSize (region, execSize);

    DeclareType regVarKind = DeclareType::Fill;
    if (region->isDstRegRegion ())
    {
        regVarKind = DeclareType::Spill;
    }

    unsigned short width;
    unsigned short height;
    const bool needsTwoRows =
        segmentByteSize > REG_BYTE_SIZE || region->crossGRF();
    if (needsTwoRows)
    {
        assert (REG_BYTE_SIZE % region->getElemSize () == 0);
        width = REG_BYTE_SIZE / region->getElemSize ();
        assert (segmentByteSize / REG_BYTE_SIZE <= 2);
        height = 2;
    }
    else
    {
        assert (segmentByteSize % region->getElemSize () == 0);
        width = segmentByteSize / region->getElemSize ();
        height = 1;
    }

    // Read/write size when using scratch msg descriptor is 32-bytes
    if (useScratchMsg_ && height == 1 && width < REG_BYTE_SIZE)
    {
        width = REG_BYTE_SIZE/region->getElemSize();
    }

    G4_Declare * transientRangeDeclare = createRangeDeclare(
        name, G4_GRF, width, height, type, NULL, DEF_HORIZ_STRIDE,
        regVarKind, region->getBase ()->asRegVar (), region, execSize);

    if (failSafeSpill_)
    {
        transientRangeDeclare->getRegVar()->setPhyReg(
            builder_->phyregpool.getGreg(spillRegOffset_), 0);
        spillRegOffset_ += height;
    }

    // FIXME: We should take the original declare's alignment too, but I'm worried
    // we may get perf regression if FE is over-aligning or the alignment is not necessary for this inst.
    // So Either is used for now and we can change it later if there are bugs
    setNewDclAlignment(transientRangeDeclare, Either);
    return transientRangeDeclare;
}
// Build the declare for the spill range that stands in for a spilled send
// post destination.
//   sendOut       - the send instruction whose destination is spilled.
//   spilledRegion - the spilled destination region of that send.
// The range is declared with REG_DWORD_SIZE elements of Type_UD per row; the
// type of the post destination does not matter. The number of rows is the
// response length of the send's message descriptor when one is attached;
// otherwise it is the number of rows of the spilled declare from the region's
// register offset onwards, capped at getSendMaxResponseLength ().
G4_Declare *
SpillManagerGMRF::createPostDstSpillRangeDeclare (
    G4_INST *         sendOut,
    G4_DstRegRegion * spilledRegion
)
{
    G4_RegVar * spilledRegVar = getRegVar (spilledRegion);
    const char * name =
        createImplicitRangeName (
            "SP_GRF", spilledRegVar, getSpillIndex (spilledRegVar));

    unsigned short nRows;
    G4_SendMsgDescriptor * msgDesc = sendOut->getMsgDesc ();
    if (!msgDesc) {
        // No descriptor: assume all following GRFs of the virtual register,
        // limited by the maximum send response length.
        nRows =
            spilledRegVar->getDeclare ()->getNumRows () -
            spilledRegion->getRegOff ();
        if (nRows > getSendMaxResponseLength ()) {
            nRows = (unsigned short) getSendMaxResponseLength ();
        }
    }
    else {
        nRows = msgDesc->ResponseLength ();
    }

    G4_DstRegRegion * normalizedPostDst = builder_->createDstRegRegion(
        Direct, spilledRegVar, spilledRegion->getRegOff (), SUBREG_ORIGIN,
        DEF_HORIZ_STRIDE, Type_UD);

    // The height comes from the message descriptor to limit the register
    // pressure induced by the spill range.
    G4_Declare * transientRangeDeclare =
        createRangeDeclare (
            name, G4_GRF, REG_DWORD_SIZE, nRows, Type_UD, NULL, 0,
            DeclareType::Spill, spilledRegVar, normalizedPostDst, REG_DWORD_SIZE);

    if (failSafeSpill_) {
        // Without split send the range starts one register past
        // spillRegStart_ and one extra row is accounted for
        // (presumably the message header row -- confirm against callers).
        const unsigned extraRows = useSplitSend () ? 0 : 1;
        transientRangeDeclare->getRegVar ()->setPhyReg (
            builder_->phyregpool.getGreg (spillRegStart_ + extraRows), 0);
        spillRegOffset_ += nRows + extraRows;
    }

    return transientRangeDeclare;
}
// Build the spill range declare for a spilled destination region: a
// transient GRF range named with the "SP_GRF" prefix and the index returned
// by getSpillIndex for the region's regvar.
inline G4_Declare *
SpillManagerGMRF::createSpillRangeDeclare (
    G4_DstRegRegion * spilledRegion,
    unsigned          execSize,
    G4_INST *         inst
)
{
    G4_RegVar * spilledRegVar = getRegVar (spilledRegion);
    return createTransientGRFRangeDeclare (
        spilledRegion, "SP_GRF", getSpillIndex (spilledRegVar),
        execSize, inst);
}
// Build the fill range declare for a filled GRF source region: a transient
// GRF range named with the "FL_GRF" prefix and the index returned by
// getFillIndex for the region's regvar. The region must belong to the GRF
// register file.
inline G4_Declare *
SpillManagerGMRF::createGRFFillRangeDeclare (
    G4_SrcRegRegion * fillRegion,
    unsigned          execSize,
    G4_INST *         inst
)
{
    assert (getRFType (fillRegion) == G4_GRF);
    G4_RegVar * filledRegVar = getRegVar (fillRegion);
    return createTransientGRFRangeDeclare (
        fillRegion, "FL_GRF", getFillIndex (filledRegVar),
        execSize, inst);
}
// Build the fill range declare for a filled region that is used as a send
// message payload.
//   filledRegion - must be src0 of a send, or src1 of a split send.
//   sendInst     - the send instruction consuming the region.
// The number of rows comes from the message descriptor when one is attached
// (extended message length for src1 of a split send, message length
// otherwise); without a descriptor it is the number of rows of the filled
// declare from the region's register offset onwards, capped at
// getSendMaxMessageLength ().
inline G4_Declare *
SpillManagerGMRF::createMRFFillRangeDeclare (
    G4_SrcRegRegion * filledRegion,
    G4_INST *         sendInst
)
{
    MUST_BE_TRUE ((sendInst->isSend() && (sendInst->getSrc(0)->asSrcRegRegion () == filledRegion)) ||
        (sendInst->isSplitSend() && (sendInst->getSrc(1)->asSrcRegRegion () == filledRegion)),
        "Error in createMRFFillRangeDeclare");

    G4_RegVar * filledRegVar = getRegVar (filledRegion);
    const char * name =
        createImplicitRangeName (
            "FL_MRF", filledRegVar, getFillIndex (filledRegVar));

    unsigned short nRows = 0;
    G4_SendMsgDescriptor * msgDesc = sendInst->getMsgDesc ();
    if (!msgDesc) {
        nRows =
            filledRegVar->getDeclare ()->getNumRows () -
            filledRegion->getRegOff ();
        if (nRows > getSendMaxMessageLength ()) {
            nRows = (unsigned short) getSendMaxMessageLength ();
        }
    }
    else if (sendInst->isSplitSend () &&
             (sendInst->getSrc (1)->asSrcRegRegion () == filledRegion)) {
        // The region is the extended payload of a split send.
        nRows = msgDesc->extMessageLength ();
    }
    else {
        nRows = msgDesc->MessageLength ();
    }

    // Same region as the original, but starting at sub-register 0.
    G4_SrcRegRegion * normalizedMRFSrc =
        builder_->createSrcRegRegion(
            filledRegion->getModifier (), Direct, filledRegVar,
            filledRegion->getRegOff (), SUBREG_ORIGIN, filledRegion->getRegion (),
            filledRegion->getType ());

    assert (REG_BYTE_SIZE % filledRegion->getElemSize () == 0);
    unsigned short width = REG_BYTE_SIZE / filledRegion->getElemSize ();

    // The width fills a whole row of the region's element type; the height
    // is taken from the message descriptor to limit the register pressure
    // induced by the fill range.
    G4_Declare * transientRangeDeclare =
        createRangeDeclare(
            name,
            G4_GRF,
            width, nRows, filledRegion->getType (), NULL, 0,
            DeclareType::Fill, filledRegVar, normalizedMRFSrc,
            width);
    setNewDclAlignment (transientRangeDeclare, filledRegVar->getAlignment ());

    if (failSafeSpill_) {
        if (sendInst->isEOT () && builder_->hasEOTGRFBinding ()) {
            // make sure eot src is in last 16 GRF
            uint32_t eotStart =
                builder_->getOptions ()->getuInt32Option (vISA_TotalGRFNum) - 16;
            if (spillRegOffset_ < eotStart) {
                spillRegOffset_ = eotStart;
            }
        }
        transientRangeDeclare->getRegVar ()->setPhyReg (
            builder_->phyregpool.getGreg (spillRegOffset_), 0);
        spillRegOffset_ += nRows;
    }

    return transientRangeDeclare;
}
// Build the declare for a temporary range ("TM_GRF") associated with a
// spilled destination region.
//   spilledRegion         - the spilled destination region.
//   execSize              - execution size used to compute the byte size.
//   forceSegmentAlignment - when true the range is sized to the region's
//                           segment, otherwise to the region itself.
// The range occupies one row when it fits in a register and two full rows
// otherwise; its alignment is set to Either.
G4_Declare *
SpillManagerGMRF::createTemporaryRangeDeclare (
    G4_DstRegRegion * spilledRegion,
    unsigned          execSize,
    bool              forceSegmentAlignment
)
{
    G4_RegVar * spilledRegVar = getRegVar (spilledRegion);
    const char * name =
        createImplicitRangeName (
            "TM_GRF", spilledRegVar, getTmpIndex (spilledRegVar));

    unsigned byteSize;
    if (forceSegmentAlignment) {
        byteSize = getSegmentByteSize (spilledRegion, execSize);
    }
    else {
        byteSize = getRegionByteSize (spilledRegion, execSize);
    }
    assert (byteSize <= 2 * REG_BYTE_SIZE);
    assert (byteSize % spilledRegion->getElemSize () == 0);

    // Default to a single row sized to the data; switch to two full rows
    // when the data does not fit in one register.
    unsigned short height = 1;
    unsigned short width = byteSize / spilledRegion->getElemSize ();
    if (byteSize > REG_BYTE_SIZE) {
        height = 2;
        width = REG_BYTE_SIZE / spilledRegion->getElemSize ();
    }

    G4_Declare * temporaryRangeDeclare =
        createRangeDeclare(
            name, G4_GRF, width, height, spilledRegion->getType (), NULL,
            DEF_HORIZ_STRIDE, DeclareType::Tmp, spilledRegVar, NULL, 0);

    if (failSafeSpill_) {
        // Fail-safe mode: pin to the next reserved spill register.
        temporaryRangeDeclare->getRegVar ()->setPhyReg (
            builder_->phyregpool.getGreg (spillRegOffset_), 0);
        spillRegOffset_ += height;
    }

    setNewDclAlignment (temporaryRangeDeclare, Either);
    return temporaryRangeDeclare;
}
// Create a destination region on the spill range that can replace the
// spilled region in its instruction.
//   spillRangeRegVar - regvar of the spill range.
//   spilledRegion    - the original spilled destination region.
//   execSize         - execution size of the instruction.
//   regOff           - register offset to use within the spill range.
// For an aligned region the sub-register origin is SUBREG_ORIGIN. For an
// unaligned region it is the displacement of the region from its segment,
// in elements; with scratch messages it is instead the region's own
// sub-register offset plus the in-register part of the accumulated alias
// offset of its declare.
G4_DstRegRegion *
SpillManagerGMRF::createSpillRangeDstRegion (
    G4_RegVar *       spillRangeRegVar,
    G4_DstRegRegion * spilledRegion,
    unsigned          execSize,
    unsigned          regOff
)
{
    if (!isUnalignedRegion (spilledRegion, execSize)) {
        return builder_->createDstRegRegion(
            Direct, spillRangeRegVar, (short) regOff, SUBREG_ORIGIN,
            spilledRegion->getHorzStride (), spilledRegion->getType ());
    }

    unsigned segmentDisp =
        getEncAlignedSegmentDisp (spilledRegion, execSize);
    unsigned regionDisp = getRegionDisp (spilledRegion);
    assert (regionDisp >= segmentDisp);

    const unsigned dispFromSegment = regionDisp - segmentDisp;
    unsigned short subRegOff =
        dispFromSegment / spilledRegion->getElemSize ();
    assert (dispFromSegment % spilledRegion->getElemSize () == 0);
    assert (subRegOff * spilledRegion->getElemSize () +
            getRegionByteSize (spilledRegion, execSize) <=
            2 * REG_BYTE_SIZE);

    if (useScratchMsg_) {
        // Walk the alias chain and accumulate the byte offset of the
        // spilled declare relative to its root declare.
        unsigned aliasByteOff = 0;
        for (G4_Declare * dcl =
                 spilledRegion->getBase ()->asRegVar ()->getDeclare ();
             dcl->getAliasDeclare () != NULL;
             dcl = dcl->getAliasDeclare ()) {
            aliasByteOff += dcl->getAliasOffset ();
        }
        // Keep only the part of the offset that lies within one GRF.
        aliasByteOff %= G4_GRF_REG_NBYTES;
        // sub-regoff is in units of element size
        subRegOff =
            spilledRegion->getSubRegOff () +
            aliasByteOff / spilledRegion->getElemSize ();
    }

    return builder_->createDstRegRegion(
        Direct, spillRangeRegVar, (unsigned short) regOff, subRegOff,
        spilledRegion->getHorzStride (), spilledRegion->getType ());
}
// Create a source region over the temporary range (the range that replaced
// the spilled portion appearing in an instruction destination) so that it
// can be copied into the segment aligned spill range.
//   tmpRangeRegVar - regvar of the temporary range.
//   spilledRegion  - original spilled destination; supplies the horizontal
//                    stride and the element type.
//   execSize       - execution size of the copy.
//   regOff         - register offset within the temporary range.
G4_SrcRegRegion *
SpillManagerGMRF::createTemporaryRangeSrcRegion (
    G4_RegVar *       tmpRangeRegVar,
    G4_DstRegRegion * spilledRegion,
    uint16_t          execSize,
    unsigned          regOff
)
{
    uint16_t dstStride = spilledRegion->getHorzStride ();
    // A scalar region is returned when execsize is 1.
    RegionDesc * copyRegion =
        builder_->createRegionDesc (execSize, dstStride, 1, 0);
    G4_SrcRegRegion * tmpSrc =
        builder_->createSrcRegRegion(
            Mod_src_undef, Direct, tmpRangeRegVar, (short) regOff,
            SUBREG_ORIGIN, copyRegion, spilledRegion->getType ());
    return tmpSrc;
}
// Create a source region on the fill range that can replace the filled
// region in its instruction. The modifier, region descriptor, type and
// accRegSel of the original region are carried over.
//   fillRangeRegVar - regvar of the fill range.
//   filledRegion    - the original filled source region.
//   execSize        - execution size of the instruction.
// For an aligned region the origin is (REG_ORIGIN, SUBREG_ORIGIN); for an
// unaligned region the sub-register origin is the displacement of the
// region from its segment, in elements.
G4_SrcRegRegion *
SpillManagerGMRF::createFillRangeSrcRegion (
    G4_RegVar *       fillRangeRegVar,
    G4_SrcRegRegion * filledRegion,
    unsigned          execSize
)
{
    if (!isUnalignedRegion (filledRegion, execSize)) {
        return builder_->createSrcRegRegion(
            filledRegion->getModifier (), Direct, fillRangeRegVar,
            REG_ORIGIN, SUBREG_ORIGIN, filledRegion->getRegion (),
            filledRegion->getType (), filledRegion->getAccRegSel ());
    }

    unsigned segmentDisp =
        getEncAlignedSegmentDisp (filledRegion, execSize);
    unsigned regionDisp = getRegionDisp (filledRegion);
    assert (regionDisp >= segmentDisp);

    const unsigned dispFromSegment = regionDisp - segmentDisp;
    unsigned short subRegOff =
        dispFromSegment / filledRegion->getElemSize ();
    assert (dispFromSegment % filledRegion->getElemSize () == 0);
    assert (subRegOff * filledRegion->getElemSize () +
            getRegionByteSize (filledRegion, execSize) <=
            2 * REG_BYTE_SIZE);

    // we need to preserve accRegSel if it's set
    return builder_->createSrcRegRegion(
        filledRegion->getModifier (), Direct, fillRangeRegVar, REG_ORIGIN,
        subRegOff, filledRegion->getRegion (), filledRegion->getType (),
        filledRegion->getAccRegSel ());
}
// Create a source region over the spill regvar for the mov that copies its
// value into the write payload of a block write message.
//   spillRangeRegVar - regvar of the spill range; its byte size must be a
//                      multiple of a dword.
//   regOff, subregOff - origin of the region within the spill range.
// The region is typed UD and uses a descriptor created from
// (DWORD_BYTE_SIZE, DWORD_BYTE_SIZE, 1), i.e. <4;4,1>:ud, so that a row
// occupies a packed oword. The exec size of the copy is chosen by the
// caller.
inline G4_SrcRegRegion *
SpillManagerGMRF::createBlockSpillRangeSrcRegion (
    G4_RegVar * spillRangeRegVar,
    unsigned    regOff,
    unsigned    subregOff
)
{
    assert (getByteSize (spillRangeRegVar) % DWORD_BYTE_SIZE == 0);
    RegionDesc * packedOwordRegion =
        builder_->rgnpool.createRegion (DWORD_BYTE_SIZE, DWORD_BYTE_SIZE, 1);
    G4_SrcRegRegion * blockSrc =
        builder_->createSrcRegRegion(
            Mod_src_undef, Direct, spillRangeRegVar,
            (short) regOff, (short) subregOff,
            packedOwordRegion, Type_UD);
    return blockSrc;
}
// Create the declare for the implicit message range ("SP_MSG") that holds
// the send message header and the write payload used to spill a whole
// regvar. With split send no separate range is needed and the builtin r0
// declare is returned instead.
//   regVar - the regvar being spilled; for a transient regvar the name is
//            derived from its base regvar.
// The height is the header height plus the payload height, where the
// payload height is capped at SPILL_PAYLOAD_HEIGHT_LIMIT. In fail-safe mode
// the range is pinned to spillRegStart_.
G4_Declare *
SpillManagerGMRF::createMRangeDeclare (
    G4_RegVar * regVar
)
{
    if (useSplitSend ()) {
        return builder_->getBuiltinR0 ();
    }

    G4_RegVar * repRegVar = regVar;
    if (regVar->isRegVarTransient ()) {
        repRegVar = regVar->getBaseRegVar ();
    }
    const char * name =
        createImplicitRangeName (
            "SP_MSG", repRegVar, getMsgSpillIndex (repRegVar));

    unsigned regVarByteSize = getByteSize (regVar);

    unsigned writePayloadHeight = cdiv (regVarByteSize, REG_BYTE_SIZE);
    if (writePayloadHeight > SPILL_PAYLOAD_HEIGHT_LIMIT) {
        writePayloadHeight = SPILL_PAYLOAD_HEIGHT_LIMIT;
    }

    // A regvar of exactly one dword gets the dword header height; every
    // other size gets the oword header height.
    unsigned payloadHeaderHeight = OWORD_PAYLOAD_HEADER_MAX_HEIGHT;
    if (regVarByteSize == DWORD_BYTE_SIZE) {
        payloadHeaderHeight = DWORD_PAYLOAD_HEADER_MAX_HEIGHT;
    }

    unsigned short height = payloadHeaderHeight + writePayloadHeight;
    unsigned short width = REG_DWORD_SIZE;

    // We should not find ourselves using dword scattered write
    if (useScratchMsg_) {
        assert (payloadHeaderHeight != DWORD_PAYLOAD_HEADER_MAX_HEIGHT);
    }

    G4_Declare * msgRangeDeclare =
        createRangeDeclare (
            name,
            G4_GRF,
            width, height, Type_UD, NULL, DEF_HORIZ_STRIDE,
            DeclareType::Tmp, regVar->getNonTransientBaseRegVar (), NULL, 0);

    if (failSafeSpill_) {
        msgRangeDeclare->getRegVar ()->setPhyReg (
            builder_->phyregpool.getGreg (spillRegStart_), 0);
    }

    return msgRangeDeclare;
}
// Create the declare for the implicit message range ("SP_MSG") that holds
// the send message header and the write payload used to spill a regvar
// region. With split send no separate range is needed and the builtin r0
// declare is returned instead.
//   region   - the spilled destination region.
//   execSize - execution size used to compute the segment size and the
//              message type.
// The height is the header height (oword height for oword/hword messages,
// dword height otherwise) plus the number of registers covering the
// segment. In fail-safe mode the range is pinned to the next reserved spill
// register.
G4_Declare *
SpillManagerGMRF::createMRangeDeclare (
    G4_DstRegRegion * region,
    unsigned          execSize
)
{
    if (useSplitSend ()) {
        return builder_->getBuiltinR0 ();
    }

    G4_RegVar * spilledRegVar = getRegVar (region);
    const char * name =
        createImplicitRangeName (
            "SP_MSG", spilledRegVar, getMsgSpillIndex (spilledRegVar));

    unsigned regionByteSize = getSegmentByteSize (region, execSize);
    unsigned writePayloadHeight = cdiv (regionByteSize, REG_BYTE_SIZE);

    unsigned msgType = getMsgType (region, execSize);
    const bool isBlockMsg =
        (msgType == owordMask () || msgType == hwordMask ());
    unsigned payloadHeaderHeight =
        isBlockMsg ?
            OWORD_PAYLOAD_HEADER_MAX_HEIGHT : DWORD_PAYLOAD_HEADER_MAX_HEIGHT;

    // We should not find ourselves using dword scattered write
    if (useScratchMsg_) {
        assert (payloadHeaderHeight != DWORD_PAYLOAD_HEADER_MAX_HEIGHT);
    }

    unsigned height = payloadHeaderHeight + writePayloadHeight;
    unsigned short width = REG_DWORD_SIZE;

    G4_Declare * msgRangeDeclare =
        createRangeDeclare (
            name,
            G4_GRF,
            width, (unsigned short) height, Type_UD, NULL, DEF_HORIZ_STRIDE,
            DeclareType::Tmp, region->getBase ()->asRegVar (), NULL, 0);

    if (failSafeSpill_) {
        msgRangeDeclare->getRegVar ()->setPhyReg (
            builder_->phyregpool.getGreg (spillRegOffset_), 0);
        spillRegOffset_ += height;
    }

    return msgRangeDeclare;
}
// Create the declare for the implicit message range ("FL_MSG") that holds
// the send message header used to fill a regvar region from memory. With
// split send, or when scratch messages are used, no separate range is
// needed and the builtin r0 declare is returned instead.
//   region   - the filled source region.
//   execSize - execution size used to compute the message type.
// The range only holds the header (no payload rows). In fail-safe mode the
// range is pinned to the next reserved spill register.
G4_Declare *
SpillManagerGMRF::createMRangeDeclare (
    G4_SrcRegRegion * region,
    unsigned          execSize
)
{
    if (useSplitSend ()) {
        return builder_->getBuiltinR0 ();
    }

    G4_RegVar * filledRegVar = getRegVar (region);
    const char * name =
        createImplicitRangeName (
            "FL_MSG", filledRegVar, getMsgFillIndex (filledRegVar));

    // NOTE(review): the result of this call is discarded; it is kept in case
    // the callee performs checks -- confirm whether it can be removed.
    getSegmentByteSize (region, execSize);

    const bool isOwordMsg = (getMsgType (region, execSize) == owordMask ());
    unsigned payloadHeaderHeight =
        isOwordMsg ?
            OWORD_PAYLOAD_HEADER_MIN_HEIGHT : DWORD_PAYLOAD_HEADER_MIN_HEIGHT;

    // We should not find ourselves using dword scattered write
    if (useScratchMsg_) {
        assert (payloadHeaderHeight != DWORD_PAYLOAD_HEADER_MAX_HEIGHT);
        // When using scratch msg descriptor we dont need to use a
        // separate GRF for payload. Source operand of send can directly
        // use r0.0.
        return builder_->getBuiltinR0 ();
    }

    unsigned height = payloadHeaderHeight;
    unsigned width = REG_DWORD_SIZE;

    G4_Declare * msgRangeDeclare =
        createRangeDeclare (
            name,
            G4_GRF,
            (unsigned short) width, (unsigned short) height, Type_UD, NULL,
            DEF_HORIZ_STRIDE,
            DeclareType::Tmp, region->getBase ()->asRegVar (), NULL, 0);

    if (failSafeSpill_) {
        msgRangeDeclare->getRegVar ()->setPhyReg (
            builder_->phyregpool.getGreg (spillRegOffset_), 0);
        spillRegOffset_ += height;
    }

    return msgRangeDeclare;
}
// Create a UD destination region inside the message range for the write
// payload portion of a block write message (used for spill). The given
// offsets are relative to the start of the write payload:
// OWORD_PAYLOAD_WRITE_REG_OFFSET / OWORD_PAYLOAD_WRITE_SUBREG_OFFSET are
// added to them. The exec size of the mov using this region is chosen by
// the caller.
inline G4_DstRegRegion *
SpillManagerGMRF::createMPayloadBlockWriteDstRegion (
    G4_RegVar * mrfRange,
    unsigned    regOff,
    unsigned    subregOff
)
{
    const unsigned payloadRegOff = regOff + OWORD_PAYLOAD_WRITE_REG_OFFSET;
    const unsigned payloadSubregOff =
        subregOff + OWORD_PAYLOAD_WRITE_SUBREG_OFFSET;
    return builder_->createDstRegRegion(
        Direct, mrfRange, (short) payloadRegOff, (short) payloadSubregOff,
        DEF_HORIZ_STRIDE, Type_UD);
}
// Create a UD destination region inside the message range for the input
// header payload portion of the send message. The region starts at register
// PAYLOAD_INPUT_REG_OFFSET and the given sub-register offset. The mov that
// writes it is expected to use an exec size of 8.
inline G4_DstRegRegion *
SpillManagerGMRF::createMHeaderInputDstRegion (
    G4_RegVar * mrfRange,
    unsigned    subregOff
)
{
    G4_DstRegRegion * headerDst =
        builder_->createDstRegRegion(
            Direct, mrfRange, PAYLOAD_INPUT_REG_OFFSET, (short) subregOff,
            DEF_HORIZ_STRIDE, Type_UD);
    return headerDst;
}
// Create a UD destination region inside the message range for the payload
// offset field of the oword block send message, located at
// (OWORD_PAYLOAD_SPOFFSET_REG_OFFSET, OWORD_PAYLOAD_SPOFFSET_SUBREG_OFFSET).
// The mov that writes it is expected to use an exec size of 1.
inline G4_DstRegRegion *
SpillManagerGMRF::createMHeaderBlockOffsetDstRegion (
    G4_RegVar * mrfRange
)
{
    G4_DstRegRegion * offsetDst =
        builder_->createDstRegRegion(
            Direct, mrfRange, OWORD_PAYLOAD_SPOFFSET_REG_OFFSET,
            OWORD_PAYLOAD_SPOFFSET_SUBREG_OFFSET, DEF_HORIZ_STRIDE,
            Type_UD);
    return offsetDst;
}
// Create a UD source region over the input payload, i.e. the builtin r0
// declare, starting at (PAYLOAD_INPUT_REG_OFFSET,
// PAYLOAD_INPUT_SUBREG_OFFSET). The mov that reads it is expected to use an
// exec size of 8.
inline G4_SrcRegRegion *
SpillManagerGMRF::createInputPayloadSrcRegion ()
{
    G4_RegVar * r0RegVar = builder_->getBuiltinR0 ()->getRegVar ();
    RegionDesc * fullRowRegion =
        builder_->rgnpool.createRegion (
            REG_DWORD_SIZE, REG_DWORD_SIZE, DEF_HORIZ_STRIDE);
    G4_SrcRegRegion * payloadSrc =
        builder_->createSrcRegRegion(
            Mod_src_undef, Direct, r0RegVar,
            PAYLOAD_INPUT_REG_OFFSET, PAYLOAD_INPUT_SUBREG_OFFSET,
            fullRowRegion, Type_UD);
    return payloadSrc;
}
// Create the message range for a spill/fill of the given region and
// initialize its header (input payload plus, when applicable, the spill
// displacement).
//   region   - the spilled or filled region.
//   execSize - execution size of the instruction.
// Returns nullptr when SLM spill is in use, because the SLM spill/fill
// functions create their own header.
template <class REGION_TYPE>
inline G4_Declare *
SpillManagerGMRF::createAndInitMHeader (
    REGION_TYPE * region,
    unsigned      execSize
)
{
    if (canDoSLMSpill ()) {
        //SLM spill/fill functions create their own header
        return nullptr;
    }

    return initMHeader (
        createMRangeDeclare (region, execSize), region, execSize);
}
// Initialize the message header held in mRangeDcl for a send that
// saves/loads the given region to/from memory.
//   mRangeDcl - declare of the message range to initialize.
//   region    - the spilled or filled region.
//   execSize  - execution size of the instruction.
// Emits a pseudo kill of the message range followed by a mov of the input
// payload (r0) into the header. Without scratch messages it additionally
// writes the spill displacement, in owords, into the block offset field;
// the frame pointer is added to it when compiling a non-kernel and the
// region's representative declare does not have file scope.
// Returns mRangeDcl.
template <class REGION_TYPE>
G4_Declare *
SpillManagerGMRF::initMHeader (
    G4_Declare *  mRangeDcl,
    REGION_TYPE * region,
    unsigned      execSize
)
{
    // With scratch messages the builtin r0 declare may have been handed back
    // as the message range; it is used as-is and needs no initialization.
    if (useScratchMsg_ && mRangeDcl == builder_->getBuiltinR0 ()) {
        return mRangeDcl;
    }

    // Emit a pseudo kill on the message range before writing it.
    G4_DstRegRegion * killDst =
        builder_->createDstRegRegion (
            Direct, mRangeDcl->getRegVar (), 0, 0, 1, Type_UD);
    auto killInst =
        builder_->createInst (
            NULL, G4_pseudo_kill, NULL, false, 1, killDst, NULL, NULL, 0);
    killInst->setCISAOff (curInst->getCISAOff ());

    // Copy the input payload into the header.
    G4_DstRegRegion * mHeaderInputDstRegion =
        createMHeaderInputDstRegion (mRangeDcl->getRegVar ());
    G4_SrcRegRegion * inputPayload = createInputPayloadSrcRegion ();
    createMovInst (REG_DWORD_SIZE, mHeaderInputDstRegion, inputPayload);
    numGRFMove ++;

    // When using scratch msg description, we only need to copy r0.0 in to
    // msg header; the memory offset is specified in the msg descriptor.
    if (!useScratchMsg_) {
        // Initialize the message header with the spill disp for block
        // read/write.
        G4_DstRegRegion * mHeaderOffsetDstRegion =
            createMHeaderBlockOffsetDstRegion (mRangeDcl->getRegVar ());

        int offset = getSegmentDisp (region, execSize);
        getSpillOffset (offset);
        unsigned segmentDisp = offset / OWORD_BYTE_SIZE;
        G4_Imm * segmentDispImm = builder_->createImm (segmentDisp, Type_UD);

        G4_RegVar * baseRegVar = NULL;
        if (region->isSrcRegRegion ()) {
            baseRegVar = getReprRegVar (
                region->asSrcRegRegion ()->getBase ()->asRegVar ());
        }
        else if (region->isDstRegRegion ()) {
            baseRegVar = getReprRegVar (
                region->asDstRegRegion ()->getBase ()->asRegVar ());
        }
        else {
            MUST_BE_TRUE (false, ERROR_GRAPHCOLOR);
        }

        const bool frameRelative =
            !builder_->getIsKernel () &&
            !baseRegVar->getDeclare ()->getHasFileScope ();
        if (frameRelative) {
            createAddFPInst (
                SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion, segmentDispImm);
        }
        else {
            createMovInst (
                SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion, segmentDispImm);
        }
        numGRFMove ++;
    }

    return mRangeDcl;
}
// Create the message range for spilling a whole regvar and initialize its
// header with the input payload.
//   regVar - the regvar being spilled.
// Returns nullptr when SLM spill is in use.
inline G4_Declare *
SpillManagerGMRF::createAndInitMHeader (
    G4_RegVar * regVar
)
{
    if (canDoSLMSpill ()) {
        return nullptr;
    }

    return initMHeader (createMRangeDeclare (regVar));
}
// Initialize the message header held in mRangeDcl with the input payload.
//   mRangeDcl - declare of the message range to initialize.
// Emits a pseudo kill of the message range followed by a mov of the input
// payload (r0) into the header. Returns mRangeDcl.
G4_Declare *
SpillManagerGMRF::initMHeader (
    G4_Declare * mRangeDcl
)
{
    // With scratch messages the builtin r0 declare may have been handed back
    // as the message range; it is used as-is and needs no initialization.
    if (useScratchMsg_ && mRangeDcl == builder_->getBuiltinR0 ()) {
        return mRangeDcl;
    }

    // Emit a pseudo kill on the message range before writing it.
    G4_DstRegRegion * killDst =
        builder_->createDstRegRegion (
            Direct, mRangeDcl->getRegVar (), 0, 0, 1, Type_UD);
    auto killInst =
        builder_->createInst (
            NULL, G4_pseudo_kill, NULL, false, 1, killDst, NULL, NULL, 0);
    killInst->setCISAOff (curInst->getCISAOff ());

    // Copy the input payload into the header.
    G4_DstRegRegion * mHeaderInputDstRegion =
        createMHeaderInputDstRegion (mRangeDcl->getRegVar ());
    G4_SrcRegRegion * inputPayload = createInputPayloadSrcRegion ();
    createMovInst (REG_DWORD_SIZE, mHeaderInputDstRegion, inputPayload);
    numGRFMove ++;

    return mRangeDcl;
}
// Initialize the write payload part of the message for spilled regvars by
// copying `height` rows of the spill range, starting at row `regOff`, into
// consecutive rows of the message write payload. Nothing is emitted when
// split send is in use.
// Either of the following restrictions for spillRangeDcl are assumed:
//     - the regvar element type is dword and its 2 <= width <= 8 and
//       height - regOff == 1
//     - the regvar element type is dword and its width = 8 and
//       2 <= height - regOff <= 8
//     - the regvar element type is dword and its width and height are 1
void
SpillManagerGMRF::initMWritePayload (
    G4_Declare * spillRangeDcl,
    G4_Declare * mRangeDcl,
    unsigned     regOff,
    unsigned     height
)
{
    if (useSplitSend ()) {
        // no need for payload moves if using sends
        return;
    }

    // One mov per row copies the spill range into the write payload.
    for (unsigned row = 0; row < height; row++) {
        G4_SrcRegRegion * spillRangeSrcRegion =
            createBlockSpillRangeSrcRegion (
                spillRangeDcl->getRegVar (), row + regOff);
        G4_DstRegRegion * mPayloadWriteDstRegion =
            createMPayloadBlockWriteDstRegion (
                mRangeDcl->getRegVar (), row);

        // Multi-row copies move a full register of dwords per row; a
        // single-row copy moves as many elements as the declare has.
        unsigned char movExecSize;
        if (height > 1) {
            movExecSize = REG_DWORD_SIZE;
        }
        else {
            movExecSize = spillRangeDcl->getNumElems ();
        }

        createMovInst (
            movExecSize, mPayloadWriteDstRegion, spillRangeSrcRegion);
        numGRFMove ++;
    }
}
// Initialize the write payload part of the message for spilled regions by
// copying the segment of the spill range into the message write payload
// with a single mov. Nothing is emitted when split send is in use.
//   spillRangeDcl      - declare of the spill range (copy source).
//   mRangeDcl          - declare of the message range (copy destination).
//   spilledRangeRegion - the spilled region; determines the segment size.
//   execSize           - execution size of the spilled instruction.
//   regOff             - row of the spill range to copy from.
void
SpillManagerGMRF::initMWritePayload (
    G4_Declare *      spillRangeDcl,
    G4_Declare *      mRangeDcl,
    G4_DstRegRegion * spilledRangeRegion,
    unsigned          execSize,
    unsigned          regOff
)
{
    if (useSplitSend ()) {
        // no need for payload moves
        return;
    }

    G4_SrcRegRegion * spillRangeSrcRegion =
        createBlockSpillRangeSrcRegion (
            spillRangeDcl->getRegVar (), regOff);
    G4_DstRegRegion * mPayloadWriteDstRegion =
        createMPayloadBlockWriteDstRegion (mRangeDcl->getRegVar ());

    // The copy moves the segment one dword per channel.
    unsigned segmentByteSize =
        getSegmentByteSize (spilledRangeRegion, execSize);
    unsigned char movExecSize = segmentByteSize / DWORD_BYTE_SIZE;

    if (useScratchMsg_) {
        // Write entire GRF when using scratch msg descriptor: round the
        // exec size up to 8 or 16.
        if (movExecSize <= 8) {
            movExecSize = 8;
        }
        else if (movExecSize < 16) {
            movExecSize = 16;
        }
    }

    assert (segmentByteSize % DWORD_BYTE_SIZE == 0);
    assert (movExecSize <= 16);

    createMovInst (
        movExecSize, mPayloadWriteDstRegion, spillRangeSrcRegion);
    numGRFMove ++;
}
// Return the block size field of the message descriptor for oword block
// messages: the code for `size` (1 -> 0, 2 -> 2, 4 -> 3, 8 -> 4) shifted
// to getSendDescDataSizeBitOffset (). Any other size asserts and is encoded
// as 0.
inline unsigned
SpillManagerGMRF::blockSendBlockSizeCode (
    unsigned size
) const
{
    unsigned code = 0;
    if (size == 1) {
        code = 0;
    }
    else if (size == 2) {
        code = 2;
    }
    else if (size == 4) {
        code = 3;
    }
    else if (size == 8) {
        code = 4;
    }
    else {
        assert (0);
    }
    return code << getSendDescDataSizeBitOffset ();
}
// Return the block size field of the message descriptor for dword scatter
// messages: the code for `size` (1 or 8 -> 0x02, 16 -> 0x03) shifted to
// getSendDescDataSizeBitOffset (). Any other size asserts and is encoded
// as 0.
inline unsigned
SpillManagerGMRF::scatterSendBlockSizeCode (
    unsigned size
) const
{
    unsigned code = 0;
    if (size == 1 || size == 8) {
        // We will use an exec size of 1 to perform 1 dword read/write, so
        // size 1 shares the encoding of size 8.
        code = 0x02;
    }
    else if (size == 16) {
        code = 0x03;
    }
    else {
        assert (0);
    }
    return code << getSendDescDataSizeBitOffset ();
}
// Return the block size encoding used in scratch message descriptors for a
// transfer of `size` registers: 1 -> 0x0, 2 -> 0x1, 4 -> 0x2, 8 -> 0x3.
// A size of 8 asserts that the platform is at least GENX_SKL. Any other
// size asserts and is encoded as 0x0.
static uint32_t getScratchBlocksizeEncoding(int size)
{
    switch (size) {
    case 1:
        return 0x0;
    case 2:
        return 0x1;
    case 4:
        return 0x2;
    case 8:
        assert(getGenxPlatform() >= GENX_SKL);
        return 0x3;
    default:
        assert(false);
        return 0x0;
    }
}
// Create the message descriptor for a spill send instruction for spilled
// post destinations of send instructions.
//   regOff   - register offset within the spilled range; with scratch
//              messages it is added to the hword offset in the descriptor.
//   height   - number of registers to write.
//   execSize - [out] exec size to use for the send.
//   base     - regvar whose displacement gives the spill location (only
//              read on the scratch message path).
// Returns the descriptor as a UD immediate.
G4_Imm *
SpillManagerGMRF::createSpillSendMsgDesc (
    unsigned    regOff,
    unsigned    height,
    unsigned &  execSize,
    G4_RegVar * base
)
{
    unsigned message = 0;

    if (useScratchMsg_) {
        unsigned headerPresent = 0x80000;
        message = headerPresent;

        // With split send only the header counts towards the message
        // length; otherwise the payload rows are included as well.
        unsigned msgLength = SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT;
        if (!useSplitSend ()) {
            msgLength += height;
        }
        message |= (msgLength << getSendMsgLengthBitOffset ());

        message |= (1 << SCRATCH_MSG_DESC_CATEORY);
        message |= (1 << SCRATCH_MSG_DESC_CHANNEL_MODE);
        message |= (1 << SCRATCH_MSG_DESC_OPERATION_MODE);

        unsigned blocksize_encoding = getScratchBlocksizeEncoding (height);
        message |= (blocksize_encoding << SCRATCH_MSG_DESC_BLOCK_SIZE);

        // The offset field is in units of 32 bytes (one register).
        int offset = getDisp (base);
        getSpillOffset (offset);
        message |= (offset >> 5) + regOff;

        execSize = 16;
    }
    else {
        unsigned segmentByteSize = height * REG_BYTE_SIZE;
        unsigned writePayloadCount = cdiv (segmentByteSize, REG_BYTE_SIZE);

        unsigned statelessSurfaceIndex = 0xFF;
        message = statelessSurfaceIndex;

        unsigned headerPresent = 0x80000;
        message |= headerPresent;

        unsigned messageType = getSendOwordWriteType ();
        message |= messageType << getSendWriteTypeBitOffset ();

        // With split send only the header counts towards the message
        // length; otherwise the payload rows are included as well.
        unsigned messageLength = OWORD_PAYLOAD_HEADER_MAX_HEIGHT;
        if (!useSplitSend ()) {
            messageLength += writePayloadCount;
        }
        message |= messageLength << getSendMsgLengthBitOffset ();

        unsigned segmentOwordSize = cdiv (segmentByteSize, OWORD_BYTE_SIZE);
        message |= blockSendBlockSizeCode (segmentOwordSize);

        execSize = LIMIT_SEND_EXEC_SIZE (segmentOwordSize * DWORD_BYTE_SIZE);
    }

    return builder_->createImm (message, Type_UD);
}
// Create the message descriptor for a spill send instruction for spilled
// destination regions.
//   spilledRangeRegion - the spilled destination region.
//   execSize           - [in] exec size of the spilled instruction;
//                        [out] exec size to use for the send.
// Returns the descriptor as a UD immediate.
G4_Imm *
SpillManagerGMRF::createSpillSendMsgDesc (
    G4_DstRegRegion * spilledRangeRegion,
    unsigned &        execSize
)
{
    unsigned message = 0;

    // Both message flavours are sized from the segment of the region.
    unsigned segmentByteSize =
        getSegmentByteSize (spilledRangeRegion, execSize);
    unsigned writePayloadCount = cdiv (segmentByteSize, REG_BYTE_SIZE);
    unsigned headerPresent = 0x80000;

    if (useScratchMsg_) {
        /*
        bits    description
        18:0    function control
        19      Header present
        24:20   Response length
        28:25   Message length
        31:29   MBZ

        18:0
        11:0    Offset (12b hword offset)
        13:12   Block size (00 - 1 register, 01 - 2 regs, 10 - reserved, 11 - 4 regs)
        14      MBZ
        15      Invalidate after read (0 - no invalidate, 1 - invalidate)
        16      Channel mode (0 - oword, 1 - dword)
        17      Operation type (0 - read, 1 - write)
        18      Category (1 - scratch block read/write)

        NOTE(review): getScratchBlocksizeEncoding () returns 0x2 for 4
        registers and 0x3 for 8, which does not match the block size row
        above -- confirm which one is current.
        */
        message |= headerPresent;

        // message length = 1 if we are using sends, 1 + payload otherwise
        unsigned messageLength = SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT;
        if (!useSplitSend ()) {
            messageLength += writePayloadCount;
        }
        message |= (messageLength << getSendMsgLengthBitOffset ());

        message |= (1 << SCRATCH_MSG_DESC_CATEORY);         // category
        message |= (1 << SCRATCH_MSG_DESC_CHANNEL_MODE);    // channel mode
        message |= (1 << SCRATCH_MSG_DESC_OPERATION_MODE);  // write operation

        unsigned numGRFs = cdiv (segmentByteSize, G4_GRF_REG_NBYTES);
        unsigned blocksize_encoding = getScratchBlocksizeEncoding (numGRFs);
        message |= (blocksize_encoding << SCRATCH_MSG_DESC_BLOCK_SIZE);

        // The offset field is in units of 32 bytes (one register).
        int offset = getRegionDisp (spilledRangeRegion);
        getSpillOffset (offset);
        message |= offset >> 5;

        // Multi-register writes and instructions wider than 8 channels
        // use exec size 16; everything else uses 8.
        execSize = (numGRFs > 1 || execSize > 8) ? 16 : 8;
    }
    else {
        unsigned statelessSurfaceIndex = 0xFF;
        message = statelessSurfaceIndex;
        message |= headerPresent;

        unsigned messageType = getSendOwordWriteType ();
        message |= messageType << getSendWriteTypeBitOffset ();

        // With split send only the header counts towards the message
        // length; otherwise the payload rows are included as well.
        unsigned messageLength = OWORD_PAYLOAD_HEADER_MAX_HEIGHT;
        if (!useSplitSend ()) {
            messageLength += writePayloadCount;
        }
        message |= messageLength << getSendMsgLengthBitOffset ();

        unsigned segmentOwordSize = cdiv (segmentByteSize, OWORD_BYTE_SIZE);
        message |= blockSendBlockSizeCode (segmentOwordSize);

        execSize = LIMIT_SEND_EXEC_SIZE (segmentOwordSize * DWORD_BYTE_SIZE);
    }

    return builder_->createImm (message, Type_UD);
}
// Build the message descriptor bits for an SLM untyped surface read or
// write used for spill/fill.
//   numReg - number of registers transferred; 1 selects SIMD8 and a message
//            length of 1, any other value selects SIMD16 and a message
//            length of 2.
//   isRead - true for an untyped surface read (response length = numReg),
//            false for an untyped surface write (response length = 0).
// Returns the raw descriptor value.
uint32_t
SpillManagerGMRF::getUntypedSLMMsgDesc(int numReg, bool isRead) const
{
    const bool singleReg = (numReg == 1);
    uint32_t message = 0;

    unsigned responseLength = isRead ? numReg : 0;

    unsigned SLMIndex = 0xFE;
    message |= SLMIndex;

    uint32_t messageType =
        isRead ? DC1_UNTYPED_SURFACE_READ : DC1_UNTYPED_SURFACE_WRITE;
    message |= messageType << getSendReadTypeBitOffset();

    uint32_t simdMode = singleReg ? MDC_SM3_SIMD8 : MDC_SM3_SIMD16;
    message |= simdMode << 12;

    uint32_t chMask = getChMaskForSpill(numReg).getHWEncoding();
    message |= chMask << 8;

    uint32_t messageLength = singleReg ? 1 : 2;
    message |= messageLength << getSendMsgLengthBitOffset();
    message |= responseLength << getSendRspLengthBitOffset();

    return message;
}
// Create the message descriptor for a spill send instruction from an
// explicit size and offset.
//   doSLMSpill - true to build an SLM write descriptor (hword aligned block
//                write when the builder reports blocked SLM messages, an
//                untyped surface write otherwise).
//   size       - number of registers to write.
//   offset     - byte offset of the spill location; only used on the
//                scratch message path, where it is encoded in units of
//                32 bytes.
// When doSLMSpill is false, scratch messages must be enabled; any other
// configuration is a fatal error. Returns the descriptor as a UD immediate.
G4_Imm *
SpillManagerGMRF::createSpillSendMsgDesc(
    bool doSLMSpill,
    int  size,
    int  offset
)
{
    unsigned message = 0;

    if (doSLMSpill) {
        if (!builder_->hasBlockedSLMMessage()) {
            message = getUntypedSLMMsgDesc(size, false);
        }
        else {
            // SLM Hword aligned block write
            unsigned writePayloadCount = size;

            unsigned SLMIndex = 0xFE;
            message |= SLMIndex;

            unsigned dataElements = getHWordEncoding(writePayloadCount);
            message |= dataElements << 8;
            message |= 1 << 13;  // HWord

            unsigned messageType = DC1_HWORD_ALIGNED_BLOCK_WRITE;
            message |= messageType << getSendReadTypeBitOffset();

            unsigned headerPresent = 0x80000;
            message |= headerPresent;

            // With split send only the header counts towards the message
            // length; otherwise the payload rows are included as well.
            unsigned messageLength = 1;
            if (!useSplitSend()) {
                messageLength = writePayloadCount + 1;
            }
            message |= messageLength << getSendMsgLengthBitOffset();

            unsigned responseLength = 0;
            message |= responseLength << getSendRspLengthBitOffset();
        }
    }
    else if (useScratchMsg_) {
        /*
        bits    description
        18:0    function control
        19      Header present
        24:20   Response length
        28:25   Message length
        31:29   MBZ

        18:0
        11:0    Offset (12b hword offset)
        13:12   Block size (00 - 1 register, 01 - 2 regs, 10 - reserved, 11 - 4 regs)
        14      MBZ
        15      Invalidate after read (0 - no invalidate, 1 - invalidate)
        16      Channel mode (0 - oword, 1 - dword)
        17      Operation type (0 - read, 1 - write)
        18      Category (1 - scratch block read/write)
        */
        unsigned writePayloadCount = size;

        unsigned headerPresent = 0x80000;
        message |= headerPresent;

        // message length = 1 if we are using sends, 1 + payload otherwise
        unsigned messageLength = SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT;
        if (!useSplitSend()) {
            messageLength += writePayloadCount;
        }
        message |= (messageLength << getSendMsgLengthBitOffset());

        message |= (1 << SCRATCH_MSG_DESC_CATEORY);         // category
        message |= (1 << SCRATCH_MSG_DESC_CHANNEL_MODE);    // channel mode
        message |= (1 << SCRATCH_MSG_DESC_OPERATION_MODE);  // write operation

        unsigned numGRFs = size;
        unsigned blocksize_encoding = getScratchBlocksizeEncoding(numGRFs);
        message |= (blocksize_encoding << SCRATCH_MSG_DESC_BLOCK_SIZE);

        message |= (offset >> 5);  // displacement
    }
    else {
        MUST_BE_TRUE(false, "should not reach here");
    }

    return builder_->createImm(message, Type_UD);
}
// Emit "dst = framePtr + src", used when spill/fill offsets must be made
// relative to the frame pointer. The frame pointer is read as a scalar UD
// from the kernel's framePtrDcl. The instruction always carries
// InstOpt_WriteEnable (NoMask) and a null conditional modifier; the
// predicate is forwarded as given. The CISA offset is copied from curInst.
inline G4_INST *
SpillManagerGMRF::createAddFPInst (
    unsigned char execSize,
    G4_DstRegRegion * dst,
    G4_Operand * src,
    G4_Predicate * predicate
)
{
    RegionDesc* scalarRegion = builder_->getRegionScalar();
    G4_Operand* framePtr = builder_->createSrcRegRegion(
        Mod_src_undef,
        Direct,
        builder_->kernel.fg.framePtrDcl->getRegVar(),
        0,
        0,
        scalarRegion,
        Type_UD);

    auto addInst = builder_->createInst (
        predicate, G4_add, NULL, false, execSize, dst,
        framePtr, src, InstOpt_WriteEnable);
    addInst->setCISAOff(curInst->getCISAOff());
    return addInst;
}
// Emit "mov dst, src" for spill/fill code generation. The conditional
// modifier is always null; the predicate and the instruction options are
// whatever the caller passes in. The CISA offset is copied from curInst.
inline G4_INST *
SpillManagerGMRF::createMovInst (
    unsigned char execSize,
    G4_DstRegRegion * dst,
    G4_Operand * src,
    G4_Predicate * predicate,
    unsigned int options
)
{
    auto movInst = builder_->createInst (
        predicate,
        G4_mov,
        NULL,       // no conditional modifier
        false,
        execSize,
        dst,
        src,
        NULL,       // no second source
        options);
    movInst->setCISAOff(curInst->getCISAOff());
    return movInst;
}
// Emit a (non-split) send for spill/fill code generation.
//   execSize -- execution size of the send
//   postDst  -- destination (response) operand
//   payload  -- message payload source
//   desc     -- message descriptor immediate
//   funcID   -- shared function ID; passed as the extended descriptor
//   isWrite  -- true for a spill (write), false for a fill (read)
//   option   -- instruction options supplied by the caller
// The send has a null predicate, is tagged as spill/fill code and inherits
// the CISA offset of curInst.
inline G4_INST *
SpillManagerGMRF::createSendInst(
    unsigned char execSize,
    G4_DstRegRegion * postDst,
    G4_SrcRegRegion * payload,
    G4_Imm * desc,
    CISA_SHARED_FUNCTION_ID funcID,
    bool isWrite,
    unsigned option
)
{
    G4_Imm* extDesc = builder_->createImm(funcID, Type_UD);
    const bool isRead = !isWrite;

    G4_INST* send = builder_->createSendInst(
        NULL, G4_send, execSize, postDst,
        payload, extDesc, desc, option, isRead, isWrite, nullptr);

    send->setCISAOff(curInst->getCISAOff());
    send->setSpillOrFill();
    return send;
}
// Return the number of GRFs (8, 4, 2 or 1) to transfer with the next
// spill/fill message when `height` rows remain, or 0 when height < 1.
// The 8-GRF size is only chosen on GENX_SKL and later platforms and only
// when the caller allows it through useHWordMsg.
static int getNextSize(int height, bool useHWordMsg)
{
    if (getGenxPlatform() >= GENX_SKL && height >= 8 && useHWordMsg)
    {
        return 8;
    }
    if (height >= 4)
    {
        return 4;
    }
    if (height >= 2)
    {
        return 2;
    }
    if (height >= 1)
    {
        return 1;
    }
    return 0;
}

// Create the send instructions to fill in the value of the spilled range
// into fillRangeDcl in aligned portions.
//   fillRangeDcl -- destination of the fill
//   mRangeDcl    -- message header/payload range (unused on the SLM path)
//   regOff       -- first row of fillRangeDcl to fill
//   height       -- number of rows to fill
//   srcRegOff    -- row offset within the spilled variable to read from
// NOTE(review): the SLM path starts at row 0 of fillRangeDcl and at the
// GRF-aligned displacement of the base variable; it does not consult
// regOff/srcRegOff -- confirm callers only use it with zero offsets.
void
SpillManagerGMRF::sendInSpilledRegVarPortions (
    G4_Declare * fillRangeDcl,
    G4_Declare * mRangeDcl,
    unsigned regOff,
    unsigned height,
    unsigned srcRegOff
)
{
    if (canDoSLMSpill())
    {
        // break fills into 8/4/2/1 chunks
        int offset = 0;
        G4_RegVar* r = fillRangeDcl->getRegVar();
        G4_RegVarTmp* rvar = static_cast<G4_RegVarTmp*> (r);
        const int baseMemOffset = getDisp(rvar->getBaseRegVar()) & GRF_ALIGN_MASK;
        while (height > 0)
        {
            int size = getNextSize(height, true);
            // `offset` is the cumulative row count already filled, so the
            // byte address of this chunk is base + offset * 32. (Adding
            // offset * 32 to a running address, as was done previously,
            // double-counts earlier chunks from the third chunk onwards.)
            createFill(fillRangeDcl, offset, size, baseMemOffset + offset * 32);
            height -= size;
            offset += size;
        }
        return;
    }

    if (useScratchMsg_)
    {
        // Skip initializing message header
    }
    else
    {
        // Initialize the message header with the spill disp for portion.
        int offset = getDisp(fillRangeDcl->getRegVar()) + regOff * REG_BYTE_SIZE;
        getSpillOffset(offset);
        unsigned segmentDisp = offset / OWORD_BYTE_SIZE;
        G4_Imm * segmentDispImm = builder_->createImm (segmentDisp, Type_UD);
        G4_DstRegRegion * mHeaderOffsetDstRegion =
            createMHeaderBlockOffsetDstRegion (mRangeDcl->getRegVar ());

        // Inside a function (not a kernel), variables without file scope
        // are addressed relative to the frame pointer.
        if (builder_->getIsKernel() == false &&
            getReprRegVar(fillRangeDcl->getRegVar())->getDeclare()->getHasFileScope() == false)
        {
            createAddFPInst (
                SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion, segmentDispImm);
        }
        else
        {
            createMovInst (SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion, segmentDispImm);
        }
        numGRFMove ++;
    }

    // Read in the portions using a greedy approach: take the largest chunk
    // that fits, then recurse on the remainder.
    int currentStride = getNextSize(height, useScratchMsg_);
    if (currentStride)
    {
        createFillSendInstr(fillRangeDcl, mRangeDcl, regOff, currentStride, srcRegOff);
        numGRFFill++;

        if (height - currentStride > 0)
        {
            sendInSpilledRegVarPortions (
                fillRangeDcl, mRangeDcl, regOff + currentStride, height - currentStride, srcRegOff + currentStride);
        }
    }
}
// Copy rows of srcRegDcl into dstRegDcl, one mov per row, placing source
// row i at destination row dstOff + i. The number of rows copied is
// dstRegDcl's row count minus dstOff. Both declares must have
// numElems * elemSize equal to REG_BYTE_SIZE (asserted). Each emitted mov
// is counted in numGRFMove.
void
SpillManagerGMRF::copyOut256BitWideRegVar (
    G4_Declare * dstRegDcl,
    G4_Declare * srcRegDcl,
    unsigned dstOff
)
{
    assert (srcRegDcl->getNumElems () * srcRegDcl->getElemSize () ==
            REG_BYTE_SIZE &&
            dstRegDcl->getNumElems () * dstRegDcl->getElemSize () ==
            REG_BYTE_SIZE );

    const int rowCount = dstRegDcl->getNumRows () - dstOff;
    int row = 0;
    while (row < rowCount)
    {
        // Source: the whole row read as REG_DWORD_SIZE contiguous dwords.
        RegionDesc * wholeRow =
            builder_->rgnpool.createRegion (REG_DWORD_SIZE, REG_DWORD_SIZE, 1);
        G4_SrcRegRegion * from = builder_->createSrcRegRegion(
            Mod_src_undef, Direct, srcRegDcl->getRegVar (),
            (short) row, SUBREG_ORIGIN, wholeRow, Type_UD);

        // Destination: the matching row, shifted down by dstOff.
        G4_DstRegRegion * to = builder_->createDstRegRegion(
            Direct, dstRegDcl->getRegVar (), dstOff + row, SUBREG_ORIGIN,
            DEF_HORIZ_STRIDE, Type_UD);

        createMovInst (REG_DWORD_SIZE, to, from);
        numGRFMove ++;
        row++;
    }
}
// Decide whether the spilled region's segment must be pre-loaded from
// spill memory before the spilling instruction writes to it. A pre-load is
// required under the following circumstances:
//  - partial writes: horizontal stride greater than one, or when the emask
//    and predicate can possibly disable channels (for now, if predicates
//    or condition modifiers are present we conservatively assume a partial
//    write);
//  - writes where the segment is larger than the actually written region
//    (either because the spill offset for the region or its size is not
//    oword or dword aligned for writing the exact region).
// Returns true when a pre-load is needed, false for whole and aligned
// region writes.
bool
SpillManagerGMRF::shouldPreloadSpillRange (
    G4_DstRegRegion * spilledRangeRegion,
    uint8_t execSize,
    G4_INST * instContext
)
{
    // NOTE: the original code carries a disabled (#if 0) shortcut that
    // skipped the pre-fill for single-element variables written by an
    // unpredicated instruction. Its FIXME says it would need updating if
    // scalar variables are ever packed in memory. It stays disabled here.
    const bool wholeAlignedWrite =
        !isPartialRegion (spilledRangeRegion, execSize) &&
        !isUnalignedRegion (spilledRangeRegion, execSize) &&
        !isPartialContext (spilledRangeRegion, instContext, inSIMDCFContext_);

    return !wholeAlignedWrite;
}
// Create the send instruction(s) that pre-load the spilled region's
// segment from spill memory into spillRangeDcl, so that a partial or
// unaligned write can be overlaid on the existing contents.
//   spillRangeDcl      -- range receiving the pre-loaded segment
//   mRangeDcl          -- message header range for the fill send
//   spilledRangeRegion -- the destination region being spilled
//   execSize           -- execution size of the spilling instruction
void SpillManagerGMRF::preloadSpillRange (
    G4_Declare * spillRangeDcl,
    G4_Declare * mRangeDcl,
    G4_DstRegRegion * spilledRangeRegion,
    uint8_t execSize
)
{
    if (canDoSLMSpill())
    {
        // Fill one GRF, or two when the region crosses a GRF boundary,
        // starting at the GRF-aligned displacement of the region.
        const int grfCount = spilledRangeRegion->crossGRF() ? 2 : 1;
        const int alignedDisp = getRegionDisp(spilledRangeRegion) & GRF_ALIGN_MASK;
        createFill(spillRangeDcl, 0, grfCount, alignedDisp);
        return;
    }

    // When execSize is 32, regions <32, 32, 1> or <64; 32, 2> are invalid.
    // Use a uniform region descriptor <stride; 1, 0>. Note that stride could
    // be 0 when execsize is 1.
    uint16_t horzStride = spilledRangeRegion->getHorzStride();
    RegionDesc * uniformRegion = builder_->createRegionDesc(execSize, horzStride, 1, 0);

    if (!useScratchMsg_)
    {
        // Block message path: the preload region keeps the register and
        // sub-register offsets of the spilled operand.
        G4_SrcRegRegion * preloadSrc = builder_->createSrcRegRegion(
            Mod_src_undef, Direct, spillRangeDcl->getRegVar (),
            spilledRangeRegion->getRegOff (), spilledRangeRegion->getSubRegOff (),
            uniformRegion, spilledRangeRegion->getType ());
        numGRFFill++;
        createFillSendInstr(
            spillRangeDcl, mRangeDcl, preloadSrc, execSize, 0);
        return;
    }

    // Scratch message path.
    // The src region's base refers to the filled region's base. The size
    // of the src region equals the number of rows that have to be filled,
    // starting at the reg offset specified in the original operand. E.g.,
    // let the spilled operand be V40(3,3):
    //
    // => mov (1) V40(3,3)<1>:ud V30(0,0)<0;1,0>:ud
    // When this is replaced with a preload fill:
    // => mov (1) TM_GRF_V40_0(0,0)<1>:ud V30(0,0)<0;1,0>:ud
    // => send (16) SP_V40_0(0,0)<1>:ud ...   <--- load V40's 3rd row in SP_V40_0
    // => mov (1) SP_V40_0(0,3)<1>:ud TM_GRF_V40_0(0,0)<8;8,1>:ud   <--- overlay
    // => send (16) null ...   <--- store V40's updated 3rd row to memory
    //
    // Since the filled register's register offset is 0,0 in the first send
    // instruction, that change is made when creating the operand itself
    // (REG_ORIGIN below).
    G4_SrcRegRegion * preloadSrc = builder_->createSrcRegRegion(
        Mod_src_undef, Direct, spillRangeDcl->getRegVar (),
        REG_ORIGIN, spilledRangeRegion->getSubRegOff(),
        uniformRegion, spilledRangeRegion->getType ());

    // Attach preloadSrc to a dummy mov so getLeftBound/getRightBound won't
    // crash when called from crossGRF in createFillSendMsgDesc.
    builder_->createInternalInst(NULL, G4_mov, NULL, false, execSize, builder_->createNullDst(Type_UD), preloadSrc, NULL, 0);

    numGRFFill++;
    createFillSendInstr (
        spillRangeDcl, mRangeDcl, preloadSrc, execSize);
}
// Create the send instruction that writes `height` rows of the spilled
// regvar's range to spill memory.
//   spillRangeDcl -- range holding the data to spill
//   mRangeDcl     -- message header/payload range (non-split send only)
//   regOff        -- first row of spillRangeDcl to write
//   height        -- number of rows to write
//   srcRegOff     -- row offset within the spilled variable; used for the
//                    descriptor on the scratch message path
// Returns the created send/sends instruction.
G4_INST *
SpillManagerGMRF::createSpillSendInstr (
    G4_Declare * spillRangeDcl,
    G4_Declare * mRangeDcl,
    unsigned regOff,
    unsigned height,
    unsigned srcRegOff
)
{
    // execSize is filled in by createSpillSendMsgDesc.
    unsigned execSize = 0;
    G4_Imm * descImm = NULL;

    if (!useScratchMsg_)
    {
        descImm = createSpillSendMsgDesc (regOff, height, execSize);
    }
    else
    {
        G4_RegVar* rangeVar = spillRangeDcl->getRegVar();
        G4_RegVarTmp* tmpVar = static_cast<G4_RegVarTmp*> (rangeVar);
        descImm =
            createSpillSendMsgDesc (srcRegOff, height, execSize, tmpVar->getBaseRegVar());
#ifdef _DEBUG
        // Low 12 bits of the scratch descriptor hold the GRF offset.
        int offset = (descImm->getInt() & 0xFFF) * GENX_GRF_REG_SIZ;
        MUST_BE_TRUE(offset >= globalScratchOffset, "incorrect offset");
#endif
    }

    G4_DstRegRegion * nullDst = builder_->createNullDst(execSize > 8 ? Type_UW : Type_UD);

    if (!useSplitSend())
    {
        // Single-source send: header and data both live in mRangeDcl.
        G4_SrcRegRegion * payload = builder_->createSrcRegRegion(Mod_src_undef, Direct,
            mRangeDcl->getRegVar(), 0, 0, builder_->getRegionStride1(), Type_UD);
        return createSendInst ((unsigned char) execSize, nullDst, payload, descImm, SFID_DP_DC, true, InstOpt_WriteEnable);
    }

    // Split send: r0 is the header, the spill range itself is the data.
    unsigned extMsgLength = height;
    uint16_t extFuncCtrl = 0;
    // both scratch and block read use DC
    CISA_SHARED_FUNCTION_ID funcID = SFID_DP_DC;

    G4_SendMsgDescriptor* msgDesc = builder_->createSendMsgDesc( descImm->getInt() & 0x0007FFFFu, 0, 1, funcID, false, extMsgLength, extFuncCtrl, false, true, NULL, NULL);

    RegionDesc* stride1 = builder_->getRegionStride1();
    G4_SrcRegRegion* header = builder_->Create_Src_Opnd_From_Dcl(builder_->getBuiltinR0(), stride1);
    G4_SrcRegRegion* data = createBlockSpillRangeSrcRegion(spillRangeDcl->getRegVar (), regOff);

    G4_INST* splitSend = builder_->createSplitSendInst( NULL, G4_sends, (unsigned char) execSize, nullDst, header, data, descImm, InstOpt_WriteEnable, msgDesc, NULL, 0);
    splitSend->setSpillOrFill();
    splitSend->setCISAOff(curInst->getCISAOff());
    return splitSend;
}
// Create the send instruction that writes the spilled destination
// region's segment to spill memory.
//   spillRangeDcl      -- range holding the data to spill
//   mRangeDcl          -- message header/payload range (non-split send only)
//   spilledRangeRegion -- the destination region being spilled
//   execSize           -- execution size handed to the descriptor builder
//   option             -- instruction options for the send
// Returns the created send/sends instruction.
G4_INST *
SpillManagerGMRF::createSpillSendInstr (
    G4_Declare * spillRangeDcl,
    G4_Declare * mRangeDcl,
    G4_DstRegRegion * spilledRangeRegion,
    unsigned execSize,
    unsigned option
)
{
    G4_Imm * descImm =
        createSpillSendMsgDesc (spilledRangeRegion, execSize);

#ifdef _DEBUG
    if (useScratchMsg_)
    {
        // Low 12 bits of the scratch descriptor hold the GRF offset.
        int offset = (descImm->getInt() & 0xFFF) * GENX_GRF_REG_SIZ;
        MUST_BE_TRUE(offset >= globalScratchOffset, "incorrect offset");
    }
#endif

    G4_DstRegRegion * nullDst = builder_->createNullDst(execSize > 8 ? Type_UW : Type_UD);

    if (!useSplitSend())
    {
        // Single-source send: header and data both live in mRangeDcl.
        G4_SrcRegRegion * payload = builder_->createSrcRegRegion(Mod_src_undef, Direct,
            mRangeDcl->getRegVar(), 0, 0, builder_->getRegionStride1(), Type_UD);
        return createSendInst ((unsigned char) execSize, nullDst, payload, descImm, SFID_DP_DC, true, option);
    }

    // Split send: r0 is the header, the whole spill range is the data.
    unsigned extMsgLength = spillRangeDcl->getNumRows();
    uint16_t extFuncCtrl = 0;
    // both scratch and block read use DC
    CISA_SHARED_FUNCTION_ID funcID = SFID_DP_DC;

    G4_SendMsgDescriptor* msgDesc = builder_->createSendMsgDesc( descImm->getInt() & 0x0007FFFFu, 0, 1, funcID, false, extMsgLength, extFuncCtrl, false, true, NULL, NULL);

    RegionDesc* stride1 = builder_->getRegionStride1();
    G4_SrcRegRegion* header = builder_->Create_Src_Opnd_From_Dcl(builder_->getBuiltinR0(), stride1);
    G4_SrcRegRegion* data = builder_->Create_Src_Opnd_From_Dcl(spillRangeDcl, stride1);

    G4_INST* splitSend = builder_->createSplitSendInst( NULL, G4_sends, (unsigned char) execSize, nullDst, header, data, descImm, option, msgDesc, NULL, 0);
    splitSend->setSpillOrFill();
    splitSend->setCISAOff(curInst->getCISAOff());
    return splitSend;
}
// Create the spill (split) send instruction, with any header/address setup
// it needs, and append it to the builder's instruction list.
//   spillDcl      -- variable holding the data to spill
//   spillRegOff   -- GRF offset within spillDcl to start writing from
//   size          -- number of GRFs to write
//   logicalOffset -- logical byte offset of the spill location; translated
//                    by getSpillOffset into the actual SLM/scratch offset
//   spillMask     -- instruction options for the send; collapsed to exactly
//                    InstOpt_WriteEnable when that bit is set
//   oldExecSize   -- execution size of the instruction being spilled
void SpillManagerGMRF::createSpill(
    G4_Declare* spillDcl,
    int spillRegOff,
    int size,
    int logicalOffset,
    uint32_t spillMask,
    int oldExecSize
)
{
    int physOffset = logicalOffset;
    const bool toSLM = getSpillOffset(physOffset);
    G4_Imm * descImm = createSpillSendMsgDesc(toSLM, size, physOffset);

    CISA_SHARED_FUNCTION_ID sfid = SFID_DP_DC;
    G4_Declare* headerDcl = nullptr;
    int sendExecSize = (size > 1 || oldExecSize > 8) ? 16 : 8;

    if (spillMask & InstOpt_WriteEnable)
    {
        spillMask = InstOpt_WriteEnable;
    }

    if (!toSLM)
    {
        // Scratch spill: r0 serves as the message header.
        headerDcl = builder_->getBuiltinR0();
    }
    else if (builder_->hasBlockedSLMMessage())
    {
        // create 1 GRF header
        //   mov (1) H0.2<1>:ud offset:uw {NoMask}
        // also add a pseudo kill to make sure the header's live range is
        // properly terminated
        headerDcl = builder_->createDeclareNoLookup("Spill_Header", G4_GRF, 8, 1, Type_UD);
        G4_DstRegRegion* killDst = builder_->Create_Dst_Opnd_From_Dcl(headerDcl, 1);
        auto killInst = builder_->createInst(nullptr, G4_pseudo_kill, nullptr, false, 1, killDst, nullptr, nullptr, InstOpt_NoOpt);
        killInst->setCISAOff(curInst->getCISAOff());

        G4_DstRegRegion* offsetSlot = builder_->createDstRegRegion(Direct, headerDcl->getRegVar(), 0, 2,
            1, Type_UD);
        G4_Imm* offsetImm = builder_->createImm(physOffset, Type_UW);
        createMovInst(1, offsetSlot, offsetImm);
    }
    else
    {
        // Untyped SLM write. The exec size depends on the number of GRFs
        // we spill; this should match oldExecSize if pre-fill is not
        // required.
        sendExecSize = size == 1 ? 8 : 16;
        sfid = SFID_DP_DC1;
        headerDcl = createSLMSpillAddr(size, physOffset);
    }

    unsigned extMsgLength = size;
    uint16_t extFuncCtrl = 0;
    G4_SendMsgDescriptor* msgDesc = builder_->createSendMsgDesc(descImm->getInt() & 0x0007FFFFu, 0, 1, sfid, false, extMsgLength, extFuncCtrl, false, true, NULL, NULL);

    G4_SrcRegRegion* header = builder_->Create_Src_Opnd_From_Dcl(headerDcl, builder_->getRegionStride1());
    G4_SrcRegRegion* data = createBlockSpillRangeSrcRegion(spillDcl->getRegVar(), spillRegOff);
    G4_DstRegRegion * nullDst = builder_->createNullDst(sendExecSize == 16 ? Type_UW : Type_UD);

    G4_INST* spillInst = builder_->createSplitSendInst(NULL, G4_sends, (unsigned char)sendExecSize, nullDst, header, data, descImm, spillMask, msgDesc, NULL, 0);
    spillInst->setSpillOrFill();
    spillInst->setCISAOff(curInst->getCISAOff());
}
// Create the message descriptor for a fill send instruction that reads
// whole rows of a filled regvar.
//   regOff   -- row offset; on the scratch path it is added to the hword
//               offset encoded in the descriptor
//   height   -- number of rows to read
//   execSize -- [out] execution size to use for the send
//   base     -- base regvar whose displacement gives the spill location
//               (read on the scratch path only)
// Returns a Type_UD immediate holding the descriptor bits.
G4_Imm *
SpillManagerGMRF::createFillSendMsgDesc (
    unsigned regOff,
    unsigned height,
    unsigned & execSize,
    G4_RegVar* base
)
{
    const unsigned headerPresentBit = 0x80000;
    const unsigned segmentByteSize = height * REG_BYTE_SIZE;
    const unsigned rspLength = cdiv (segmentByteSize, REG_BYTE_SIZE);
    unsigned desc = 0;

    if (!useScratchMsg_)
    {
        // Oword block read from the stateless surface.
        const unsigned statelessSurfaceIndex = 0xFF;
        desc = statelessSurfaceIndex | (rspLength << getSendRspLengthBitOffset ());
        desc |= headerPresentBit;

        const unsigned readOpcode = getSendOwordReadType ();
        desc |= readOpcode << getSendReadTypeBitOffset ();

        const unsigned headerGRFs = OWORD_PAYLOAD_HEADER_MIN_HEIGHT;
        desc |= headerGRFs << getSendMsgLengthBitOffset ();

        unsigned segmentOwordSize =
            cdiv (segmentByteSize, OWORD_BYTE_SIZE);
        assert (segmentOwordSize <= 8);
        desc |= blockSendBlockSizeCode (segmentOwordSize);

        execSize = LIMIT_SEND_EXEC_SIZE (segmentOwordSize * DWORD_BYTE_SIZE);
    }
    else
    {
        // Scratch block read.
        desc = rspLength << getSendRspLengthBitOffset ();
        desc |= SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT << getSendMsgLengthBitOffset ();
        desc |= headerPresentBit;
        desc |= (1 << SCRATCH_MSG_DESC_CATEORY);
        desc |= (0 << SCRATCH_MSG_INVALIDATE_AFTER_READ);

        const unsigned blockSizeField = getScratchBlocksizeEncoding(height);
        desc |= (blockSizeField << SCRATCH_MSG_DESC_BLOCK_SIZE);

        // Hword offset of the variable (byte offset >> 5) plus the row offset.
        int offset = getDisp(base);
        getSpillOffset(offset);
        desc |= ((offset >> 5) + regOff);

        execSize = 16;
    }

    return builder_->createImm (desc, Type_UD);
}
// Create the message descriptor for a fill send instruction that reads the
// segment covering a filled region.
//   filledRangeRegion -- the region (source or destination) being filled
//   execSize          -- [in/out] execution size of the access on entry;
//                        set to the execution size to use for the send
// Returns a Type_UD immediate holding the descriptor bits.
template <class REGION_TYPE>
G4_Imm *
SpillManagerGMRF::createFillSendMsgDesc (
    REGION_TYPE * filledRangeRegion,
    unsigned & execSize
)
{
    const unsigned headerPresentBit = 0x80000;
    unsigned segmentByteSize =
        getSegmentByteSize (filledRangeRegion, execSize);
    unsigned desc = 0;

    if (!useScratchMsg_)
    {
        // Oword block read from the stateless surface.
        const unsigned statelessSurfaceIndex = 0xFF;
        const unsigned rspLength = cdiv (segmentByteSize, REG_BYTE_SIZE);
        desc = statelessSurfaceIndex | (rspLength << getSendRspLengthBitOffset ());
        desc |= headerPresentBit;

        const unsigned readOpcode = getSendOwordReadType ();
        desc |= readOpcode << getSendReadTypeBitOffset ();

        const unsigned headerGRFs = OWORD_PAYLOAD_HEADER_MIN_HEIGHT;
        desc |= headerGRFs << getSendMsgLengthBitOffset ();

        const unsigned segmentOwords =
            cdiv (segmentByteSize, OWORD_BYTE_SIZE);
        desc |= blockSendBlockSizeCode (segmentOwords);

        execSize = LIMIT_SEND_EXEC_SIZE (segmentOwords * DWORD_BYTE_SIZE);
    }
    else
    {
        // Scratch block read. A region crossing a GRF boundary always
        // reads two full registers.
        if (filledRangeRegion->crossGRF())
        {
            segmentByteSize = 2 * REG_BYTE_SIZE;
        }

        const unsigned rspLength = cdiv (segmentByteSize, REG_BYTE_SIZE);
        desc = rspLength << getSendRspLengthBitOffset ();
        desc |= headerPresentBit;
        desc |= (SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT << getSendMsgLengthBitOffset());
        desc |= (1 << SCRATCH_MSG_DESC_CATEORY);
        desc |= (0 << SCRATCH_MSG_INVALIDATE_AFTER_READ);

        // Scratch msg descriptor requires a special encoding for block size:
        //   00 - 1 GRF
        //   01 - 2 GRFs
        //   10 - reserved
        //   11 - 4 GRFs
        const unsigned blockSizeField = getScratchBlocksizeEncoding(rspLength);
        desc |= (blockSizeField << SCRATCH_MSG_DESC_BLOCK_SIZE);

        // Hword offset of the region (byte offset >> 5).
        int offset = getRegionDisp(filledRangeRegion);
        getSpillOffset(offset);
        desc |= offset >> 5;

        execSize = 16;
    }

    return builder_->createImm (desc, Type_UD);
}
// Build the message descriptor immediate for a block fill (read).
//   doSLMFill -- true: read from SLM, using the HWord aligned block read
//                when the builder reports blocked SLM messages and the
//                untyped SLM message otherwise;
//                false: use the scratch block read message
//   size      -- number of GRFs to read
//   offset    -- in bytes (may be either in scratch or SLM); only the
//                scratch path encodes it in the descriptor (as offset >> 5)
// Returns a Type_UD immediate holding the descriptor bits.
G4_Imm* SpillManagerGMRF::createFillSendMsgDesc(
    bool doSLMFill,
    int size,
    int offset)
{
    const unsigned headerPresentBit = 0x80000;
    const unsigned rspLength = size;
    uint32_t desc = 0;

    if (doSLMFill && !builder_->hasBlockedSLMMessage())
    {
        // SLM untyped read
        desc = getUntypedSLMMsgDesc(size, true);
    }
    else if (doSLMFill)
    {
        // SLM Hword block read
        const unsigned slmSurfaceIndex = 0xFE;
        const unsigned hwordCountField = getHWordEncoding(rspLength);
        const unsigned readOpcode = DC1_HWORD_ALIGNED_BLOCK_READ;
        // The message itself is just the one-GRF header.
        const unsigned msgLength = 1;

        desc |= slmSurfaceIndex;
        desc |= hwordCountField << 8;
        desc |= 1 << 13; // HWord
        desc |= readOpcode << getSendReadTypeBitOffset();
        desc |= headerPresentBit;
        desc |= msgLength << getSendMsgLengthBitOffset();
        desc |= rspLength << getSendRspLengthBitOffset();
    }
    else if (useScratchMsg_)
    {
        desc = rspLength << getSendRspLengthBitOffset();
        desc |= headerPresentBit;
        desc |= (SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT << getSendMsgLengthBitOffset());
        desc |= (1 << SCRATCH_MSG_DESC_CATEORY);
        desc |= (0 << SCRATCH_MSG_INVALIDATE_AFTER_READ);

        // Scratch msg descriptor requires a special encoding for block size:
        //   00 - 1 GRF
        //   01 - 2 GRFs
        //   10 - reserved
        //   11 - 4 GRFs
        const unsigned blockSizeField = getScratchBlocksizeEncoding(rspLength);
        desc |= (blockSizeField << SCRATCH_MSG_DESC_BLOCK_SIZE);
        desc |= (offset >> 5);
    }
    else
    {
        MUST_BE_TRUE(false, "should not reach here");
    }

    return builder_->createImm(desc, Type_UD);
}
// Create the send instruction that reads `height` rows of a spilled
// regvar's segment from spill memory into fillRangeDcl.
//   fillRangeDcl -- destination of the fill
//   mRangeDcl    -- message header range used as the send payload
//   regOff       -- first row of fillRangeDcl to write
//   height       -- number of rows to read
//   srcRegOff    -- row offset within the spilled variable; used for the
//                   descriptor on the scratch message path
// A pseudo-kill of fillRangeDcl is emitted just before the send.
// Returns the created send instruction.
G4_INST *
SpillManagerGMRF::createFillSendInstr (
    G4_Declare * fillRangeDcl,
    G4_Declare * mRangeDcl,
    unsigned regOff,
    unsigned height,
    unsigned srcRegOff
)
{
    // execSize is filled in by createFillSendMsgDesc.
    unsigned execSize = 0;
    G4_Imm * descImm = NULL;

    if (!useScratchMsg_)
    {
        descImm = createFillSendMsgDesc (regOff, height, execSize);
    }
    else
    {
        G4_RegVar* rangeVar = fillRangeDcl->getRegVar();
        G4_RegVarTmp* tmpVar = static_cast<G4_RegVarTmp*> (rangeVar);
        descImm =
            createFillSendMsgDesc (srcRegOff, height, execSize, tmpVar->getBaseRegVar());
#ifdef _DEBUG
        // Low 12 bits of the scratch descriptor hold the GRF offset.
        int offset = (descImm->getInt() & 0xFFF) * GENX_GRF_REG_SIZ;
        MUST_BE_TRUE(offset >= globalScratchOffset, "incorrect offset");
#endif
    }

    G4_DstRegRegion * fillDst = builder_->createDstRegRegion(
        Direct, fillRangeDcl->getRegVar (), static_cast<short>(regOff), SUBREG_ORIGIN,
        DEF_HORIZ_STRIDE, (execSize > 8) ? Type_UW : Type_UD);

    G4_SrcRegRegion * header = builder_->createSrcRegRegion(Mod_src_undef, Direct,
        mRangeDcl->getRegVar(), 0, 0, builder_->getRegionStride1(), Type_UD);

    // Pseudo-kill on the fill destination ahead of the send.
    G4_DstRegRegion* killDst = builder_->createDstRegRegion(Direct, fillRangeDcl->getRegVar(), 0, 0, 1, Type_UD);
    builder_->createInst(NULL, G4_pseudo_kill, NULL, false, 1, killDst, NULL, NULL, 0);

    return createSendInst ((unsigned char) execSize, fillDst, header, descImm, SFID_DP_DC, false, InstOpt_WriteEnable);
}
// Create the send instruction that reads the segment covering a filled
// source region from spill memory into fillRangeDcl.
//   fillRangeDcl      -- destination of the fill
//   mRangeDcl         -- message header range used as the send payload
//   filledRangeRegion -- the source region being filled
//   execSize          -- execution size of the access; forced to 16 on the
//                        scratch message path
//   regOff            -- row of fillRangeDcl where the data is written
// A pseudo-kill of fillRangeDcl is emitted just before the send.
// Returns the created send instruction.
G4_INST *
SpillManagerGMRF::createFillSendInstr (
    G4_Declare * fillRangeDcl,
    G4_Declare * mRangeDcl,
    G4_SrcRegRegion * filledRangeRegion,
    unsigned execSize,
    unsigned regOff
)
{
    G4_Imm * descImm =
        createFillSendMsgDesc (filledRangeRegion, execSize);

    if (useScratchMsg_)
    {
#ifdef _DEBUG
        // Low 12 bits of the scratch descriptor hold the GRF offset.
        int offset = (descImm->getInt() & 0xFFF) * GENX_GRF_REG_SIZ;
        MUST_BE_TRUE(offset >= globalScratchOffset, "incorrect offset");
#endif
        execSize = 16;
    }

    G4_DstRegRegion * fillDst = builder_->createDstRegRegion(
        Direct, fillRangeDcl->getRegVar (), static_cast<short>(regOff), SUBREG_ORIGIN,
        DEF_HORIZ_STRIDE, (execSize > 8) ? Type_UW : Type_UD);

    G4_SrcRegRegion * header = builder_->createSrcRegRegion(Mod_src_undef, Direct,
        mRangeDcl->getRegVar(), 0, 0, builder_->getRegionStride1(), Type_UD);

    // Pseudo-kill on the fill destination ahead of the send.
    G4_DstRegRegion* killDst = builder_->createDstRegRegion(Direct, fillRangeDcl->getRegVar(), 0, 0, 1, Type_UD);
    builder_->createInst(NULL, G4_pseudo_kill, NULL, false, 1, killDst, NULL, NULL, 0);

    return createSendInst ((unsigned char) execSize, fillDst, header, descImm, SFID_DP_DC, false, InstOpt_WriteEnable);
}
/// Compute the 8/16 addresses for an SLM spill/fill when the untyped
/// message is used, and return the temp variable that holds them.
///   numReg      -- number of GRFs being spilled/filled; 1 produces 8
///                  addresses, anything else 16
///   spillOffset -- offset added to every address (emitted as a UW immediate)
///
/// Emitted code (names as in the original notes):
///   r1 <-- builtinSLMSpillAddr ([0, 4, 8, ... 60] + perThreadSLMStart), created in prolog
///   r3 <-- builtinImmVector4 ([0, 4, 8, ... 60]), created in prolog
///   if numReg > 2:
///     mad (N) tmp r1 r3 (numChannels - 1)
///     add (N) tmp tmp offset
///   else:
///     add (N) tmp r1 offset
/// where numChannels is the number of channels enabled in
/// getChMaskForSpill(numReg). Both instructions use InstOpt_WriteEnable.
G4_Declare* SpillManagerGMRF::createSLMSpillAddr(int numReg, uint32_t spillOffset)
{
    G4_Declare* slmBase = builder_->getBuiltinSLMSpillAddr();
    int numAddr = numReg == 1 ? 8 : 16;
    G4_Declare* addrDcl = builder_->createTempVar(numAddr, Type_UD, Either, Any);

    // Source of the final add: the prolog base, unless the mad below
    // produces a scaled base in the temp first.
    G4_Declare* addBase = slmBase;
    if (numReg > 2)
    {
        G4_SrcRegRegion *baseSrc = builder_->Create_Src_Opnd_From_Dcl(slmBase, builder_->getRegionStride1());
        G4_SrcRegRegion *strideSrc = builder_->Create_Src_Opnd_From_Dcl(builder_->getBuiltinImmVector4(), builder_->getRegionStride1());
        uint32_t numChannels = getChMaskForSpill(numReg).getNumEnabledChannels();
        G4_Imm* scaleImm = builder_->createImm(numChannels - 1, Type_UW);
        G4_DstRegRegion *madDst = builder_->Create_Dst_Opnd_From_Dcl(addrDcl, 1);
        builder_->createInst(nullptr, G4_mad, nullptr, false, numAddr, madDst, baseSrc, strideSrc, scaleImm, InstOpt_WriteEnable);
        addBase = addrDcl;
    }

    G4_SrcRegRegion* addSrc = builder_->Create_Src_Opnd_From_Dcl(addBase, builder_->getRegionStride1());
    G4_Imm* offsetImm = builder_->createImm(spillOffset, Type_UW);
    G4_DstRegRegion* addDst = builder_->Create_Dst_Opnd_From_Dcl(addrDcl, 1);
    builder_->createInst(nullptr, G4_add, nullptr, false, numAddr, addDst, addSrc, offsetImm, InstOpt_WriteEnable);

    return addrDcl;
}
// Create the fill send instruction (with optional header moves) and append
// them to the end of the global inst list. They will be inserted at the
// correct code position by the caller.
// @fillDcl       -- destination of the fill
// @fillRegOff    -- GRF offset of the fill dcl
// @size          -- number of GRFs to read
// @logicalOffset -- the logical byte offset to read from. This will be
//                   translated to the actual address in SLM/scratch
// Counts one fill in numGRFFill (and one move in numGRFMove when a blocked
// SLM header has to be set up).
void SpillManagerGMRF::createFill(
    G4_Declare* fillDcl,
    int fillRegOff,
    int size,
    int logicalOffset
)
{
    int physOffset = logicalOffset;
    const bool fromSLM = getSpillOffset(physOffset);
    G4_Imm * descImm =
        createFillSendMsgDesc(fromSLM, size, physOffset);

    CISA_SHARED_FUNCTION_ID sfid = SFID_DP_DC;
    G4_Declare* sendSrc = nullptr;
    int sendExecSize = 16;

    if (!fromSLM)
    {
        // Scratch fill: r0 serves as the message header.
        sendSrc = builder_->getBuiltinR0();
    }
    else if (builder_->hasBlockedSLMMessage())
    {
        // create 1 GRF header
        //   mov (1) H0.2<1>:ud offset:uw {NoMask}
        // also create a pseudo kill so that the header's live range is
        // properly terminated
        sendSrc = builder_->createDeclareNoLookup("Fill_Header", G4_GRF, 8, 1, Type_UD);
        G4_DstRegRegion* headerKillDst = builder_->Create_Dst_Opnd_From_Dcl(sendSrc, 1);
        auto headerKill = builder_->createInst(nullptr, G4_pseudo_kill, nullptr, false, 1, headerKillDst, nullptr, nullptr, InstOpt_NoOpt);
        headerKill->setCISAOff(curInst->getCISAOff());

        G4_DstRegRegion* offsetSlot = builder_->createDstRegRegion(Direct, sendSrc->getRegVar(), 0, 2,
            1, Type_UD);
        G4_Imm* offsetImm = builder_->createImm(physOffset, Type_UW);
        createMovInst(1, offsetSlot, offsetImm);
        numGRFMove++;
    }
    else
    {
        // Untyped SLM read: per-channel addresses instead of a header.
        sendExecSize = size == 1 ? 8 : 16;
        sendSrc = createSLMSpillAddr(size, physOffset);
        sfid = SFID_DP_DC1;
    }
    assert(sendSrc != nullptr);

    // add pseudo-kill to limit fill dst's live range (the original author
    // questioned whether this is necessary)
    G4_DstRegRegion* fillKillDst = builder_->createDstRegRegion(Direct, fillDcl->getRegVar(), 0, 0, 1, Type_UD);
    builder_->createInst(NULL, G4_pseudo_kill, NULL, false, 1, fillKillDst, NULL, NULL, 0);

    G4_DstRegRegion * fillDst = builder_->createDstRegRegion(
        Direct, fillDcl->getRegVar(), (short)fillRegOff, 0, 1, Type_UW);
    G4_SrcRegRegion* payload = builder_->Create_Src_Opnd_From_Dcl(sendSrc, builder_->getRegionStride1());

    G4_INST* fillSend = createSendInst((unsigned char)sendExecSize, fillDst, payload, descImm, sfid, false, InstOpt_WriteEnable);
    fillSend->setSpillOrFill();
    numGRFFill++;
}
// Redirect the destination of spilledInst from the spilled region to the
// start (REG_ORIGIN, SUBREG_ORIGIN) of spillRangeDcl. The horizontal
// stride, type and accRegSel of the original region are carried over.
void
SpillManagerGMRF::replaceSpilledRange (
    G4_Declare * spillRangeDcl,
    G4_DstRegRegion * spilledRegion,
    G4_INST * spilledInst
)
{
    G4_DstRegRegion * replacementDst = builder_->createDstRegRegion(
        Direct,
        spillRangeDcl->getRegVar (),
        REG_ORIGIN,
        SUBREG_ORIGIN,
        spilledRegion->getHorzStride (),
        spilledRegion->getType(),
        spilledRegion->getAccRegSel()   // we need to preserve accRegSel if it's set
        );

    spilledInst->setDest (replacementDst);
}
// Replace every source operand of filledInst that compares equal to
// filledRegion with an equivalent region over fillRangeDcl. The execution
// size used to build the replacement is halved when the region is a
// multi-register compressed source of the instruction.
void
SpillManagerGMRF::replaceFilledRange (
    G4_Declare * fillRangeDcl,
    G4_SrcRegRegion * filledRegion,
    G4_INST * filledInst
)
{
    const bool isComprSource = isMultiRegComprSource (filledRegion, filledInst);
    unsigned execSize = filledInst->getExecSize ();
    if (isComprSource)
    {
        execSize /= 2;
    }

    G4_SrcRegRegion * replacementSrc =
        createFillRangeSrcRegion (
            fillRangeDcl->getRegVar (), filledRegion, execSize);

    for (int srcIdx = 0; srcIdx < G4_MAX_SRCS; srcIdx++)
    {
        G4_SrcRegRegion * candidate =
            (G4_SrcRegRegion *) filledInst->getSrc(srcIdx);
        if (candidate == NULL)
        {
            continue;
        }
        if (*candidate == *filledRegion)
        {
            filledInst->setSrc (replacementSrc, srcIdx);
        }
    }
}
// Create the send instructions to write out the spillRangeDcl in aligned
// portions.
//   spillRangeDcl -- range holding the data to spill
//   mRangeDcl     -- message header/payload range (unused on the SLM path)
//   regOff        -- first row of spillRangeDcl to write
//   height        -- number of rows to write
//   srcRegOff     -- row offset within the spilled variable to write to
// NOTE(review): the SLM path starts at row 0 of spillRangeDcl and at the
// GRF-aligned displacement of the base variable; it does not consult
// regOff/srcRegOff -- confirm callers only use it with zero offsets.
void
SpillManagerGMRF::sendOutSpilledRegVarPortions (
    G4_Declare * spillRangeDcl,
    G4_Declare * mRangeDcl,
    unsigned regOff,
    unsigned height,
    unsigned srcRegOff
)
{
    if (canDoSLMSpill())
    {
        // break spills into 8/4/2/1 chunks
        int offset = 0;
        G4_RegVar* r = spillRangeDcl->getRegVar();
        G4_RegVarTmp* rvar = static_cast<G4_RegVarTmp*> (r);
        const int baseMemOffset = getDisp(rvar->getBaseRegVar()) & GRF_ALIGN_MASK;
        while (height > 0)
        {
            int size = getNextSize(height, true);
            // `offset` is the cumulative row count already spilled, so the
            // byte address of this chunk is base + offset * 32. (Adding
            // offset * 32 to a running address, as was done previously,
            // double-counts earlier chunks from the third chunk onwards.)
            createSpill(spillRangeDcl, offset, size, baseMemOffset + offset * 32, InstOpt_WriteEnable, 16);
            height -= size;
            offset += size;
        }
        return;
    }

    if( useScratchMsg_)
    {
        // No need to make a copy of offset because when using
        // scratch msg descriptor, the offset is part of send
        // msg descriptor and not the header.
    }
    else
    {
        // Initialize the message header with the spill disp for portion.
        int offset = getDisp(spillRangeDcl->getRegVar()) + regOff * REG_BYTE_SIZE;
        getSpillOffset(offset);
        unsigned segmentDisp = offset / OWORD_BYTE_SIZE;

        G4_Imm * segmentDispImm = builder_->createImm (segmentDisp, Type_UD);
        G4_DstRegRegion * mHeaderOffsetDstRegion =
            createMHeaderBlockOffsetDstRegion (mRangeDcl->getRegVar ());

        // Inside a function (not a kernel), variables without file scope
        // are addressed relative to the frame pointer.
        if (builder_->getIsKernel() == false &&
            getReprRegVar(spillRangeDcl->getRegVar())->getDeclare()->getHasFileScope() == false)
        {
            createAddFPInst (
                SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion, segmentDispImm);
        }
        else
        {
            createMovInst (SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion, segmentDispImm);
        }
        numGRFMove ++;
    }

    // Write out the portions using a greedy approach: take the largest
    // chunk that fits, then recurse on the remainder.
    int currentStride = getNextSize(height, useScratchMsg_);
    if (currentStride)
    {
        initMWritePayload (spillRangeDcl, mRangeDcl, regOff, currentStride);
        createSpillSendInstr (spillRangeDcl, mRangeDcl, regOff, currentStride, srcRegOff);
        numGRFSpill++;

        if (height - currentStride > 0) {
            sendOutSpilledRegVarPortions (
                spillRangeDcl, mRangeDcl, regOff + currentStride, height - currentStride, srcRegOff + currentStride);
        }
    }
}
// Create the spill range for the destination of the instruction at
// spilledInstIter, rewrite that destination to use the replacement range and
// emit the code that saves the range to spill memory.
//
// spilledRegion   - destination region of the instruction being spilled.
// spilledInstIter - position of that instruction inside instList.
// instList        - instruction list that receives the generated code.
//
// Returns an iterator to the instruction that followed the spilled one on
// entry; the generated spill code is spliced in front of that position.
INST_LIST::iterator
SpillManagerGMRF::insertSpillRangeCode (
G4_DstRegRegion * spilledRegion,
INST_LIST::iterator spilledInstIter,
INST_LIST & instList
)
{
unsigned char execSize = (*spilledInstIter)->getExecSize ();
G4_Declare * replacementRangeDcl;
// Start from an empty builder list; its contents are spliced into instList
// at the points below.
builder_->instList.clear();
bool optimizeSplitLLR = false;
G4_INST* inst = *spilledInstIter;
G4_INST* spillSendInst = NULL;
// Handle send instructions (special treatment)
// Create the spill range for the whole post destination, assign spill
// offset to the spill range and create the instructions to load the
// spill range from, and save it to, spill memory.
if ((*spilledInstIter)->isSend ()) {
INST_LIST::iterator sendOutIter = spilledInstIter;
assert (getRFType (spilledRegion) == G4_GRF);
G4_Declare * spillRangeDcl =
createPostDstSpillRangeDeclare (*sendOutIter, spilledRegion);
G4_Declare * mRangeDcl =
createAndInitMHeader (
(G4_RegVarTransient *) spillRangeDcl->getRegVar ());
sendInSpilledRegVarPortions (
spillRangeDcl, mRangeDcl, 0,
spillRangeDcl->getNumRows (),
spilledRegion->getRegOff());
// Everything generated so far (header setup and the loads) goes in front
// of the send.
INST_LIST::iterator insertPos = sendOutIter;
instList.splice (insertPos, builder_->instList);
// The stores generated here stay in the builder list and are spliced after
// the send by the common code at the end of this function.
sendOutSpilledRegVarPortions (
spillRangeDcl, mRangeDcl, 0, spillRangeDcl->getNumRows (),
spilledRegion->getRegOff());
replacementRangeDcl = spillRangeDcl;
}
// Handle other regular single/multi destination register instructions.
// Create the spill range for the destination region, assign spill
// offset to the spill range and create the instructions to load and
// save the spill range from/to spill memory.
else {
// Create the segment aligned spill range
G4_Declare * spillRangeDcl =
createSpillRangeDeclare (
spilledRegion, execSize,
*spilledInstIter);
// Create and initialize the message header
G4_Declare * mRangeDcl =
createAndInitMHeader (spilledRegion, execSize);
// Unaligned region specific handling.
unsigned int spillSendOption = InstOpt_WriteEnable;
if (shouldPreloadSpillRange (
spilledRegion, execSize, *spilledInstIter)) {
// Preload the segment aligned spill range from memory to use
// as an overlay
preloadSpillRange (
spillRangeDcl, mRangeDcl, spilledRegion, execSize);
// Create the temporary range to use as a replacement range.
G4_Declare * tmpRangeDcl =
createTemporaryRangeDeclare (spilledRegion, execSize);
// Copy out the value in the temporary range into its
// location in the spill range.
G4_DstRegRegion * spillRangeDstRegion =
createSpillRangeDstRegion (
spillRangeDcl->getRegVar (), spilledRegion, execSize);
G4_SrcRegRegion * tmpRangeSrcRegion =
createTemporaryRangeSrcRegion (
tmpRangeDcl->getRegVar (), spilledRegion, execSize);
// NOTE: Never use a predicate for the final mov if the spilled
// instruction was a sel (even in a SIMD CF context).
G4_Predicate* predicate =
((*spilledInstIter)->opcode() != G4_sel)?
(*spilledInstIter)->getPredicate () : nullptr;
// The copy gets its own duplicate of the predicate operand rather than
// sharing the one owned by the spilled instruction.
createMovInst (
execSize, spillRangeDstRegion, tmpRangeSrcRegion,
predicate != nullptr ? builder_->duplicateOperand(predicate) : predicate,
(*spilledInstIter)->getMaskOption());
numGRFMove ++;
replacementRangeDcl = tmpRangeDcl;
}
// Aligned regions do not need a temporary range.
else {
LocalLiveRange* spilledLLR = gra.getLocalLR(spilledRegion->getBase()->asRegVar()->getDeclare());
if (!canDoSLMSpill() && spilledLLR && spilledLLR->getSplit())
{
// if we are spilling the dest of a copy move introduced by local live-range splitting,
// we can spill the source value instead and delete the move
// ToDo: we should generalize this to cover all moves
// NOTE(review): src0 is assumed to be a register region whose base is a
// RegVar; srcDcl is computed before the isRegVar() check below - confirm
// that split copies always satisfy this.
G4_SrcRegRegion* srcRegion = inst->getSrc(0)->asSrcRegRegion();
G4_Declare* srcDcl = srcRegion->getBase()->asRegVar()->getDeclare();
unsigned int lb = srcRegion->getLeftBound();
unsigned int rb = srcRegion->getRightBound();
G4_RegVar * regVar = NULL;
if (srcRegion->getBase()->isRegVar())
{
regVar = getRegVar(srcRegion);
}
// The source may replace the spill payload only if its declare is
// Sixteen_Word aligned, the region starts and ends on REG_BYTE_SIZE
// boundaries, it is exactly as large as the spill range, and the source
// itself is not being spilled.
if (srcDcl->getSubRegAlign() == Sixteen_Word &&
lb % REG_BYTE_SIZE == 0 &&
(rb + 1) % REG_BYTE_SIZE == 0 &&
(rb - lb + 1) == spillRangeDcl->getByteSize() &&
regVar &&
!shouldSpillRegister(regVar))
{
optimizeSplitLLR = true;
}
}
replacementRangeDcl = spillRangeDcl;
// In SIMD control flow the store uses the mask option of the spilled
// instruction instead of the default InstOpt_WriteEnable.
if (inSIMDCFContext_)
{
spillSendOption = (*spilledInstIter)->getMaskOption();
}
}
// Save the spill range to memory.
if (canDoSLMSpill())
{
// One GRF is written unless the region crosses a GRF boundary, in which
// case two are written.
int offset = getRegionDisp(spilledRegion) & GRF_ALIGN_MASK;
int numGRF = spilledRegion->crossGRF() ? 2 : 1;
createSpill(spillRangeDcl, 0, numGRF, offset, spillSendOption, execSize);
}
else
{
initMWritePayload(
spillRangeDcl, mRangeDcl, spilledRegion, execSize);
spillSendInst = createSpillSendInstr(
spillRangeDcl, mRangeDcl, spilledRegion, execSize, spillSendOption);
numGRFSpill++;
}
// In fail-safe mode the spill register cursor is rewound to its start.
if (failSafeSpill_)
{
spillRegOffset_ = spillRegStart_;
}
}
// Replace the spilled range with the spill range and insert spill
// instructions.
INST_LIST::iterator insertPos = spilledInstIter;
insertPos++;
replaceSpilledRange (replacementRangeDcl, spilledRegion, *spilledInstIter);
// nextIter is captured before the splice, so it keeps designating the
// instruction that originally followed the spilled one.
INST_LIST::iterator nextIter = spilledInstIter;
++nextIter;
instList.splice (insertPos, builder_->instList);
if (optimizeSplitLLR && spillSendInst && spillSendInst->isSplitSend())
{
// delete the move and spill the source instead. Note that we can't do this if split send
// is not enabled, as payload contains header
instList.erase(spilledInstIter);
unsigned int pos = 1;
spillSendInst->setSrc(inst->getSrc(0), pos);
}
else
{
// Put a pseudo_kill of the replacement range in front of the spilled
// instruction. The CISA offset is taken from the member curInst, not from
// the local inst.
INST_LIST::iterator pseudoKillPos = spilledInstIter;
G4_DstRegRegion* dstOpnd = builder_->createDstRegRegion(Direct, replacementRangeDcl->getRegVar(), 0, 0, 1, Type_UD);
auto newInst = builder_->createInst(NULL, G4_pseudo_kill, NULL, false, 1, dstOpnd, NULL, NULL, 0);
newInst->setCISAOff(curInst->getCISAOff());
instList.splice(pseudoKillPos, builder_->instList);
}
return nextIter;
}
// Create the GRF fill range for a spilled source operand and emit the code
// that loads it from spill memory.
//
// filledRegion   - source region of the instruction that reads a spilled var.
// filledInstIter - position of that instruction inside instList.
// instList       - instruction list that receives the generated code.
//
// Returns an iterator to the instruction following the filled instruction.
// When the split live-range optimization applies, the filled instruction is
// erased from instList before returning.
INST_LIST::iterator
SpillManagerGMRF::insertFillGRFRangeCode (
G4_SrcRegRegion * filledRegion,
INST_LIST::iterator filledInstIter,
INST_LIST & instList
)
{
unsigned execSize = (*filledInstIter)->getExecSize ();
// Create the fill range, assign spill offset to the fill range and
// create the instructions to load the fill range from spill memory.
G4_Declare * fillRangeDcl = nullptr;
bool optimizeSplitLLR = false;
G4_INST* inst = *filledInstIter;
// NOTE(review): getDst() is dereferenced without a null check, although
// dstRegion is only read on the split live-range path below - confirm every
// instruction reaching this function has a destination operand.
G4_DstRegRegion* dstRegion = inst->getDst()->asDstRegRegion();
G4_INST* fillSendInst = NULL;
if (canDoSLMSpill())
{
// we fill either 1 or 2 GRF here
//FIXME: do we need to explicitly mark this as a fill var?
int dclRows = filledRegion->crossGRF() ? 2 : 1;
int offset = getRegionDisp(filledRegion) & GRF_ALIGN_MASK;
G4_RegVar * filledRegVar = getRegVar(filledRegion);
const char* name = createImplicitRangeName("FL", filledRegVar, getFillIndex(filledRegVar));
fillRangeDcl = builder_->createDeclareNoLookup(name, G4_GRF, 8, (unsigned short)dclRows, Type_UD);
createFill(fillRangeDcl, 0, dclRows, offset);
}
else
{
fillRangeDcl =
createGRFFillRangeDeclare(
filledRegion, execSize,
*filledInstIter);
G4_Declare * mRangeDcl =
createAndInitMHeader(filledRegion, execSize);
numGRFFill++;
fillSendInst = createFillSendInstr(fillRangeDcl, mRangeDcl, filledRegion, execSize);
LocalLiveRange* filledLLR = gra.getLocalLR(filledRegion->getBase()->asRegVar()->getDeclare());
if (filledLLR && filledLLR->getSplit())
{
// The filled variable comes from local live-range splitting. The fill can
// write the instruction's destination directly when the destination
// declare is Sixteen_Word aligned, the region starts and ends on
// REG_BYTE_SIZE boundaries and it is exactly as large as the fill range.
G4_Declare* dstDcl = dstRegion->getBase()->asRegVar()->getDeclare();
unsigned int lb = dstRegion->getLeftBound();
unsigned int rb = dstRegion->getRightBound();
if (dstDcl->getSubRegAlign() == Sixteen_Word &&
lb % REG_BYTE_SIZE == 0 &&
(rb + 1) % REG_BYTE_SIZE == 0 &&
(rb - lb + 1) == fillRangeDcl->getByteSize())
{
optimizeSplitLLR = true;
}
}
}
// Replace the spilled range with the fill range and insert fill
// instructions in front of the filled instruction.
replaceFilledRange (fillRangeDcl, filledRegion, *filledInstIter);
INST_LIST::iterator insertPos = filledInstIter;
instList.splice (insertPos, builder_->instList);
if (optimizeSplitLLR)
{
// Drop the filled instruction and let the fill send write its destination.
INST_LIST::iterator nextIter = filledInstIter;
INST_LIST::iterator prevIter = filledInstIter;
nextIter++;
// Step back over the last two instructions in front of the filled one.
// NOTE(review): this assumes at least two instructions precede
// filledInstIter after the splice - confirm.
prevIter--;
prevIter--;
instList.erase(filledInstIter);
fillSendInst->setDest(dstRegion);
G4_INST* prevInst = (*prevIter);
// If that instruction is the pseudo_kill of the fill range, retarget it to
// the top declare of the new destination.
if (prevInst->opcode() == G4_pseudo_kill &&
GetTopDclFromRegRegion(prevInst->getDst()) == fillRangeDcl)
{
prevInst->setDest(builder_->createDstRegRegion(Direct, GetTopDclFromRegRegion(dstRegion)->getRegVar(), 0, 0, 1, Type_UD));
}
return nextIter;
}
else
{
return ++filledInstIter;
}
}
// Create the MRF-style fill range for a spilled send payload operand and
// emit the code that loads it from spill memory.
//
// filledRegion   - payload source region of the send being filled.
// filledInstIter - position of the send inside instList.
// instList       - instruction list that receives the generated code.
//
// Returns an iterator to the instruction following the send.
INST_LIST::iterator
SpillManagerGMRF::insertFillMRFRangeCode (
    G4_SrcRegRegion *   filledRegion,
    INST_LIST::iterator filledInstIter,
    INST_LIST &         instList
)
{
    G4_INST * payloadUser = *filledInstIter;

    // Number of elements of the region's type that fit in one register.
    unsigned elemsPerReg = REG_BYTE_SIZE / filledRegion->getElemSize();

    // Fill range first (it also receives the spill offset), then the
    // message header used by the loads.
    G4_Declare * payloadDcl = createMRFFillRangeDeclare(filledRegion, payloadUser);
    G4_Declare * headerDcl = createMRangeDeclare(filledRegion, elemsPerReg);
    initMHeader(headerDcl);

    // Load every row of the fill range from spill memory.
    sendInSpilledRegVarPortions(
        payloadDcl,
        headerDcl,
        0,
        payloadDcl->getNumRows(),
        filledRegion->getRegOff());

    // Redirect the operand to the fill range, then place the generated
    // loads in front of the send.
    replaceFilledRange(payloadDcl, filledRegion, *filledInstIter);
    instList.splice(filledInstIter, builder_->instList);

    INST_LIST::iterator following = filledInstIter;
    ++following;
    return following;
}
// Return the spill/fill declare associated with an address-taken spilled
// declare, creating and registering it on first use.
//
// spilledAddrTakenDcl - the spilled declare whose address is taken.
// kernel              - kernel whose builder allocates the name and declare.
G4_Declare* getOrCreateSpillFillDcl(G4_Declare* spilledAddrTakenDcl, G4_Kernel* kernel)
{
    G4_Declare* spillFillDcl = spilledAddrTakenDcl->getAddrTakenSpillFill();
    if (spillFillDcl != NULL)
    {
        // Already created by an earlier request.
        return spillFillDcl;
    }

#define ADDR_SPILL_FILL_NAME_SIZE 32
    char* generatedName = kernel->fg.builder->getNameString(
        kernel->fg.mem,
        ADDR_SPILL_FILL_NAME_SIZE,
        "ADDR_SP_FL_V%d",
        spilledAddrTakenDcl->getDeclId());

    // The declare is created as DeclareType::Tmp (sub-class G4_RegVarTmp) so
    // that it is assigned infinite spill cost when coloring. It mirrors the
    // element count, row count and element type of the spilled declare.
    spillFillDcl = kernel->fg.builder->createDeclareNoLookup(
        (const char*)generatedName,
        G4_GRF,
        spilledAddrTakenDcl->getNumElems(),
        spilledAddrTakenDcl->getNumRows(),
        spilledAddrTakenDcl->getElemType(),
        DeclareType::Tmp,
        spilledAddrTakenDcl->getRegVar());

    spilledAddrTakenDcl->setAddrTakenSpillFill(spillFillDcl);
    return spillFillDcl;
}
// Prepare every spilled, address-taken live range for indirect access:
// make sure each addressed declare has a spill/fill declare, and, if any
// spilled variable is address sensitive, insert the spill/fill code around
// indirect references and prune the points-to sets accordingly.
//
// Returns true; the function currently has no failure path.
bool SpillManagerGMRF::handleAddrTakenSpills( G4_Kernel * kernel, PointsToAnalysis& pointsToAnalysis )
{
    bool success = true;
    unsigned int addrSensitiveSpills = 0;

    for (LiveRange* spilledLR : spilledLRs_)
    {
        G4_Declare* spilledDcl = spilledLR->getVar()->getDeclare();
        if (spilledDcl->getAddressed())
        {
            getOrCreateSpillFillDcl(spilledDcl, kernel);
        }

        if (lvInfo_->isAddressSensitive(spilledLR->getVar()->getId()))
        {
            ++addrSensitiveSpills;
        }
    }

    if (addrSensitiveSpills != 0)
    {
        insertAddrTakenSpillFill( kernel, pointsToAnalysis );
        prunePointsTo( kernel, pointsToAnalysis );
    }

#ifdef _DEBUG
    if( success )
    {
        // Verify that each spilled address taken has a spill/fill registers assigned
        for (LR_LIST::const_iterator lt = spilledLRs_.begin ();
            lt != spilledLRs_.end (); ++lt)
        {
            if( (*lt)->getVar()->getDeclare()->getAddressed() )
            {
                MUST_BE_TRUE( (*lt)->getVar()->getDeclare()->getAddrTakenSpillFill() != NULL, "Spilled addr taken does not have assigned spill/fill GRF");
            }
        }
    }
#endif

    return success;
}
// Insert spill and fill code for indirect GRF accesses.
//
// For every spilled live range that the address variable of opnd may point
// to, the spilled variable is copied into its spill/fill declare in front of
// the instruction (fill) and, when spill is true, copied back behind the
// instruction (spill).
//
// kernel           - kernel whose builder creates the copy instructions.
// instList         - instruction list containing the instruction.
// inst_it          - position of the instruction with the indirect operand.
// opnd             - the indirect destination or source operand.
// pointsToAnalysis - queried to find which spilled ranges opnd may reference.
// spill            - true for an indirect destination (write back needed),
//                    false for an indirect source (fill only).
// bbid             - id of the enclosing basic block (currently only
//                    referenced by disabled points-to update code).
void SpillManagerGMRF::insertAddrTakenSpillAndFillCode( G4_Kernel* kernel, INST_LIST& instList, INST_LIST::iterator inst_it, G4_Operand* opnd, PointsToAnalysis& pointsToAnalysis, bool spill, unsigned int bbid )
{
    curInst = (*inst_it);
    // Fill code goes in front of inst_it, spill code in front of next_inst_it.
    INST_LIST::iterator next_inst_it = ++inst_it;
    inst_it--;

    // Check whether spill operand points to any spilled range
    for (LR_LIST::const_iterator lr_it = spilledLRs_.begin ();
        lr_it != spilledLRs_.end (); ++lr_it) {
        LiveRange* lr = (*lr_it);
        G4_RegVar* var = NULL;

        if( opnd->isDstRegRegion() && opnd->asDstRegRegion()->getBase()->asRegVar() )
            var = opnd->asDstRegRegion()->getBase()->asRegVar();
        if( opnd->isSrcRegRegion() && opnd->asSrcRegRegion()->getBase()->asRegVar() )
            var = opnd->asSrcRegRegion()->getBase()->asRegVar();

        MUST_BE_TRUE( var != NULL, "Fill operand is neither a source nor dst region");

        if( var &&
            pointsToAnalysis.isPresentInPointsTo( var,
            lr->getVar() ) )
        {
            unsigned int numrows = lr->getVar()->getDeclare()->getNumRows();
            G4_Declare* temp = getOrCreateSpillFillDcl(lr->getVar()->getDeclare(), kernel);

            // In fail-safe mode the spill/fill declare is pinned to the next
            // reserved register the first time it is used.
            if (failSafeSpill_ &&
                temp->getRegVar()->getPhyReg() == NULL)
            {
                temp->getRegVar()->setPhyReg(builder_->phyregpool.getGreg(spillRegOffset_), 0);
                spillRegOffset_ += numrows;
            }

            // Start the spill/fill declare with a pseudo_kill in front of the
            // instruction.
            G4_DstRegRegion* dstOpnd = builder_->createDstRegRegion(Direct, temp->getRegVar(), 0, 0, 1, Type_UD);
            auto newInst = builder_->createInternalInst(NULL, G4_pseudo_kill, NULL, false, 1, dstOpnd, NULL, NULL, 0);
            instList.insert(inst_it, newInst);

            if( numrows > 1 || (lr->getVar()->getDeclare()->getNumElems() * lr->getVar()->getDeclare()->getElemSize() == 32) )
            {
                if (useScratchMsg_ || useSplitSend())
                {
                    // Load the rows straight from spill memory; the load is
                    // emitted for spills as well, the store only for spills.
                    G4_Declare * fillMRFRangeDcl = temp;
                    G4_Declare * mRangeDcl =
                        createAndInitMHeader(
                            (G4_RegVarTransient *)temp->getRegVar()->getBaseRegVar());

                    sendInSpilledRegVarPortions(
                        fillMRFRangeDcl, mRangeDcl, 0,
                        temp->getNumRows(), 0);

                    instList.splice(inst_it, builder_->instList);

                    if (spill)
                    {
                        sendOutSpilledRegVarPortions (
                            temp, mRangeDcl, 0, temp->getNumRows(),
                            0);

                        instList.splice(next_inst_it, builder_->instList);
                    }
                }
                else
                {
                    // Copy row by row with float moves: two rows at a time
                    // (SIMD16) while another row follows, otherwise one row
                    // (SIMD8).
                    for( unsigned int i = 0; i < numrows; i++ )
                    {
                        G4_INST* inst;
                        RegionDesc* rd = kernel->fg.builder->getRegionStride1();
                        unsigned char curExSize = 8;

                        if( (i + 1 ) < numrows )
                            curExSize = 16;

                        G4_SrcRegRegion* srcRex = kernel->fg.builder->createSrcRegRegion(Mod_src_undef, Direct, lr->getVar(), (short)i, 0, rd, Type_F);
                        G4_DstRegRegion* dstRex = kernel->fg.builder->createDstRegRegion(Direct, temp->getRegVar(), (short)i, 0, 1, Type_F);

                        inst = kernel->fg.builder->createInternalInst( NULL, G4_mov, NULL, false, curExSize,
                            dstRex, srcRex, NULL, InstOpt_WriteEnable, curInst->getLineNo(), curInst->getCISAOff(), curInst->getSrcFilename() );

                        instList.insert( inst_it, inst );

                        if( spill )
                        {
                            // Also insert spill code
                            G4_SrcRegRegion* spillSrc = kernel->fg.builder->createSrcRegRegion(Mod_src_undef, Direct, temp->getRegVar(), (short)i, 0, rd, Type_F);
                            G4_DstRegRegion* spillDst = kernel->fg.builder->createDstRegRegion(Direct, lr->getVar(), (short)i, 0, 1, Type_F);

                            inst = kernel->fg.builder->createInternalInst( NULL, G4_mov, NULL, false, curExSize,
                                spillDst, spillSrc, NULL, InstOpt_WriteEnable, curInst->getLineNo(), curInst->getCISAOff(), curInst->getSrcFilename() );

                            instList.insert( next_inst_it, inst );
                        }

                        // If 2 rows were processed then increment induction var suitably
                        if( curExSize == 16 )
                            i++;
                    }
                }

                // Update points to
                // Note: points2 set should be updated after inserting fill code,
                // however, this sets a bit in liveness bit-vector that
                // causes the temp variable to be marked as live-out from
                // that BB. A general fix should treat address taken variables
                // more accurately wrt liveness so they dont escape via
                // unfeasible paths.
                //pointsToAnalysis.addFillToPointsTo( bbid, var, temp->getRegVar() );
            }
            else if( numrows == 1 )
            {
                // Insert spill/fill when the decl uses a single row, and not all of it
                unsigned char curExSize = 16;
                unsigned short numbytes = lr->getVar()->getDeclare()->getNumElems() * lr->getVar()->getDeclare()->getElemSize();

                //temp->setAddressed();
                // Number of bytes of the row copied so far.
                unsigned short byteOff = 0;

                while( numbytes > 0 )
                {
                    G4_INST* inst;
                    G4_Type type = Type_W;

                    if( numbytes >= 16 )
                        curExSize = 8;
                    else if( numbytes >= 8 && numbytes < 16 )
                        curExSize = 4;
                    else if( numbytes >= 4 && numbytes < 8 )
                        curExSize = 2;
                    else if( numbytes >= 2 && numbytes < 4 )
                        curExSize = 1;
                    else if( numbytes == 1 )
                    {
                        // If a region has odd number of bytes, copy last byte in final iteration
                        curExSize = 1;
                        type = Type_UB;
                    }
                    else {
                        MUST_BE_TRUE( false, "Cannot emit SIMD1 for byte");
                        curExSize = 0;
                    }

                    // Word moves cover 2 bytes per channel, the trailing byte
                    // move covers 1. The sub-register offset is counted in
                    // elements of the move's own type, so it is derived from
                    // the byte offset. Using the element size here keeps the
                    // remaining-byte count from wrapping below zero after the
                    // final byte move.
                    const unsigned short elemBytes = (type == Type_UB) ? 1 : 2;
                    const short off = (short)(byteOff / elemBytes);

                    RegionDesc* rd = kernel->fg.builder->getRegionStride1();

                    G4_SrcRegRegion* srcRex = kernel->fg.builder->createSrcRegRegion(Mod_src_undef, Direct, lr->getVar(), 0, off, rd, type);
                    G4_DstRegRegion* dstRex = kernel->fg.builder->createDstRegRegion(Direct, temp->getRegVar(), 0, off, 1, type);

                    inst = kernel->fg.builder->createInternalInst( NULL, G4_mov, NULL, false, curExSize,
                        dstRex, srcRex, NULL, InstOpt_WriteEnable, curInst->getLineNo(), curInst->getCISAOff(), curInst->getSrcFilename() );

                    instList.insert( inst_it, inst );

                    if( spill )
                    {
                        // Also insert spill code
                        G4_SrcRegRegion* spillSrc = kernel->fg.builder->createSrcRegRegion(Mod_src_undef, Direct, temp->getRegVar(), 0, off, rd, type);
                        G4_DstRegRegion* spillDst = kernel->fg.builder->createDstRegRegion(Direct, lr->getVar(), 0, off, 1, type);

                        inst = kernel->fg.builder->createInternalInst( NULL, G4_mov, NULL, false, curExSize,
                            spillDst, spillSrc, NULL, InstOpt_WriteEnable, curInst->getLineNo(), curInst->getCISAOff(), curInst->getSrcFilename() );

                        instList.insert( next_inst_it, inst );
                    }

                    byteOff += curExSize * elemBytes;
                    numbytes -= curExSize * elemBytes;
                }

                // Update points to
                //pointsToAnalysis.addFillToPointsTo( bbid, var, temp->getRegVar() );
            }

            if (!spill)
            {
                // Insert pseudo_use node so that liveness keeps the
                // filled variable live through the indirect access.
                // Not required for spill because for spill we will
                // anyway insert a use of the variable to emit store.
                RegionDesc* rd = kernel->fg.builder->getRegionScalar();
                G4_SrcRegRegion* pseudoUseSrc = kernel->fg.builder->createSrcRegRegion(Mod_src_undef, Direct, temp->getRegVar(),
                    0, 0, rd, Type_F);
                G4_INST* pseudoUseInst = kernel->fg.builder->createInternalIntrinsicInst(nullptr, Intrinsic::Use, 1, nullptr, pseudoUseSrc, nullptr, nullptr, InstOpt_NoOpt);
                instList.insert(next_inst_it, pseudoUseInst);
            }
        }
    }
}
// Walk every instruction of the kernel and insert address-taken spill/fill
// code around each indirect GRF operand: indirect destinations get fill and
// spill code, indirect sources get fill code only.
void SpillManagerGMRF::insertAddrTakenSpillFill( G4_Kernel* kernel, PointsToAnalysis& pointsToAnalysis )
{
    for (G4_BB* block : kernel->fg.BBs)
    {
        for (INST_LIST_ITER instIt = block->instList.begin();
             instIt != block->instList.end();
             ++instIt)
        {
            G4_INST* candidate = *instIt;

            // Each instruction starts from the first register reserved for
            // indirect spill/fill when running in fail-safe mode.
            if (failSafeSpill_)
            {
                spillRegOffset_ = indrSpillRegStart_;
            }

            // Indirect destination: fill before, spill after.
            G4_DstRegRegion* dstRegion = candidate->getDst();
            const bool indirectDst = dstRegion && dstRegion->getRegAccess() == IndirGRF;
            if (indirectDst)
            {
                insertAddrTakenSpillAndFillCode( kernel, block->instList, instIt, dstRegion, pointsToAnalysis, true, block->getId() );
            }

            // Indirect sources: fill only.
            for (int srcIdx = 0; srcIdx < G4_MAX_SRCS; ++srcIdx)
            {
                G4_Operand* srcOpnd = candidate->getSrc(srcIdx);
                if (!srcOpnd || !srcOpnd->isSrcRegRegion())
                {
                    continue;
                }
                if (srcOpnd->asSrcRegRegion()->getRegAccess() == IndirGRF)
                {
                    insertAddrTakenSpillAndFillCode( kernel, block->instList, instIt, srcOpnd, pointsToAnalysis, false, block->getId() );
                }
            }
        }
    }
}
// After address-taken spill/fill code has been inserted, remove each spilled
// regvar from the points-to set of every indirect operand that referenced it.
void SpillManagerGMRF::prunePointsTo( G4_Kernel* kernel, PointsToAnalysis& pointsToAnalysis )
{
    for (G4_BB* block : kernel->fg.BBs)
    {
        for (INST_LIST_ITER instIt = block->instList.begin();
             instIt != block->instList.end();
             ++instIt)
        {
            G4_INST* candidate = *instIt;

            // Collect the indirect operands of this instruction; they are
            // processed in reverse order of collection.
            std::stack<G4_Operand*> indirectOpnds;

            G4_DstRegRegion* dstRegion = candidate->getDst();
            if (dstRegion && dstRegion->getRegAccess() == IndirGRF)
            {
                indirectOpnds.push( dstRegion );
            }

            for (int srcIdx = 0; srcIdx < G4_MAX_SRCS; ++srcIdx)
            {
                G4_Operand* srcOpnd = candidate->getSrc(srcIdx);
                if (srcOpnd && srcOpnd->isSrcRegRegion() && srcOpnd->asSrcRegRegion()->getRegAccess() == IndirGRF)
                {
                    indirectOpnds.push( srcOpnd );
                }
            }

            while (!indirectOpnds.empty())
            {
                G4_Operand* cur = indirectOpnds.top();
                indirectOpnds.pop();

                // Check the operand against every spilled range.
                for (LiveRange* spilledLR : spilledLRs_)
                {
                    G4_RegVar* var = NULL;

                    if (cur->isDstRegRegion() && cur->asDstRegRegion()->getBase()->asRegVar())
                    {
                        var = cur->asDstRegRegion()->getBase()->asRegVar();
                    }
                    if (cur->isSrcRegRegion() && cur->asSrcRegRegion()->getBase()->asRegVar())
                    {
                        var = cur->asSrcRegRegion()->getBase()->asRegVar();
                    }

                    MUST_BE_TRUE( var != NULL, "Operand is neither a source nor dst region");

                    if (var == NULL)
                    {
                        continue;
                    }

                    G4_RegVar* spilledVar = spilledLR->getVar();
                    if (pointsToAnalysis.isPresentInPointsTo( var, spilledVar ))
                    {
                        // Remove this from points to
                        pointsToAnalysis.removeFromPointsTo( var, spilledLR->getVar() );
                    }
                }
            }
        }
    }
}
// Insert spill/fill code for all registers that have not been assigned
// physical registers in the current iteration of the graph coloring
// allocator.
//
// kernel           - kernel whose basic blocks are rewritten.
// pointsToAnalysis - used for spilled variables whose address is taken.
//
// Returns false if spill fails somehow: a spilled live range must not be
// spilled (and is not the fail-safe EOT case), or address-taken handling
// reports failure. Returns true otherwise.
bool
SpillManagerGMRF::insertSpillFillCode (
G4_Kernel * kernel, PointsToAnalysis& pointsToAnalysis
)
{
// Orders live ranges by descending reference count.
auto refCount = [](LiveRange* l1, LiveRange* l2) { return l1->getRefCount() > l2->getRefCount(); };
if (canDoSLMSpill())
{
// sort the spill LRs based on their reference count so that the more expensive ones
// will land in SLM
spilledLRs_.sort(refCount);
}
// Set the spill flag of all spilled regvars.
for (LR_LIST::const_iterator lt = spilledLRs_.begin ();
lt != spilledLRs_.end (); ++lt) {
G4_Declare *dcl = (*lt)->getVar()->getDeclare();
// A spilled declare no longer counts as split; drop its sub-declares.
if (dcl->getIsSplittedDcl())
{
dcl->setIsSplittedDcl(false);
gra.clearSubDcl(dcl);
}
// Ignore request to spill/fill the spill/fill ranges
// as it does not help the allocator.
if (shouldSpillRegister ((*lt)->getVar ()) == false)
{
bool needsEOTGRF = (*lt)->getEOTSrc() && builder_->hasEOTGRFBinding();
if (failSafeSpill_ && needsEOTGRF &&
((*lt)->getVar()->isRegVarTransient() ||
(*lt)->getVar()->isRegVarTmp()))
{
// Fail-safe EOT case: pin the variable to the larger of spillRegStart_ and
// (vISA_TotalGRFNum - 16) instead of spilling it.
(*lt)->getVar()->setPhyReg(builder_->phyregpool.getGreg(spillRegStart_ > (builder_->getOptions()->getuInt32Option(vISA_TotalGRFNum) - 16) ? spillRegStart_ : (builder_->getOptions()->getuInt32Option(vISA_TotalGRFNum) - 16)), 0);
continue;
}
else if (lvInfo_->isAddressSensitive((*lt)->getVar()->getId())) {
DEBUG_MSG("Register allocation warning: Spilling of variable("
<< (*lt)->getVar ()->getDeclare ()->getName()
<< ") whose address is taken!"
<< endl);
}
else {
DEBUG_MSG("Register allocation warning: Spilling infinite live range ("
<< (*lt)->getVar ()->getDeclare ()->getName()
<< ")!"
<< endl);
}
// Both warning cases abort the whole spill iteration.
return false;
}
else
{
(*lt)->getVar ()->getDeclare ()->setSpillFlag ();
if (canDoSLMSpill())
{
// The result is discarded; presumably the call assigns the displacement
// now, in the sorted order established above - confirm against getDisp.
getDisp((*lt)->getVar());
}
}
}
// Handle address taken spills
bool success = handleAddrTakenSpills( kernel, pointsToAnalysis );
if( !success )
{
DEBUG_MSG( "Enough physical register not available for handling address taken spills" << std::endl );
return false;
}
// Insert spill/fill code for all basic blocks.
FlowGraph& fg = kernel->fg;
for (BB_LIST_ITER it = fg.BBs.begin(); it != fg.BBs.end(); it++)
{
inSIMDCFContext_ = (*it)->isInSimdFlow();
bbId_ = (*it)->getId();
INST_LIST::iterator jt = (*it)->instList.begin ();
while (jt != (*it)->instList.end ()) {
// kt is captured before any insertion or erase, so iteration resumes at
// the original successor and spill code inserted after jt is not revisited.
INST_LIST::iterator kt = jt;
++kt;
G4_INST * inst = *jt;
curInst = inst;
if (failSafeSpill_)
{
spillRegOffset_ = spillRegStart_;
}
// Insert spill code, when the target is a spilled register.
if (inst->getDst())
{
G4_RegVar * regVar = NULL;
if( inst->getDst()->getBase()->isRegVar() )
{
regVar = getRegVar (inst->getDst());
}
if (regVar && shouldSpillRegister (regVar))
{
if (getRFType (regVar) == G4_GRF)
{
// A pseudo_kill of a spilled variable is simply removed.
if(inst->isPseudoKill())
{
(*it)->instList.erase(jt);
jt = kt;
continue;
}
// The returned iterator is not used; kt already marks the resume point.
insertSpillRangeCode (
inst->getDst ()->asDstRegRegion (), jt,
(*it)->instList);
}
else
{
assert (0);
}
}
}
// Insert fill code, when the source is a spilled register.
for (unsigned i = 0; i < G4_MAX_SRCS; i++)
{
if (inst->getSrc (i) &&
inst->getSrc (i)->isSrcRegRegion ())
{
G4_RegVar * regVar = NULL;
if( inst->getSrc(i)->asSrcRegRegion()->getBase()->isRegVar() )
{
regVar = getRegVar (inst->getSrc (i)->asSrcRegRegion ());
}
if (regVar && shouldSpillRegister (regVar))
{
// A lifetime-end marker of a spilled variable is removed; no further
// sources of it are examined.
if(inst->isLifeTimeEnd())
{
(*it)->instList.erase(jt);
break;
}
if ((inst->isSend() && i == 0) ||
(inst->isSplitSend() && i == 1)) {
// treat it as MRF since we may need to spill >2 GRFs
insertFillMRFRangeCode (
inst->getSrc (i)->asSrcRegRegion (), jt,
(*it)->instList);
}
else if (getRFType (regVar) == G4_GRF)
insertFillGRFRangeCode (
inst->getSrc (i)->asSrcRegRegion (), jt,
(*it)->instList);
else
assert (0);
}
}
}
jt = kt;
}
}
// Reset the per-block state set inside the loop above.
bbId_ = UINT_MAX;
inSIMDCFContext_ = false;
// Calculate the spill memory used in this iteration
for (auto spill : spilledLRs_)
{
unsigned disp = spill->getVar ()->getDisp ();
if (spill->getVar ()->isSpilled ())
{
// Variables without an assigned displacement (UINT_MAX) do not count.
if (disp != UINT_MAX)
{
nextSpillOffset_ = std::max(nextSpillOffset_, disp + getByteSize(spill->getVar()));
}
}
}
// Verify the spill memory assignments for spill ranges introduced
#if defined (_DEBUG) || defined (VERIFY_SPILL_ASSIGNMENTS)
// Each spilled live range is checked against every spilled regvar: ranges
// whose spill-memory lifetimes interfere must not overlap in spill memory.
LIVERANGE_LIST::const_iterator kt = spilledLRs_.begin ();
LIVERANGE_LIST::const_iterator ktEnd = spilledLRs_.end ();
for (; kt != ktEnd; ++kt) {
if ((*kt)->getVar ()->isSpilled () == false) continue;
G4_RegVar * sRange1 = getReprRegVar ((*kt)->getVar ());
unsigned sidx1 = sRange1->getId ();
G4_RegVar * tRange1 =
(*kt)->getVar ()->getNonTransientBaseRegVar ();
unsigned tidx1 = tRange1->getId ();
for (unsigned lidx = 0; lidx < varIdCount_; ++lidx) {
if (getRegVar (lidx)->isSpilled() == false) continue;
G4_RegVar * sRange2 = getReprRegVar (getRegVar (lidx));
unsigned sidx2 = sRange2->getId ();
G4_RegVar * tRange2 =
getRegVar (lidx)->getNonTransientBaseRegVar ();
unsigned tidx2 = tRange2->getId ();
// The pair is checked first with the representative regvars; each pass then
// replaces one side by its non-transient base until both sides are bases.
while (true) {
if (spillMemLifetimeInterfere (sidx1, sidx2)) {
unsigned disp1 = sRange1->getDisp ();
unsigned size1 = getByteSize (sRange1);
unsigned disp2 = sRange2->getDisp ();
unsigned size2 = getByteSize (sRange2);
if (disp1 == disp2) {
MUST_BE_TRUE(false, "Bad spill displacements !");
}
else if (disp1 < disp2) {
if (disp1 + size1 > disp2) {
MUST_BE_TRUE(false, "Bad spill displacements !");
}
}
else {
if (disp2 + size2 > disp1) {
MUST_BE_TRUE(false, "Bad spill displacements !");
}
}
}
if (sidx1 == tidx1 && sidx2 == tidx2) {
break;
}
else if (sidx1 == tidx1) {
sidx2 = tidx2;
sRange2 = tRange2;
}
else {
sidx1 = tidx1;
sRange1 = tRange1;
}
}
}
}
#endif
// Emit the instruction with the introduced spill/fill ranges in the
// current iteration.
#ifndef NDEBUG
#ifdef DEBUG_VERBOSE_ON1
// Dumps the kernel assembly to a file named
// "spill_code_<iteration>_<kernel name>".
std::stringstream fname;
fname << "spill_code_" << iterationNo_++ << "_" << kernel->getName()
<< ends;
std::ofstream sout;
sout.open (fname.str ().c_str ());
kernel->emit_asm (sout, true, 0);
sout.close ();
#endif
#endif
return true;
}
// Replace Scratch Block Read/Write message with OWord Block Read/Write message
//
// Every send in the kernel that is marked as spill or fill and whose
// descriptor is a scratch read or scratch write gets a new descriptor, a new
// execution size and a preceding mov that writes the block offset into the
// message header. The new descriptor addresses surface index 0xFF.
void
SpillManagerGMRF::fixSpillFillCode (
G4_Kernel * kernel
)
{
FlowGraph& fg = kernel->fg;
unsigned statelessSurfaceIndex = 0xFF;
for( BB_LIST_ITER it = fg.BBs.begin(); it != fg.BBs.end(); it++ )
{
INST_LIST::iterator jt = (*it)->instList.begin ();
while( jt != (*it)->instList.end () )
{
// Saved successor; the movs inserted below go in front of jt.
INST_LIST::iterator kt = jt;
++kt;
G4_INST * inst = *jt;
if( inst->isSend() &&
inst->getSpillOrFill() == true )
{
if( inst->getMsgDesc()->isScratchRead() )
{
// Fix fill message
// Despite its name, curDst holds source 0 of the send (the message header).
G4_Operand * curDst = inst->getSrc(0);
G4_Declare * mRangeDcl = NULL;
if( curDst->getTopDcl() == builder_->getBuiltinR0() )
{
// The fill used builtin r0 directly as its header. Create a dedicated
// header range, copy the input payload into it and make the send read
// the copy, so the block offset can be written without touching r0.
G4_Operand * postDst = inst->getDst ();
G4_RegVar * fillRegVar = postDst->getTopDcl()->getRegVar()->getBaseRegVar ();
const char * name =
createImplicitRangeName (
"FL_MSG", fillRegVar,
getMsgFillIndex (fillRegVar));
mRangeDcl =
createRangeDeclare (
name,
G4_GRF,
REG_DWORD_SIZE, 1, Type_UD, NULL, DEF_HORIZ_STRIDE,
DeclareType::Tmp, fillRegVar, NULL, 0);
G4_DstRegRegion * mHeaderInputDstRegion =
createMHeaderInputDstRegion (mRangeDcl->getRegVar ());
G4_SrcRegRegion * inputPayload = createInputPayloadSrcRegion ();
G4_INST * movInst = builder_->createInternalInst( NULL, G4_mov, NULL, false, REG_DWORD_SIZE,
mHeaderInputDstRegion, inputPayload, NULL, InstOpt_WriteEnable, inst->getLineNo(), inst->getCISAOff(), inst->getSrcFilename() );
(*it)->instList.insert( jt, movInst );
// NOTE(review): curDst is not read again after this assignment.
curDst = createMHeaderInputDstRegion (mRangeDcl->getRegVar ());
G4_SrcRegRegion* curSrcOpnd = builder_->createSrcRegRegion(Mod_src_undef, Direct, mRangeDcl->getRegVar(), 0, 0,
builder_->getRegionStride1(), Type_UD );
inst->setSrc( curSrcOpnd, 0 );
}
else
{
mRangeDcl = curDst->getTopDcl();
}
// Rescale the scratch offset by G4_GRF_REG_NBYTES / OWORD_BYTE_SIZE
// (presumably from GRF units to OWord units - confirm the unit returned by
// getScratchRWOffset) and write it into the header's block offset field.
unsigned offset = inst->getMsgDesc()->getScratchRWOffset();
offset = offset * (G4_GRF_REG_NBYTES / OWORD_BYTE_SIZE);
G4_Imm * offsetImm = builder_->createImm (offset, Type_UD);
G4_DstRegRegion * mHeaderOffsetDstRegion =
createMHeaderBlockOffsetDstRegion (mRangeDcl->getRegVar ());
G4_INST* mov_inst = builder_->createInternalInst (NULL, G4_mov, NULL, false, SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion,
offsetImm, NULL, InstOpt_WriteEnable, inst->getLineNo(), inst->getCISAOff(), inst->getSrcFilename());
(*it)->instList.insert( jt, mov_inst );
// Build the read descriptor: surface index, response length, header
// present, message type, message length and block size.
unsigned segmentByteSize = inst->getMsgDesc()->ResponseLength() * REG_BYTE_SIZE;
unsigned responseLength = cdiv (segmentByteSize, REG_BYTE_SIZE);
responseLength = responseLength << getSendRspLengthBitOffset ();
unsigned message = statelessSurfaceIndex | responseLength;
unsigned headerPresent = 0x80000;
message |= headerPresent;
unsigned messageType = getSendOwordReadType ();
message |= messageType << getSendReadTypeBitOffset ();
unsigned messageLength = OWORD_PAYLOAD_HEADER_MIN_HEIGHT;
message |= messageLength << getSendMsgLengthBitOffset ();
unsigned segmentOwordSize =
cdiv (segmentByteSize, OWORD_BYTE_SIZE);
message |= blockSendBlockSizeCode (segmentOwordSize);
unsigned char execSize = LIMIT_SEND_EXEC_SIZE (segmentOwordSize * DWORD_BYTE_SIZE);
G4_Operand * msg = builder_->createImm (message, Type_UD);
// The send and receive register counts are read back from the descriptor
// bits that were just assembled.
unsigned int regs2snd = ( message >> getSendMsgLengthBitOffset() ) & 0xF;
unsigned int regs2rcv = ( message >> getSendRspLengthBitOffset() ) & 0x1F;
G4_SendMsgDescriptor * msgDesc = builder_->createSendMsgDesc( message,
regs2rcv, regs2snd, inst->getMsgDesc()->getFuncId(), inst->getMsgDesc()->isEOTInst(),
0, inst->getMsgDesc()->getExtFuncCtrl(), true, false, NULL, NULL);
inst->setSrc( msg, 1 );
inst->setMsgDesc( msgDesc );
inst->setExecSize( execSize );
}
else if( inst->getMsgDesc()->isScratchWrite() )
{
// Fix spill message
// Despite its name, curDst holds source 0 of the send (the message header).
G4_Operand * curDst = inst->getSrc(0);
G4_Declare * mRangeDcl = curDst->getTopDcl();
// Same offset rescaling and header update as in the fill case above.
unsigned offset = inst->getMsgDesc()->getScratchRWOffset();
offset = offset * (G4_GRF_REG_NBYTES / OWORD_BYTE_SIZE);
G4_Imm * offsetImm = builder_->createImm (offset, Type_UD);
G4_DstRegRegion * mHeaderOffsetDstRegion =
createMHeaderBlockOffsetDstRegion (mRangeDcl->getRegVar ());
G4_INST* mov_inst = builder_->createInternalInst (NULL, G4_mov, NULL, false, SCALAR_EXEC_SIZE, mHeaderOffsetDstRegion,
offsetImm, NULL, InstOpt_WriteEnable, inst->getLineNo(), inst->getCISAOff(), inst->getSrcFilename());
(*it)->instList.insert( jt, mov_inst );
// The data size is the original message length minus the scratch header.
unsigned segmentByteSize = (inst->getMsgDesc()->MessageLength() - SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT) * REG_BYTE_SIZE;
unsigned writePayloadCount = cdiv (segmentByteSize, REG_BYTE_SIZE);
// Build the write descriptor: surface index, header present, message type,
// message length (header plus data) and block size. No response length is
// set.
unsigned message = statelessSurfaceIndex;
unsigned headerPresent = 0x80000;
message |= headerPresent;
unsigned messageType = getSendOwordWriteType();
message |= messageType << getSendWriteTypeBitOffset ();
unsigned payloadHeaderCount = OWORD_PAYLOAD_HEADER_MAX_HEIGHT;
unsigned messageLength = writePayloadCount + payloadHeaderCount;
message |= messageLength << getSendMsgLengthBitOffset ();
unsigned segmentOwordSize = cdiv(segmentByteSize, OWORD_BYTE_SIZE);
message |= blockSendBlockSizeCode (segmentOwordSize);
unsigned char execSize = LIMIT_SEND_EXEC_SIZE (segmentOwordSize * DWORD_BYTE_SIZE);
G4_Operand * msg = builder_->createImm (message, Type_UD);
unsigned int regs2snd = ( message >> getSendMsgLengthBitOffset() ) & 0xF;
unsigned int regs2rcv = ( message >> getSendRspLengthBitOffset() ) & 0x1F;
G4_SendMsgDescriptor * msgDesc = builder_->createSendMsgDesc( message,
regs2rcv, regs2snd, inst->getMsgDesc()->getFuncId(), inst->getMsgDesc()->isEOTInst(), 0,
inst->getMsgDesc()->getExtFuncCtrl(), false, true, NULL, NULL );
inst->setSrc( msg, 1 );
inst->setMsgDesc( msgDesc );
inst->setExecSize( execSize );
}
}
jt = kt;
}
}
}
// Compute the message descriptor of a scratch spill (write) message.
//
// payloadSize      - passed to getScratchBlocksizeEncoding() to obtain the
//                    block size field.
// offsetInGrfUnits - OR-ed unmodified into the low bits of the descriptor;
//                    the caller must pass a value that fits the offset field.
//
// The message length field holds the header height only
// (SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT).
uint32_t computeSpillMsgDesc(unsigned int payloadSize, unsigned int offsetInGrfUnits)
{
    const unsigned headerPresentBit = 0x80000;
    const unsigned headerHeight = SCRATCH_PAYLOAD_HEADER_MAX_HEIGHT;

    uint32_t desc = headerPresentBit;
    desc |= (headerHeight << getSendMsgLengthBitOffset());

    // Category, channel mode and operation mode bits are all set to 1.
    desc |= (1 << SCRATCH_MSG_DESC_CATEORY);
    desc |= (1 << SCRATCH_MSG_DESC_CHANNEL_MODE);
    desc |= (1 << SCRATCH_MSG_DESC_OPERATION_MODE);

    const unsigned blockSizeCode = getScratchBlocksizeEncoding(payloadSize);
    desc |= (blockSizeCode << SCRATCH_MSG_DESC_BLOCK_SIZE);

    desc |= offsetInGrfUnits;
    return desc;
}
// Compute the message descriptor of a scratch fill (read) message.
//
// payloadSize      - passed to getScratchBlocksizeEncoding() to obtain the
//                    block size field.
// offsetInGrfUnits - OR-ed unmodified into the low bits of the descriptor;
//                    the caller must pass a value that fits the offset field.
//
// The message length field is 1.
uint32_t computeFillMsgDesc(unsigned int payloadSize, unsigned int offsetInGrfUnits)
{
    const unsigned headerPresentBit = 0x80000;
    const unsigned headerHeight = 1;

    uint32_t desc = headerPresentBit;
    desc |= (headerHeight << getSendMsgLengthBitOffset());
    desc |= (1 << SCRATCH_MSG_DESC_CATEORY);

    // Invalidate-after-read stays cleared; the statement documents the field.
    desc |= (0 << SCRATCH_MSG_INVALIDATE_AFTER_READ);

    const unsigned blockSizeCode = getScratchBlocksizeEncoding(payloadSize);
    desc |= (blockSizeCode << SCRATCH_MSG_DESC_BLOCK_SIZE);

    desc |= offsetInGrfUnits;
    return desc;
}