/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2023 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/


#include "FlowGraph.h"
#include "BuildIR.h"
#include "CFGStructurizer.h"
#include "DebugInfo.h"
#include "G4_Kernel.hpp"
#include "Option.h"
#include "visa_wa.h"

#include <algorithm>
#include <chrono>
#include <cstdlib>
#include <iostream>
#include <iterator>
#include <random>

using namespace vISA;

//
// Helper class for processGoto to merge join's execution masks.
// For example,
//   (p1) goto (8|M8) label
//   ....
//   (p2) goto (4|M4) label
//   ....
//   label:
//   join (16|M0)
// Merge( (8|M8) and (4|M4)) will be (16|M0)!
//
// Normally, we don't see this kind of code. (vISA will generate a macro
// sequence like the following.) If it happens, we have to match join's
// execMask to all of its gotos. We do so by tracking the execution mask
// (execSize + mask offset).
//
//   (p) goto (8|M8) L
//   ......
//   L:
//   join (8|M8)  // M8, not M0
//
class ExecMaskInfo {
  uint8_t ExecSize;   // 1|2|4|8|16|32
  uint8_t MaskOffset; // 0|4|8|12|16|20|24|28

  void mergeEM(ExecMaskInfo &aEM) {
    // The new execMask should cover at least [left, right)
    const uint32_t left = std::min(MaskOffset, aEM.getMaskOffset());
    const uint32_t right = std::max(MaskOffset + ExecSize,
                                    aEM.getMaskOffset() + aEM.getExecSize());
    // Divide 32 channels into 8 quarters
    uint32_t lowQuarter = left / 4;
    uint32_t highQuarter = (right - 1) / 4;
    if (lowQuarter < 4 && highQuarter >= 4) {
      // (32, M0)
      ExecSize = 32;
      MaskOffset = 0;
    } else if (lowQuarter < 2 && highQuarter >= 2) {
      // (16, M0|M16)
      ExecSize = 16;
      MaskOffset = 0;
    } else if (lowQuarter < 6 && highQuarter >= 6) {
      // (16, M16)
      ExecSize = 16;
      MaskOffset = 16;
    }
    // at this time, the range resides in one of [Q0,Q1], [Q2,Q3], [Q4,Q5], and
    // [Q6,Q7].
    else {
      // (4|8, ...)
      ExecSize = (lowQuarter != highQuarter ? 8 : 4);
      MaskOffset = left;
    }
  }
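
  // Illustrative note (not in the original source): for the example in the
  // class comment, merging (8|M8) with (4|M4) gives left = 4 and right = 16,
  // so lowQuarter = 1 and highQuarter = 3; the second branch above fires and
  // the merged execution mask becomes (16|M0), as claimed.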

public:
  ExecMaskInfo() : ExecSize(0), MaskOffset(0) {}
  ExecMaskInfo(uint8_t aE, uint8_t aM) : ExecSize(aE), MaskOffset(aM) {}

  uint8_t getExecSize() const { return ExecSize; }
  uint8_t getMaskOffset() const { return MaskOffset; }

  void mergeExecMask(G4_ExecSize aExSize, uint8_t aMaskOffset) {
    ExecMaskInfo anotherEM{aExSize, aMaskOffset};
    mergeEM(anotherEM);
  }
};

void GlobalOpndHashTable::HashNode::insert(uint16_t newLB, uint16_t newRB) {
  // Check if the newLB/RB either subsumes or can be subsumed by an existing
  // bound.
  // ToDo: consider merging bounds as well
  for (uint32_t &bound : bounds) {
    uint16_t nodeLB = getLB(bound);
    uint16_t nodeRB = getRB(bound);
    if (newLB >= nodeLB && newRB <= nodeRB) {
      return;
    } else if (newLB <= nodeLB && newRB >= nodeRB) {
      bound = packBound(newLB, newRB);
      return;
    }
  }
  bounds.push_back(packBound(newLB, newRB));
}

bool GlobalOpndHashTable::HashNode::isInNode(uint16_t lb, uint16_t rb) const {
  for (uint32_t bound : bounds) {
    uint16_t nodeLB = getLB(bound);
    uint16_t nodeRB = getRB(bound);
    if (lb <= nodeLB && rb >= nodeLB) {
      return true;
    } else if (lb > nodeLB && lb <= nodeRB) {
      return true;
    }
  }
  return false;
}
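
// Note (added for clarity): the two tests in isInNode together amount to an
// interval-overlap check -- either [lb, rb] starts at or before nodeLB and
// reaches it, or lb falls strictly inside (nodeLB, nodeRB].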

void GlobalOpndHashTable::clearHashTable() {
  for (auto &iter : globalVars) {
    iter.second->~HashNode();
  }
  globalVars.clear();
  globalOpnds.clear();
}

// all of the operands in this table are srcRegions
void GlobalOpndHashTable::addGlobalOpnd(G4_Operand *opnd) {
  G4_Declare *topDcl = opnd->getTopDcl();

  if (topDcl) {
    // global operands must have a declare
    auto entry = globalVars.find(topDcl);
    if (entry != globalVars.end()) {
      entry->second->insert((uint16_t)opnd->getLeftBound(),
                            (uint16_t)opnd->getRightBound());
    } else {
      HashNode *node = new (mem)
          HashNode((uint16_t)opnd->getLeftBound(),
                   (uint16_t)opnd->getRightBound(), private_arena_allocator);
      globalVars[topDcl] = node;
    }
    globalOpnds.push_back(opnd);
  }
}
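
// Note (added for clarity): the left/right bounds are truncated to 16 bits
// when stored above, which presumably assumes a single declare never spans
// more than 64KB.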

// if def overlaps with any operand in this table, it is treated as global
bool GlobalOpndHashTable::isOpndGlobal(G4_Operand *opnd) const {
  G4_Declare *dcl = opnd->getTopDcl();
  if (dcl == NULL) {
    return false;
  } else if (dcl->getAddressed()) {
    // Conservatively assume that all address-taken
    // virtual registers are global
    return true;
  } else {
    auto entry = globalVars.find(dcl);
    if (entry == globalVars.end()) {
      return false;
    }
    HashNode *node = entry->second;
    return node->isInNode((uint16_t)opnd->getLeftBound(),
                          (uint16_t)opnd->getRightBound());
  }
}

void GlobalOpndHashTable::dump(std::ostream &os) const {
  os << "Global variables:\n";
  for (auto &&entry : globalVars) {
    G4_Declare *dcl = entry.first;
    dcl->dump();
    if ((dcl->getRegFile() & G4_FLAG) == 0) {
      std::vector<bool> globalElt;
      globalElt.resize(dcl->getByteSize(), false);
      auto ranges = entry.second;
      for (auto bound : ranges->bounds) {
        uint16_t lb = getLB(bound);
        uint16_t rb = getRB(bound);
        for (int i = lb; i <= rb; ++i) {
          globalElt[i] = true;
        }
      }
      bool inRange = false;
      for (int i = 0, size = (int)globalElt.size(); i < size; ++i) {
        if (globalElt[i] && !inRange) {
          // start of new range
          os << "[" << i << ",";
          inRange = true;
        } else if (!globalElt[i] && inRange) {
          // end of range
          os << i - 1 << "], ";
          inRange = false;
        }
      }
      if (inRange) {
        // close last range
        os << globalElt.size() - 1 << "]";
      }
    }
    os << "\n";
  }
  os << "Instructions with global operands:\n";
  for (auto opnd : globalOpnds) {
    if (opnd->getInst()) {
      opnd->getInst()->print(os);
    }
  }
}

void FlowGraph::setPhysicalLink(G4_BB *pred, G4_BB *succ) {
  if (pred) {
    pred->setPhysicalSucc(succ);
  }
  if (succ) {
    succ->setPhysicalPred(pred);
  }
}

BB_LIST_ITER FlowGraph::insert(BB_LIST_ITER iter, G4_BB *bb) {
  G4_BB *prev = iter != BBs.begin() ? *std::prev(iter) : nullptr;
  G4_BB *next = iter != BBs.end() ? *iter : nullptr;
  setPhysicalLink(prev, bb);
  setPhysicalLink(bb, next);
  if (prev)
    updateFuncInfoPredSucc(prev, bb);
  else {
    updateFuncInfoPredSucc(bb, next);
  }
  markStale();
  return BBs.insert(iter, bb);
}
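
// Note (added for clarity): insert() keeps the physical pred/succ chain and
// the owning FuncInfo consistent with the new list order, so callers do not
// need to patch physical links themselves.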

void FlowGraph::erase(BB_LIST_ITER iter) {
  G4_BB *prev = (iter != BBs.begin()) ? *std::prev(iter) : nullptr;
  G4_BB *next = (std::next(iter) != BBs.end()) ? *std::next(iter) : nullptr;
  auto *funcInfo = (*iter)->getFuncInfo();
  if (funcInfo) {
    funcInfo->eraseBB(*iter);
    (*iter)->setFuncInfo(nullptr);
  }
  setPhysicalLink(prev, next);
  BBs.erase(iter);
  markStale();
}

void FlowGraph::addPrologBB(G4_BB *BB) {
  G4_BB *oldEntry = getEntryBB();
  insert(BBs.begin(), BB);
  addPredSuccEdges(BB, oldEntry);
}

void FlowGraph::append(const FlowGraph &otherFG) {
  markStale();

  for (auto I = otherFG.cbegin(), E = otherFG.cend(); I != E; ++I) {
    auto bb = *I;
    BBs.push_back(bb);
    incrementNumBBs();
  }
}

//
// return label's corresponding BB
// if label's BB is not yet created, then create one and add the label-to-BB
// mapping
//
G4_BB *FlowGraph::getLabelBB(Label_BB_Map &map, G4_Label *label) {
  if (map.find(label) != map.end()) {
    return map[label];
  } else {
    G4_BB *bb = createNewBB();
    map[label] = bb;
    return bb;
  }
}

//
// first is the first inst of a BB
//
G4_BB *FlowGraph::beginBB(Label_BB_Map &map, G4_INST *first) {
  if (first == NULL)
    return NULL;
  G4_INST *labelInst;
  bool newLabelInst = false;
  if (first->isLabel()) {
    labelInst = first;
  } else {
    // no label for this BB, create one!
    G4_Label *label = builder->createLocalBlockLabel();
    labelInst = createNewLabelInst(label);
    newLabelInst = true;
  }
  G4_BB *bb = getLabelBB(map, labelInst->getLabel());
  push_back(bb); // append to BBs list
  if (newLabelInst) {
    bb->push_front(labelInst);
  }
  return bb;
}

G4_INST *FlowGraph::createNewLabelInst(G4_Label *label) {
  // srcFileName is NULL
  // TODO: remove this (use createLabelInst)
  return builder->createInternalInst(NULL, G4_label, NULL, g4::NOSAT, g4::SIMD1,
                                     NULL, label, NULL, 0);
}

void FlowGraph::removePredSuccEdges(G4_BB *pred, G4_BB *succ) {
  vISA_ASSERT(pred != NULL && succ != NULL, ERROR_INTERNAL_ARGUMENT);

  BB_LIST_ITER lt = pred->Succs.begin();
  for (; lt != pred->Succs.end(); ++lt) {
    if ((*lt) == succ) {
      pred->Succs.erase(lt);
      break;
    }
  }

  lt = succ->Preds.begin();
  for (; lt != succ->Preds.end(); ++lt) {
    if ((*lt) == pred) {
      succ->Preds.erase(lt);
      break;
    }
  }

  markStale();
}

G4_BB *FlowGraph::createNewBB(bool insertInFG) {
  G4_BB *bb = new (mem) G4_BB(instListAlloc, numBBId, this);

  // Increment counter only when new BB is inserted in FlowGraph
  if (insertInFG)
    numBBId++;

  BBAllocList.push_back(bb);
  return bb;
}
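
// Note (added for clarity): when insertInFG is false the counter is not
// bumped, so the new block's id can temporarily collide with a later one;
// reassignBlockIDs() is expected to renumber blocks afterwards.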

G4_BB *FlowGraph::createNewBBWithLabel(const char *LabelSuffix) {
  G4_BB *newBB = createNewBB(true);
  G4_Label *lbl = LabelSuffix != nullptr
                      ? builder->createLocalBlockLabel(LabelSuffix)
                      : builder->createLocalBlockLabel();
  G4_INST *inst = createNewLabelInst(lbl);
  newBB->push_back(inst);
  return newBB;
}

static int globalCount = 1;

int64_t FlowGraph::insertDummyUUIDMov() {
  // Here when -addKernelId is passed
  if (builder->getIsKernel()) {
    // Add a mov inst as the first instruction in the kernel. This
    // has to be the *first* instruction in the kernel so the debugger
    // can detect the pattern.
    //
    // A 32-bit random number is generated and set as the src0 operand.
    // Earlier a nop was being generated with a 64-bit UUID overloading
    // MBZ bits, and this turned out to be a problem for the
    // debugger (random clobbering of r0).

    for (auto bb : BBs) {
      uint32_t seed = (uint32_t)std::chrono::high_resolution_clock::now()
                          .time_since_epoch()
                          .count();
      std::mt19937 mt_rand(seed * globalCount);
      globalCount++;

      G4_DstRegRegion *nullDst = builder->createNullDst(Type_UD);
      int64_t uuID = (int64_t)mt_rand();
      G4_Operand *randImm = (G4_Operand *)builder->createImm(uuID, Type_UD);
      G4_INST *movInst =
          builder->createMov(g4::SIMD1, nullDst, randImm, InstOpt_NoOpt, false);

      auto instItEnd = bb->end();
      auto it = bb->begin();

      if (it != instItEnd) {
        if ((*it)->isLabel()) {
          bb->insertBefore(++it, movInst);
          return uuID;
        }

        bb->push_front(movInst);
        return uuID;
      }
    }
  }
  return 0;
}
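
// Note (added for clarity): the loop above returns as soon as the mov is
// inserted into the first non-empty BB; iteration only continues past BBs
// that happen to be empty.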

//
// (1) check if-else-endif and iff-endif pairs
// (2) add label for those omitted ones
//
bool FlowGraph::matchBranch(int &sn, INST_LIST &instlist, INST_LIST_ITER &it) {
  G4_INST *inst = *it;
  //
  // process if-endif or if-else-endif
  //
  if (inst->opcode() == G4_if) {
    // check label / add label
    G4_Label *if_label = NULL;
    G4_Label *else_label = NULL;
    G4_Label *endif_label = NULL; // the label immediately before the endif
    G4_INST *ifInst = inst;
    vISA_ASSERT(inst->asCFInst()->getJip() == nullptr,
                "IF should not have a label at this point");

    // create if_label
    if_label = builder->createLocalBlockLabel("ifJip");
    inst->asCFInst()->setJip(if_label);

    // look for else/endif
    int elseCount = 0;
    it++;
    while (it != instlist.end()) {
      inst = *it;
      // meet iff or if
      if (inst->opcode() == G4_if) {
        if (!matchBranch(sn, instlist, it))
          return false;
      } else if (inst->opcode() == G4_else) {
        if (elseCount != 0) {
          vISA_ASSERT(
              false,
              "ERROR: Mismatched if-else: more than one else following if!");
          return false;
        }
        elseCount++;
        INST_LIST_ITER it1 = it;
        it1++;

        // add endif label to "else"
        else_label = builder->createLocalBlockLabel("elseJip");
        inst->asCFInst()->setJip(else_label);

        // insert if-else label
        G4_INST *label = createNewLabelInst(if_label);
        instlist.insert(it1, label);

        // Uip must be the same as Jip for else instructions.
        inst->asCFInst()->setUip(else_label);
      } else if (inst->opcode() == G4_endif) {
        if (elseCount == 0) // if-endif case
        {
          // insert endif label
          G4_INST *label = createNewLabelInst(if_label);
          instlist.insert(it, label);
          endif_label = if_label;
        } else // if-else-endif case
        {
          // insert endif label
          G4_INST *label = createNewLabelInst(else_label);
          instlist.insert(it, label);
          endif_label = else_label;
        }

        // we must also set the UIP of if to point to its corresponding endif
        ifInst->asCFInst()->setUip(endif_label);
        return true;
      } // if opcode
      if (it == instlist.end()) {
        vISA_ASSERT_UNREACHABLE("ERROR: Can not find endif for if!");
        return false;
      }
      it++;
    } // while
  }
  //
  // process iff-endif
  //
  else
    vISA_ASSERT_UNREACHABLE(ERROR_FLOWGRAPH);

  return false;
}

//
// HW Rules about the jip and uip for the control flow instructions
// if:
//    <branch_ctrl> set
//        jip = first join in the if block
//        uip = endif
//    <branch_ctrl> not set
//        jip = instruction right after the else, or the endif if else
//              doesn't exist
//        uip = endif
// else:
//    <branch_ctrl> set
//        jip = first join in the else block
//        uip = endif
//    <branch_ctrl> not set
//        jip = uip = endif
// endif:
//        jip = uip = next enclosing endif/while
// while:
//        jip = loop head label
//        uip = don't care
// break:
//        jip = end of innermost CF block (else/endif/while)
//        uip = while
// cont:
//        jip = end of innermost CF block (else/endif/while)
//        uip = while
//

//
// Preprocess the instruction and kernel list, including:
// 1. Check if the labels and kernel names are unique
// 2. Check if all the labels used by jmp, CALL, cont, break, goto are defined,
//    and determine if each goto is forward or backward
// 3. Process the non-label "if-else-endif" cases
//
void FlowGraph::preprocess(INST_LIST &instlist) {
  // It is easier to add the WA before control flow is built; this is a
  // partial fix for the fused-call HW bug.

  std::unordered_set<G4_Label *> labels; // label insts we have seen so far

  // Mark goto/jmpi/call as either forward or backward
  for (G4_INST *i : instlist) {
    if (i->isLabel()) {
      labels.emplace(i->getLabel());
    } else if (i->opcode() == G4_goto) {
      if (labels.count(i->asCFInst()->getUip()->asLabel())) {
        i->asCFInst()->setBackward(true);
      }
    } else if ((i->opcode() == G4_jmpi || i->isCall()) && i->getSrc(0) &&
               i->getSrc(0)->isLabel()) {
      if (labels.count(i->getSrc(0)->asLabel())) {
        i->asCFInst()->setBackward(true);
      }
    }
  }

#ifdef _DEBUG
  // sanity check to make sure we don't have undefined labels
  for (G4_INST *i : instlist) {
    if (i->opcode() == G4_goto) {
      G4_Label *target = i->asCFInst()->getUip()->asLabel();
      vISA_ASSERT(labels.count(target), "undefined goto label");
    } else if ((i->opcode() == G4_jmpi || i->isCall()) && i->getSrc(0) &&
               i->getSrc(0)->isLabel()) {
      vISA_ASSERT(labels.count((G4_Label *)i->getSrc(0)),
                  "undefined jmpi/call label");
    }
  }
#endif

  //
  // Process the non-label "if-else-endif" cases as follows.
  // ToDo: remove this once we stop generating if-else-endif for the IEEE macros
  //
  {
    int sn = 0;
    for (INST_LIST_ITER it = instlist.begin(), instlistEnd = instlist.end();
         it != instlistEnd; ++it) {
      G4_INST *inst = *it;
      if (inst->opcode() == G4_if) {
        if (!matchBranch(sn, instlist, it)) {
          vISA_ASSERT(false, "ERROR: mismatched if-branch %x", inst);
          break;
        }
      } // fi
    }   // for
  }
}

void FlowGraph::normalizeFlowGraph() {
  // For a CISA 3 flowgraph there will be pseudo_fcall instructions to invoke
  // stack functions. Save/restore code will be added around these at the time
  // of register allocation. If an fcall's successor has more than 1 predecessor
  // then it will create problems inserting code. This function handles such
  // patterns by creating a new basic block, guaranteeing that the fcall's
  // successor has a single predecessor.
  for (BB_LIST_ITER it = BBs.begin(); it != BBs.end(); it++) {
    G4_BB *bb = *it;

    if (bb->isEndWithFCall()) {
      G4_BB *retBB = bb->Succs.front();
      if (retBB->Preds.size() > 1) {

        // To insert restore code we need to guarantee that save code has
        // been executed. If retBB has multiple preds, this may not be
        // guaranteed, so insert a new node.
        G4_BB *newNode = createNewBB();

        // Remove old edge
        removePredSuccEdges(bb, retBB);

        // Add new edges
        addPredSuccEdges(bb, newNode);
        addPredSuccEdges(newNode, retBB);

        // Create and insert label inst
        G4_Label *lbl = builder->createLocalBlockLabel();
        G4_INST *inst = createNewLabelInst(lbl);
        newNode->push_back(inst);
        insert(++it, newNode);
        it--;

        retBB = newNode;
      }
    }
  }
}
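
// Illustrative sketch (not in the original source) of the transformation
// performed above:
//
//   before:  FCALL_BB ----> retBB <---- otherPred
//   after:   FCALL_BB ----> newNode ----> retBB <---- otherPred
//
// so the save/restore code inserted around the fcall at RA time has a
// single-predecessor landing block.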

//
// build a control flow graph from the inst list
// we want to keep the original BB order (same as shown in assembly code) for
// linear scan register allocation. We use beginBB to indicate that we encounter
// the beginning of a BB and add the BB to the BB list, and use getLabelBB to
// create a block for a label without adding it to the BB list. For instance,
// when we come across a "jmp target", getLabelBB is called to create a BB for
// target, but the target BB is added into the list later (assuming a forward
// jmp) as the target label is visited.
//
void FlowGraph::constructFlowGraph(INST_LIST &instlist) {
  // vISA_ASSERT(!instlist.empty(), ERROR_SYNTAX("empty instruction list"));
  setCurrentDebugPass("CFG");
  VISA_DEBUG(std::cout << "Entering CFG construction\n");

  bool scratchUse = false;
  if (builder->hasScratchSurface() &&
      (isStackCallFunc || hasStackCalls || scratchUse)) {
    // unfortunately we can't put this in the stack call prolog since this has
    // to be done before RA.
    // ToDo: just hard-wire the scratch-surface offset register?
    builder->initScratchSurfaceOffset();
  }
  if (builder->hasFusedEU() && !builder->getOption(vISA_KeepScalarJmp) &&
      getKernel()->getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM) {
    getKernel()->getOptions()->setOptionInternally(vISA_EnableScalarJmp, false);
  }

  //
  // The funcInfoHashTable maintains a map from the id of the function's INIT
  // block to its FuncInfo definition.
  //
  FuncInfoHashTable funcInfoHashTable;

  preprocess(instlist);

  //
  // map label to its corresponding BB
  //
  std::unordered_map<G4_Label *, G4_BB *> labelMap;
  //
  // create the entry block of the flow graph
  //
  G4_BB *curr_BB = beginBB(labelMap, instlist.front());

  kernelInfo = new (mem) FuncInfo(UINT_MAX, curr_BB, NULL);

  std::vector<G4_BB *> subroutineStartBB; // needed by handleExit()

  bool hasSIMDCF = false, hasNoUniformGoto = false;
  G4_Label *currSubroutine = nullptr;

  auto addToSubroutine = [this](G4_BB *bb, G4_Label *currSubroutine) {
    if (currSubroutine) {
      subroutines[currSubroutine].push_back(bb);
    }
  };

  while (!instlist.empty()) {
    INST_LIST_ITER iter = instlist.begin();
    G4_INST *i = *iter;

    vISA_ASSERT(curr_BB != NULL, "Current BB must not be empty");
    //
    // inst i belongs to the current BB;
    // remove inst i from instlist and relink it to curr_BB's instList
    //
    curr_BB->splice(curr_BB->end(), instlist, iter);
    G4_INST *next_i = (instlist.empty()) ? nullptr : instlist.front();

    if (i->isLabel() && i->getLabel()->isSubroutine()) {
      std::vector<G4_BB *> bbvec;
      subroutines[i->getLabel()] = std::move(bbvec);
      currSubroutine = i->getLabel();
      subroutineStartBB.push_back(curr_BB);
    }

    //
    // do and endif do not have a predicate or a jump-to label, so we treat
    // them as non-control instructions; the labels around them decide the
    // beginning of a new BB
    //
    if (i->isFlowControl() && i->opcode() != G4_endif) {
      addToSubroutine(curr_BB, currSubroutine);
      G4_BB *next_BB = beginBB(labelMap, next_i);

      // next_BB may be null if the kernel ends on a CF inst (e.g., backward
      // goto/jmpi). This should be ok because we should not fall through to
      // next_BB in such a case (i.e., goto/jmpi must not be predicated)
      {
        if (i->opcode() == G4_jmpi || i->isCall()) {
          //
          // add control flow edges <curr_BB,target>
          //
          if (i->getSrc(0)->isLabel()) {
            addPredSuccEdges(curr_BB,
                             getLabelBB(labelMap, i->getSrc(0)->asLabel()));
          } else if (i->asCFInst()->isIndirectJmp()) {
            // indirect jmp
            // For each label in the switch table, we insert a jmpi
            // to that label. We keep the jmpi in the same
            // block as the indirect jmp instead of creating a new block for
            // each of them, as the offset actually determines which jmpi
            // instruction we will hit.
            // FIXME: We may want to delay the emission of the jmps to
            // each individual label, so that we still maintain the property
            // that every basic block ends with a control flow instruction
            const std::list<G4_Label *> &jmpTargets =
                i->asCFInst()->getIndirectJmpLabels();
            for (auto it = jmpTargets.begin(), jmpTrgEnd = jmpTargets.end();
                 it != jmpTrgEnd; ++it) {
              G4_INST *jmpInst =
                  builder->createJmp(nullptr, *it, InstOpt_NoOpt, true);
              indirectJmpTarget.emplace(jmpInst);
              INST_LIST_ITER jmpInstIter = builder->instList.end();
              curr_BB->splice(curr_BB->end(), builder->instList, --jmpInstIter);
              addPredSuccEdges(curr_BB, getLabelBB(labelMap, (*it)));
            }
          }

          if (i->isCall()) {
            //
            // the <CALL, return addr> link is added temporarily to facilitate
            // linking RETURN with return addresses. The link will be removed
            // after it serves its purpose. NOTE: we add this link regardless
            // of the call's predicate; we check the predicate in
            // handleReturn() and only remove the link when it is not a
            // conditional call
            //
            addPredSuccEdges(curr_BB, next_BB);
          } else if (i->getPredicate()) {
            // add fall through edge
            addUniquePredSuccEdges(curr_BB, next_BB);
          }
        } // if (i->opcode()
        else if (i->opcode() == G4_if) {
          hasSIMDCF = true;
          if (i->asCFInst()->getJip()->isLabel()) {
            if (i->getPredicate()) {
              addPredSuccEdges(curr_BB,
                               getLabelBB(labelMap, i->asCFInst()->getJip()));
            }
          }
          // always fall through
          addPredSuccEdges(curr_BB, next_BB);
        } else if (i->isReturn() || i->opcode() == G4_pseudo_exit) {
          // does nothing for unconditional return;
          // a later phase will link the return and the return address
          if (i->getPredicate()) {
            // add fall through edge
            addPredSuccEdges(curr_BB, next_BB);
          }
        } else if (i->opcode() == G4_pseudo_fcall ||
                   i->opcode() == G4_pseudo_fc_call) {
          addPredSuccEdges(curr_BB, next_BB);
        } else if (i->opcode() == G4_pseudo_fret ||
                   i->opcode() == G4_pseudo_fc_ret) {
          if (i->getPredicate()) {
            // need to add fall through edge
            addPredSuccEdges(curr_BB, next_BB);
          }
        } else if (i->opcode() == G4_goto) {
          hasNoUniformGoto = true;
          hasSIMDCF = true;
          addPredSuccEdges(curr_BB,
                           getLabelBB(labelMap, i->asCFInst()->getUip()));

          if (i->getPredicate()) {
            // fall through, but check if goto target is same as fall-thru
            // FIXME: replace all addPredSuccEdges with addUniquePredSuccEdges?
            addUniquePredSuccEdges(curr_BB, next_BB);
          }
        } else {
          vISA_ASSERT_UNREACHABLE("should not reach here");
        }
      } // need edge
      curr_BB = next_BB;
    } // flow control
    else if (curr_BB->isLastInstEOT()) {
      addToSubroutine(curr_BB, currSubroutine);
      // this is a send instruction that marks end of thread.
      // the basic block will end here with no outgoing links
      curr_BB = beginBB(labelMap, next_i);
    } else if (next_i && next_i->isLabel()) {
      addToSubroutine(curr_BB, currSubroutine);
      G4_BB *next_BB = beginBB(labelMap, next_i);
      addPredSuccEdges(curr_BB, next_BB);
      curr_BB = next_BB;
    }
  }
  if (curr_BB) {
    addToSubroutine(curr_BB, currSubroutine);
  }

  if (builder->getFreqInfoManager().isProfileEmbeddingEnabled())
    builder->getFreqInfoManager().updateStaticFrequency(subroutines);

  // we can do this only after fg is constructed
  pKernel->calculateSimdSize();

  // Jmpi instructions cannot be used when EU Fusion is enabled.
  bool hasGoto = hasNoUniformGoto;
  if (builder->noScalarJmp()) {
    // No jmpi should be inserted after this point.
    hasGoto |= convertJmpiToGoto();
  }

  pKernel->dumpToFile("after.CFGConstruction");

  // Do call WA before return/exit processing.
  if (builder->hasFusedEU() &&
      builder->getuint32Option(vISA_fusedCallWA) == 2) {
    hasGoto |= convertPredCall(labelMap);
  }

  removeRedundantLabels();

  pKernel->dumpToFile("after.RemoveRedundantLabels");

  handleExit(subroutineStartBB.size() > 1 ? subroutineStartBB[1] : nullptr);

  handleReturn(labelMap, funcInfoHashTable);
  mergeReturn(funcInfoHashTable);
  normalizeSubRoutineBB(funcInfoHashTable);

  handleWait();

  if (isStackCallFunc) {
    mergeFReturns();
  }

  setPhysicalPredSucc();
  removeUnreachableBlocks(funcInfoHashTable);
  removeRedundantLabels();

  pKernel->dumpToFile("after.RemoveUnreachableBlocks");

  //
  // build the table of function info nodes
  //
  for (FuncInfoHashTable::iterator it = funcInfoHashTable.begin(),
                                   end = funcInfoHashTable.end();
       it != end; ++it) {
    FuncInfo *funcInfo = (*it).second;
    funcInfo->getInitBB()->setFuncInfo(funcInfo);
    funcInfo->getExitBB()->setFuncInfo(funcInfo);
    funcInfoTable.push_back(funcInfo);
  }

  if (hasGoto) {
    // The structurizer requires that the last BB has no goto (i.e., the
    // last BB is either a return or an exit).
    if (builder->getOption(vISA_EnableStructurizer) && !endWithGotoInLastBB()) {
      doCFGStructurize(this);
      pKernel->dumpToFile("after.CFGStructurizer");

      removeRedundantLabels();
      pKernel->dumpToFile("after.PostStructurizerRedundantLabels");
    } else {
      processGoto();
      pKernel->dumpToFile("after.ProcessGoto");
    }
  }

  // This finds back edges and populates blocks into function info objects.
  // The latter will be used to mark SIMD blocks. Prior to RA, no transformation
  // shall invalidate back edges (G4_BB -> G4_BB).
  reassignBlockIDs();
  findBackEdges();

  // TODO: I think this can be removed since vISA no longer has structured CF.
  // For math intrinsics if/endif may still be generated, but they are present
  // for IGC as well, which suggests this pass is not needed.
  if (hasSIMDCF &&
      pKernel->getInt32KernelAttr(Attributes::ATTR_Target) == VISA_CM) {
    processSCF();
    addSIMDEdges();
  }

  // patch the last BB for the kernel
  if (funcInfoTable.size() > 0) {
    // subroutineStartBB[1] may be dead or no longer the entry BB of the first
    // subroutine (due to normalizeSubRoutineBB()/removeUnreachableBlocks()).
    // Need to find out the entry BB of the first subroutine.
    auto iter = std::find_if(BBs.begin(), BBs.end(), [](G4_BB *B) {
      return (B->getBBType() & G4_BB_INIT_TYPE);
    });
    G4_BB *exitBB = nullptr;
    if (iter != BBs.end())
      exitBB = (*iter)->getPhysicalPred();
    else
      exitBB = BBs.back();
    kernelInfo->updateExitBB(exitBB);
    topologicalSortCallGraph();
  } else {
    kernelInfo->updateExitBB(BBs.back());
  }

  // For a non-kernel function, always invoke markDivergentBBs to
  // conservatively assume divergence.
  if ((hasSIMDCF || hasGoto || builder->getIsFunction()) &&
      builder->getOption(vISA_divergentBB)) {
    markDivergentBBs();
  }

  builder->materializeGlobalImm(getEntryBB());
  normalizeRegionDescriptors();
  localDataFlowAnalysis();

  for (auto *funcInfo : funcInfoTable) {
    for (auto *bb : funcInfo->getBBList())
      bb->setFuncInfo(funcInfo);
  }

  for (auto *bb : kernelInfo->getBBList())
    bb->setFuncInfo(kernelInfo);

  canUpdateFuncInfo = true;

  markStale();
  setCurrentDebugPass(nullptr);
}

void FlowGraph::normalizeRegionDescriptors() {
  for (auto bb : BBs) {
    for (auto inst : *bb) {
      uint16_t execSize = inst->getExecSize();
      for (unsigned i = 0, e = (unsigned)inst->getNumSrc(); i < e; ++i) {
        G4_Operand *src = inst->getSrc(i);
        if (!src || !src->isSrcRegRegion())
          continue;

        G4_SrcRegRegion *srcRegion = src->asSrcRegRegion();
        const RegionDesc *desc = srcRegion->getRegion();
        auto normDesc = builder->getNormalizedRegion(execSize, desc);
        if (normDesc && normDesc != desc) {
          srcRegion->setRegion(*builder, normDesc, /*invariant*/ true);
        }
      }
    }
  }
}
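
// Note (added for clarity): rewriting each source region to its normalized
// equivalent for the instruction's execSize presumably lets later passes
// compare region descriptors structurally instead of semantically.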

// materialize the values in global Imm at the entry BB
void IR_Builder::materializeGlobalImm(G4_BB *entryBB) {
  InstSplitPass instSplitter(this);
  for (int i = 0, numImm = immPool.size(); i < numImm; ++i) {
    auto &&immVal = immPool.getImmVal(i);
    auto dcl = immPool.getImmDcl(i);
    G4_INST *inst = createMov(G4_ExecSize((unsigned)immVal.numElt),
                              createDstRegRegion(dcl, 1), immVal.imm,
                              InstOpt_WriteEnable, false);

    // Check if entryBB has an instruction with SSO
    // and set the insertion point after this instruction
    G4_INST *instSSO = getSSOInst();
    INST_LIST_ITER iter;
    if (instSSO) {
      iter = std::find_if(entryBB->begin(), entryBB->end(),
                          [&instSSO](G4_INST *inst) { return inst == instSSO; });
      iter++;
    } else {
      iter = std::find_if(entryBB->begin(), entryBB->end(),
                          [](G4_INST *inst) { return !inst->isLabel(); });
    }

    INST_LIST_ITER newMov = entryBB->insertBefore(iter, inst);
    instSplitter.splitInstruction(newMov, entryBB->getInstList());
  }
}

//
// attempt to sink the pseudo-wait into the immediately succeeding send
// instruction (in the same BB) by changing it into a sendc. If sinking fails,
// generate a fence with sendc.
//
void FlowGraph::handleWait() {
  for (auto bb : BBs) {
    auto iter = bb->begin(), instEnd = bb->end();
    while (iter != instEnd) {
      auto inst = *iter;
      if (inst->isIntrinsic() &&
          inst->asIntrinsicInst()->getIntrinsicId() == Intrinsic::Wait) {
        bool sunk = false;
        auto nextIter = iter;
        ++nextIter;
        while (nextIter != instEnd) {
          G4_INST *nextInst = *nextIter;
          if (nextInst->isSend()) {
            nextInst->asSendInst()->setSendc();
            sunk = true;
            break;
          } else if (nextInst->isOptBarrier()) {
            break;
          }
          ++nextIter;
        }
        if (!sunk) {
          auto fenceInst = builder->createSLMFence(nullptr);
          auto sendInst = fenceInst->asSendInst();
          if (sendInst != NULL) {
            sendInst->setSendc();
            bb->insertBefore(iter, fenceInst);
          }
        }
        iter = bb->erase(iter);
      } else {
        ++iter;
      }
    }
  }
}

// handleExit():
//   Each g4_pseudo_exit instruction will be translated into an EOT send.
//
// Terminology:
//   Exit BB:
//     any BB that ends with an EOT send.
//   G4_pseudo_exit:
//     a 'ret' instruction. It may be translated either to an exit BB or
//     to a jump to an exit BB.
//   Raw EOT send:
//     a send with EOT already set coming into this function.
//   EOT send:
//     either a raw EOT send or a send into which g4_pseudo_exit is folded in
//     this function.
//
// Compute Kernel:
//   Normalize the CFG so that it has a single exit BB and it is the last BB.
//   (If g4_pseudo_exit is predicated or under diverged control flow, it
//   should be translated to a jump to the final exit BB.)
// 3D shader:
//   Normalize the CFG so that the last BB is the exit BB. If the input has
//   more than one exit BB (it seems igc always generates a single one),
//   it remains that way without merging.
//
// Note:
//   (1) this code implies 3d shaders should have a send immediately
//       before ret and the send must support EOT (-foldEOT must be set).
//   (2) it also implies that 3d shaders don't support predicated ret
//       instructions, as a predicated ret results in a gateway EOT send,
//       which is only for compute kernels.
//   Special case for raw EOT sends: they are handled similarly to the
//   folded EOT send of 3D shaders.
//
// With this, the last BB will always be an exit BB. In addition, compute
// kernels should have a single exit BB at the end (not counting raw EOT
// sends).
//
// Last, if there is neither a g4_pseudo_exit nor a raw EOT send, the CFG
// remains as-is without an exit BB. (Probably not right, but several lit tests
// have no ret inst. Just leave it this way.)
//
// Note: FC kernel behavior remains unchanged with this refactor (10/2023).
void FlowGraph::handleExit(G4_BB *firstSubroutineBB) {

  // blocks that end with a non-uniform or conditional return
  std::vector<G4_BB *> exitBlocks;
  // Last BB with EOT folded into a send, including raw sends.
  G4_BB *lastEOTBB = nullptr;
  BB_LIST_ITER iter = BBs.begin(), iterEnd = BBs.end();
  for (; iter != iterEnd; ++iter) {
    G4_BB *bb = *iter;
    if (bb->size() == 0) {
      continue;
    }

    if (bb == firstSubroutineBB) {
      // we've reached the first subroutine's entry BB
      break;
    }

    G4_INST *lastInst = bb->back();
    if (lastInst->opcode() == G4_pseudo_exit) {
      bool needsEOTSend = true;
      if (lastInst->getExecSize() == g4::SIMD1 && !lastInst->getPredicate() &&
          !builder->getFCPatchInfo()->getFCComposableKernel()) {
        // uniform & unconditional return. Try to fold EOT into the BB's last
        // instruction if it's a send that supports EOT. (Folding should happen
        // for 3d shaders, as vISA can insert EOT for compute kernels only.)
        if (builder->getOption(vISA_foldEOTtoPrevSend) && bb->size() > 2) {
          auto iter2 = std::prev(bb->end(), 2);
          G4_InstSend *secondToLastInst = (*iter2)->asSendInst();
          if (secondToLastInst && secondToLastInst->canBeEOT() &&
              !(secondToLastInst->getMsgDesc()->getSrc1LenRegs() > 2 &&
                VISA_WA_CHECK(builder->getPWaTable(),
                              WaSendsSrc1SizeLimitWhenEOT))) {
            // common case for 3d shaders
            // (note: expect 3d shaders not to have predicated ret)
            secondToLastInst->setEOT();
            bb->pop_back();
            needsEOTSend = false;
            lastEOTBB = bb;
            if (builder->getHasNullReturnSampler() &&
                VISA_WA_CHECK(builder->getPWaTable(), Wa_1607871015)) {
              bb->addSamplerFlushBeforeEOT();
            }
          }
        }
      }
      if (needsEOTSend) {
        exitBlocks.push_back(bb);
      }
    } else if (lastInst->isSend() && lastInst->asSendInst()->isEOT()) {
      // likely, raw send
      lastEOTBB = bb;
    }
  }

  // Last BB of the entry
  auto lastBB = *(std::prev(iter));
  if (exitBlocks.size() == 1 && exitBlocks[0] == lastBB) {
    // Common case for a compute kernel:
    // a single & unconditional exit that is already at the end.
    //
    // Rationale for checking for a null predicate:
    // if an exit has a predicate, it shall have a fall-thru BB and
    // therefore it will not be the last BB.
    vASSERT(lastBB->size() > 0 && lastBB->back()->opcode() == G4_pseudo_exit &&
            !lastBB->back()->getPredicate());
    G4_INST *lastInst = lastBB->back();
    lastBB->pop_back();
    if (!builder->getFCPatchInfo()->getFCComposableKernel()) {
      // Don't insert EOT send for FC composable kernels
      lastBB->addEOTSend(lastInst);
    }
  } else if (exitBlocks.size() > 0) {
    // Create a new exit BB (note: should be for compute kernels).
    G4_BB *exitBB = createNewBB();
    if (builder->getFCPatchInfo()->getFCComposableKernel()) {
      // For FC composable kernels insert exitBB as
      // the last BB in the BBs list. This automatically does
      // jump threading.
      push_back(exitBB);
    } else {
      insert(iter, exitBB);
    }

    G4_Label *exitLabel = builder->createLocalBlockLabel("EXIT_BB");
    G4_INST *label = createNewLabelInst(exitLabel);
    exitBB->push_back(label);

    // For debugInfo, use the last G4_pseudo_exit's DI for EOT
    G4_BB *lastRetBB = exitBlocks.back();
    vASSERT(lastRetBB->size() > 0);
    G4_INST *lastRetI = lastRetBB->back();

    if (!builder->getFCPatchInfo()->getFCComposableKernel()) {
      // Don't insert EOT send for FC composable kernels
      exitBB->addEOTSend(lastRetI);
    }

    for (int i = 0, size = (int)exitBlocks.size(); i < size; i++) {
      G4_BB *retBB = exitBlocks[i];
      addPredSuccEdges(retBB, exitBB, false);
      G4_INST *retInst = retBB->back();
      retBB->pop_back();

      // Insert a jump only if the newly inserted exitBB is not the lexical
      // successor of the current BB
      auto lastBBIt = BBs.end();
      lastBBIt--;
      if ((*lastBBIt) == exitBB) {
        lastBBIt--;
        if ((*lastBBIt) == retBB) {
          // This condition is BB layout dependent.
          // However, we don't change BB layout in JIT,
          // and in case we do it in the future, we
          // will need to insert correct jumps
          // there to preserve correctness.
          continue;
        }
      }

      if (retInst->getExecSize() == g4::SIMD1) {
        G4_INST *jmpInst = builder->createJmp(retInst->getPredicate(),
                                              exitLabel, InstOpt_NoOpt, false);
        jmpInst->inheritDIFrom(retInst);
        retBB->push_back(jmpInst);
      } else {
        // uip for goto will be fixed later
        G4_INST *gotoInst = builder->createInternalCFInst(
            retInst->getPredicate(), G4_goto, retInst->getExecSize(), exitLabel,
            exitLabel, InstOpt_NoOpt);
        gotoInst->inheritDIFrom(retInst);
        retBB->push_back(gotoInst);
      }
    }
  } else if (lastEOTBB && lastEOTBB != lastBB) {
    // create a new exit bb and move lastEOTBB's insts over
    G4_BB *newExitBB = createNewBBWithLabel("EXIT_BB");
    G4_Label *newExitLabel = newExitBB->getLabel();
    newExitBB->splice(newExitBB->end(), lastEOTBB,
                      lastEOTBB->getFirstInsertPos(), lastEOTBB->end());
    // insert a goto to jump from lastEOTBB to this new BB.
    G4_INST *gotoInst = builder->createInternalCFInst(
        nullptr, G4_goto, pKernel->getSimdSize(), newExitLabel, newExitLabel,
        InstOpt_NoOpt);

    // Use DI from lastEOTBB's first inst
    gotoInst->inheritDIFrom(newExitBB->getFirstInst());

    lastEOTBB->push_back(gotoInst);

    insert(iter, newExitBB);
    addPredSuccEdges(lastEOTBB, newExitBB, true);
  }

#ifdef _DEBUG

  // sanity check
  for (const G4_BB *bb : BBs) {
    if (bb->size() == 0) {
      continue;
    }
    const G4_INST *lastInst = bb->back();
    if (lastInst->opcode() == G4_pseudo_exit) {
      vISA_ASSERT_UNREACHABLE("All pseudo exits should be removed at this point");
    }
  }

#endif
}

//
// This phase iterates over each BB and checks if the last inst of a BB is a
// "call foo". If yes, the algorithm traverses from the block of foo to search
// for RETURN and links the block of RETURN with the block of the return
// address
//
void FlowGraph::handleReturn(Label_BB_Map &labelMap,
                             FuncInfoHashTable &funcInfoHashTable) {
  for (std::list<G4_BB *>::iterator it = BBs.begin(), itEnd = BBs.end();
       it != itEnd; ++it) {
    G4_BB *bb = (*it);

    if (bb->isEndWithCall()) {
      bb->setBBType(G4_BB_CALL_TYPE);
      G4_INST *last = bb->back();
      if (last->getSrc(0)->isLabel()) {
        // make sure bb has only two successors, one subroutine and one return
        // addr
        vASSERT(bb->Succs.size() == 2);

        // find the subroutine BB and return Addr BB
        G4_BB *subBB = labelMap[last->getSrc(0)->asLabel()];
        //
        // the fall through BB must be the front
        //
        G4_BB *retAddr = bb->Succs.front();
        if (retAddr->Preds.size() > 1) {
          // Create an empty BB as a new RETURN BB as we want a RETURN BB
          // to be reached only by a CALL BB. Note that at this time, the call
          // return edge has not been added yet.
          G4_INST *I0 = retAddr->getFirstInst();
          if (I0 == nullptr)
            I0 = last;
          G4_BB *newRetBB = createNewBBWithLabel("Return_BB");
          bb->removeSuccEdge(retAddr);
          retAddr->removePredEdge(bb);
          addPredSuccEdges(bb, newRetBB, true);
          addPredSuccEdges(newRetBB, retAddr, true);

          insert(std::next(it), newRetBB);
          retAddr = newRetBB;
        }
        // Add EXIT->RETURN edge.
        linkReturnAddr(subBB, retAddr);

        // set callee info for CALL
        FuncInfoHashTable::iterator calleeInfoLoc =
            funcInfoHashTable.find(subBB->getId());

        if (calleeInfoLoc != funcInfoHashTable.end()) {
          (*calleeInfoLoc).second->incrementCallCount();
          bb->setCalleeInfo((*calleeInfoLoc).second);
          doIPA = true;
        } else {
          unsigned funcId = (unsigned)funcInfoHashTable.size();
          FuncInfo *funcInfo =
              new (mem) FuncInfo(funcId, subBB, retAddr->Preds.front());
          std::pair<FuncInfoHashTable::iterator, bool> loc =
              funcInfoHashTable.insert(
                  std::make_pair(subBB->getId(), funcInfo));
          subBB->setBBType(G4_BB_INIT_TYPE);
          retAddr->Preds.front()->setBBType(G4_BB_EXIT_TYPE);
          vISA_ASSERT(loc.second, ERROR_FLOWGRAPH);
          bb->setCalleeInfo((*(loc.first)).second);
        }
        retAddr->setBBType(G4_BB_RETURN_TYPE);
      }
    }

    if (bb->isEndWithFCall()) {
      // normalizeFlowGraph() processes FCALL BBs. (Maybe they should be
      // processed here?) Here just set the BB type
      bb->setBBType(G4_BB_FCALL_TYPE);
    }
  }
  //
  // remove the <CALL, return addr> link when it is not a conditional call
  //
  for (G4_BB *bb : BBs) {
    if (bb->isEndWithCall()) {
      G4_INST *last = bb->back();
      if (last->getPredicate() == NULL) {
        vISA_ASSERT(!bb->Succs.empty(), ERROR_FLOWGRAPH);
        G4_BB *retAddr = bb->Succs.front(); // return BB must be the front
        bb->removeSuccEdge(retAddr);
        retAddr->removePredEdge(bb);
      }
    }

    if (bb->isLastInstEOT() && !bb->back()->isReturn()) {
      G4_BB *succ = bb->getPhysicalSucc();
      if (succ && succ->Preds.empty() && succ->getBBType() != G4_BB_INIT_TYPE) {
        succ->Preds.push_back(bb);
        bb->Succs.push_back(succ);
      }
    }
  }
}

void FlowGraph::linkReturnAddr(G4_BB *entryBB, G4_BB *returnAddr) {

  vISA_ASSERT(entryBB->size() > 0 && entryBB->front()->isLabel(),
              "BB should start with a label");
  auto label = entryBB->front()->getLabel();
  vISA_ASSERT(subroutines.count(label), "can't find subroutine label");
  auto subroutineBBs = subroutines[label];

  for (auto bb : subroutineBBs) {
    if (!bb->empty() && bb->back()->isReturn()) {
      //
      // check for a direct recursive call here!
      //
      if (bb == returnAddr && hasStackCalls == false) {
        vISA_ASSERT(false, "ERROR: Do not support recursive subroutine call!");
      }
      // IMPORTANT: add to the back so that the fall-through edge stays in
      // the front, which is used by fallThroughBB()
      addPredSuccEdges(bb, returnAddr, false);
    }
  }
}

//
// This phase iterates over each BB and checks if the last inst of a BB is a
// "call foo". If yes, the algorithm traverses from the block of foo to search
// for RETURN. If multiple returns exist, the algorithm will merge them into one
// [Reason]: to handle the case when spill code is needed between the multiple
//           return BBs of one subroutine and the afterCall BB. It is
//           impossible to insert the spill code of different return BBs all
//           before the afterCall BB.
//
void FlowGraph::mergeReturn(FuncInfoHashTable &funcInfoHashTable) {

  for (auto I = subroutines.begin(), E = subroutines.end(); I != E; ++I) {
    auto label = I->first;
    G4_BB *subBB = I->second.front();
    G4_BB *mergedExitBB = mergeSubRoutineReturn(label);

    // update callee exit block info
    FuncInfoHashTable::iterator calleeInfoLoc =
        funcInfoHashTable.find(subBB->getId());
    // it's possible for the subroutine to never be called (e.g., kernel
    // function)
    if (calleeInfoLoc != funcInfoHashTable.end() && mergedExitBB) {
      calleeInfoLoc->second->getExitBB()->unsetBBType(G4_BB_EXIT_TYPE);
      mergedExitBB->setBBType(G4_BB_EXIT_TYPE);
      calleeInfoLoc->second->updateExitBB(mergedExitBB);
    }
  }
}

G4_BB *FlowGraph::mergeSubRoutineReturn(G4_Label *subroutineLabel) {

  G4_BB *newBB = nullptr;

  auto subroutineBB = subroutines[subroutineLabel];

  std::vector<G4_BB *> retBBList;
  for (auto bb : subroutineBB) {
    if (!bb->empty() && bb->back()->isReturn()) {
      retBBList.push_back(bb);
    }
  }

  if (retBBList.size() > 1) // For return number > 1, need to merge returns
  {
    builder->instList.clear();

    //
    // insert newBB into the fg.BBs list
    //
    newBB = createNewBB();
    insert(BBs.end(), newBB);
    // choose the last BB in retBBList as a candidate
    G4_BB *candidateBB = *(retBBList.rbegin());
    // Add <newBB, succBB> edges
    G4_INST *last = candidateBB->back();
    BB_LIST_ITER succIt = (last->getPredicate() == NULL)
                              ? candidateBB->Succs.begin()
                              : (++candidateBB->Succs.begin());
    BB_LIST_ITER succItEnd = candidateBB->Succs.end();
    // link the new ret BB with each call site
    for (; succIt != succItEnd; ++succIt) {
      addPredSuccEdges(newBB, (*succIt), false);
    }

    //
    // Create a label for the newBB and insert a return into the new BB
    //
    G4_Label *lab = builder->createLocalBlockLabel();
    G4_INST *labelInst = createNewLabelInst(lab);

    // exitBB is really just a dummy one for analysis, and does not need a
    // return; we will instead leave the return in each of its predecessors
    newBB->push_back(labelInst);

    //
    // Deal with all return BBs
    //
    for (G4_BB *retBB : retBBList) {
      if (retBB->getId() == newBB->getId()) {
        continue;
      }
      last = retBB->back();
      // remove <retBB, its succBB> edges; do not remove the fall-through edge
      // if predicated
      BB_LIST retSuccsList;
      retSuccsList.assign(retBB->Succs.begin(), retBB->Succs.end());
      for (BB_LIST_ITER retSuccIt = retSuccsList.begin();
           retSuccIt != retSuccsList.end(); ++retSuccIt) {
        G4_BB *retSuccBB = (*retSuccIt);
        if (last->getPredicate() != NULL && retSuccIt == retSuccsList.begin())
          continue;
        retBB->removeSuccEdge(retSuccBB);
        retSuccBB->removePredEdge(retBB);
      }
      // Add <retBB, newBB> edges
      addPredSuccEdges(retBB, newBB, false);
    }
  }

  return newBB;
}

void FlowGraph::decoupleInitBlock(G4_BB *bb,
                                  FuncInfoHashTable &funcInfoHashTable) {
  G4_BB *oldInitBB = bb;
  G4_BB *newInitBB = createNewBB();
  BB_LIST_ITER insertBefore = std::find(BBs.begin(), BBs.end(), bb);
  vISA_ASSERT(insertBefore != BBs.end(), ERROR_FLOWGRAPH);
  insert(insertBefore, newInitBB);

  BB_LIST_ITER kt = oldInitBB->Preds.begin();
  while (kt != oldInitBB->Preds.end()) {
    // the preds of this new INIT BB are all call BBs
    if ((*kt)->getBBType() & G4_BB_CALL_TYPE) {

      newInitBB->Preds.push_back((*kt));

      BB_LIST_ITER jt = (*kt)->Succs.begin();
      while (jt != (*kt)->Succs.end()) {
        if ((*jt) == oldInitBB) {
          break;
        }
        jt++;
      }
      vISA_ASSERT(jt != (*kt)->Succs.end(), ERROR_FLOWGRAPH);
      (*kt)->Succs.insert(jt, newInitBB);
      (*kt)->Succs.erase(jt);

      BB_LIST_ITER tmp_kt = kt;
      ++kt;
      // erase this pred from the old INIT BB's preds
      oldInitBB->Preds.erase(tmp_kt);
    } else {
      ++kt;
    }
  }

  FuncInfoHashTable::iterator old_iter =
      funcInfoHashTable.find(oldInitBB->getId());
  vISA_ASSERT(old_iter != funcInfoHashTable.end(),
              " Function info is not in hashtable.");
  FuncInfo *funcInfo = old_iter->second;

  // Erase the old item from the unordered_map and add the new one.
  funcInfo->updateInitBB(newInitBB);
  funcInfoHashTable.erase(old_iter);
  funcInfoHashTable.insert(std::make_pair(newInitBB->getId(), funcInfo));

  oldInitBB->unsetBBType(G4_BB_INIT_TYPE);
  newInitBB->setBBType(G4_BB_INIT_TYPE);
  addPredSuccEdges(newInitBB, oldInitBB);

  // newInitBB has the original label; oldInitBB gets a new one.
  G4_Label *label = builder->createLocalBlockLabel("origEntry");
  G4_INST *labelInst = createNewLabelInst(label);
  G4_INST *origLabelInst = oldInitBB->front();
  oldInitBB->pop_front();
  oldInitBB->push_front(labelInst);
  newInitBB->push_back(origLabelInst);
}

void FlowGraph::decoupleReturnBlock(G4_BB *bb) {
  G4_BB *oldRetBB = bb;
  G4_BB *itsExitBB = oldRetBB->BBBeforeCall()->getCalleeInfo()->getExitBB();
  G4_BB *newRetBB = createNewBB();
  BB_LIST_ITER insertBefore = std::find(BBs.begin(), BBs.end(), bb);
  vISA_ASSERT(insertBefore != BBs.end(), ERROR_FLOWGRAPH);
  insert(insertBefore, newRetBB);

  std::replace(itsExitBB->Succs.begin(), itsExitBB->Succs.end(), oldRetBB,
               newRetBB);
  newRetBB->Preds.push_back(itsExitBB);
  newRetBB->Succs.push_back(oldRetBB);

  std::replace(oldRetBB->Preds.begin(), oldRetBB->Preds.end(), itsExitBB,
               newRetBB);
  oldRetBB->Preds.unique();

  oldRetBB->unsetBBType(G4_BB_RETURN_TYPE);
  newRetBB->setBBType(G4_BB_RETURN_TYPE);

  newRetBB->markEmpty(builder);
}

// Make sure that a BB is at most one of CALL/RETURN/EXIT/INIT. If any
// BB is both, say INIT and CALL, decouple them by inserting a new BB.
// [The above comments are post-added from reading the code.]
//
// The inserted BB must be in the original place so that each subroutine
// has a list of consecutive BBs.
void FlowGraph::normalizeSubRoutineBB(FuncInfoHashTable &funcInfoTable) {
  setPhysicalPredSucc();
  for (G4_BB *bb : BBs) {
    if ((bb->getBBType() & G4_BB_CALL_TYPE)) {
      if (bb->getBBType() & G4_BB_EXIT_TYPE) {
        // As a call BB has a RETURN BB as fall-thru, it cannot be EXIT
        vISA_ASSERT(false, ERROR_FLOWGRAPH);
      }

      // BB could be either INIT or RETURN, but not both.
      if (bb->getBBType() & G4_BB_INIT_TYPE) {
        decoupleInitBlock(bb, funcInfoTable);
      } else if (bb->getBBType() & G4_BB_RETURN_TYPE) {
        decoupleReturnBlock(bb);
      }
    } else if ((bb->getBBType() & G4_BB_INIT_TYPE)) {
      if (bb->getBBType() & G4_BB_RETURN_TYPE) {
        // As a return BB must have a pred, it cannot be an init BB
        vISA_ASSERT(false, ERROR_FLOWGRAPH);
      }

      // Two possible combinations: INIT & CALL, or INIT & EXIT. INIT & CALL
      // has been processed in the previous IF; here only INIT and EXIT is
      // possible.
      if (bb->getBBType() & G4_BB_EXIT_TYPE) {
        decoupleInitBlock(bb, funcInfoTable);
      }
    } else if ((bb->getBBType() & G4_BB_EXIT_TYPE)) {
      // Only EXIT & RETURN are possible. (INIT & EXIT
      // has been processed)
      if (bb->getBBType() & G4_BB_RETURN_TYPE) {
        decoupleReturnBlock(bb);
      }
    } else if ((bb->getBBType() & G4_BB_RETURN_TYPE)) {
      // Do we need to do this?
      if (bb->Preds.size() > 1) {
        decoupleReturnBlock(bb);
      }
    }
  }
}

//
// This function does a DFS and any blocks that get visited will have their
// preId set according to the ordering. Blocks that never get visited will
// have their preId unmodified.
//
static void doDFS(G4_BB *startBB, unsigned int p, BBIDMap &preIDMap) {
  unsigned int currP = p;
  std::stack<G4_BB *> traversalStack;
  traversalStack.push(startBB);

  while (!traversalStack.empty()) {
    G4_BB *currBB = traversalStack.top();
    traversalStack.pop();
    if (preIDMap.at(currBB) != UINT_MAX) {
      continue;
    }
    preIDMap[currBB] = currP++;

    for (G4_BB *tmp : currBB->Succs) {
      if (preIDMap.at(tmp) == UINT_MAX) {
        traversalStack.push(tmp);
      }
    }
  }
}
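
// Note (added for clarity): the traversal uses an explicit stack rather than
// recursion, so deep CFGs cannot overflow the call stack. A block may be
// pushed more than once; the UINT_MAX check on pop ensures each block is
// numbered only on its first visit.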

void FlowGraph::recomputePreId(BBIDMap &IDMap) {
  unsigned preId = 0;
  //
  // initializations
  //
  for (G4_BB *bb : BBs) {
    IDMap[bb] = UINT_MAX;
  }
  //
  // assign DFS-based pre/rpost ids to all blocks in the main program
  //
  doDFS(getEntryBB(), preId, IDMap);
}

//
// The optimization pass will remove unreachable blocks from functions. Later
// compilation phases assume that the only unreachable code that can exist in a
// function is the block with a return/pseudo_fret instruction. All other
// unreachable code should be removed. The only time blocks with
// return/pseudo_fret will be removed is when the header of that function
// itself is deemed unreachable.
//
void FlowGraph::removeUnreachableBlocks(FuncInfoHashTable &funcInfoHT) {
  BBIDMap preIDMap;
  recomputePreId(preIDMap);

  //
  // Basic blocks with preId/rpostId set to UINT_MAX are unreachable.
  // Special handling:
  //   1. If CALL_BB isn't dead, don't remove RETURN_BB;
  //   2. If INIT_BB isn't dead, don't remove EXIT_BB.
  //   3. For a BB with EOT, don't remove it.
  //   4. Don't remove pseudo_fret ever, as the function may be extern.
  // (Note that in the BB list (BBs), CALL_BB appears before RETURN_BB and
  // INIT_BB appears before EXIT_BB. The algo works under this assumption.)
  BB_LIST_ITER it = BBs.begin();
  while (it != BBs.end()) {
    G4_BB *bb = (*it);
    if (preIDMap.at(bb) == UINT_MAX) {
      if (bb->getBBType() & G4_BB_INIT_TYPE) {
        // Remove it from funcInfoHT.
        int funcId = bb->getId();
        [[maybe_unused]] unsigned numErased = funcInfoHT.erase(funcId);
        vASSERT(numErased == 1);
      } else if (bb->getBBType() & G4_BB_CALL_TYPE) {
        // If a call bb is removed, its return BB should be removed as well.
        G4_BB *retBB = bb->getPhysicalSucc();
        vISA_ASSERT(retBB, "vISA ICE: missing Return BB");
        if (preIDMap.at(retBB) != UINT_MAX) {
          preIDMap[retBB] = UINT_MAX;
        }
      }

      // EOT should be in the entry function; we keep it for now.
      if (bb->size() > 0 && bb->back()->isEOT()) {
        ++it;
        continue;
      }

      // preserve pseudo_fret BB
      if (bb->isEndWithFRet()) {
        ++it;
        continue;
      }

      // Now, remove bb.
      while (bb->Succs.size() > 0) {
        removePredSuccEdges(bb, bb->Succs.front());
      }
      if (bb->getBBType() & G4_BB_RETURN_TYPE) {
        // As the callee may be live, need to remove pred edges from the exit
        // BB.
        for (auto &II : bb->Preds) {
          G4_BB *exitBB = II;
          if (exitBB->getBBType() & G4_BB_EXIT_TYPE) {
            removePredSuccEdges(exitBB, bb);
            break;
          }
        }
      }

      BB_LIST_ITER prev = it;
      ++it;
      erase(prev);
    } else {
      if (bb->getBBType() & G4_BB_CALL_TYPE) {
        // CALL BB isn't dead, don't remove the return BB.
        G4_BB *retBB = bb->getPhysicalSucc();
        vISA_ASSERT(retBB && (retBB->getBBType() & G4_BB_RETURN_TYPE),
                    "vISA ICE: missing RETURN BB");
        if (preIDMap.at(retBB) == UINT_MAX) {
          // For example,
          //   CALL_BB  : call A   succ: init_BB
          //   RETURN_BB: pred: exit_BB
          //
          //   // subroutine A
          //   init_BB:
          //     while (true) ...; // infinite loop
          //   exit_BB: succ: RETURN_BB
          // Both RETURN_BB and exit_BB are unreachable, but
          // we keep both!
          preIDMap[retBB] = UINT_MAX - 1;
        }
      } else if (bb->getBBType() & G4_BB_INIT_TYPE) {
        // The function isn't dead, don't remove its exit BB.
        int funcId = bb->getId();
        auto entry = funcInfoHT.find(funcId);
        vASSERT(entry != funcInfoHT.end());
        FuncInfo *finfo = entry->second;
        G4_BB *exitBB = finfo->getExitBB();
        vISA_ASSERT(exitBB, "vISA ICE: missing exit BB");
        if (preIDMap.at(exitBB) == UINT_MAX) {
          // See the example above for G4_BB_CALL_TYPE
          preIDMap[exitBB] = UINT_MAX - 1;
        }
      }
      it++;
    }
  }
  reassignBlockIDs();
  setPhysicalPredSucc();
}
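
// Note (added for clarity): marking a kept-but-unreachable RETURN/EXIT BB
// with UINT_MAX - 1 above is what prevents the deletion branch (which tests
// for UINT_MAX exactly) from removing it on a later iteration.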

//
// Remove redundant goto/jmpi
// Remove the fall-through edges between a subroutine and its non-caller preds
// Remove basic blocks that only contain a label; function labels are
// untouched.
// Remove empty blocks.
//
void FlowGraph::removeRedundantLabels() {
  std::vector<G4_Label *> joinLabels;
  // first, remove redundant gotos
  //   goto L0
  //   ....
  //   goto L1     <-- redundant goto
  //   L0: <empty BB>
  //   L1:
  BB_LIST_ITER Next = BBs.begin(), IE = BBs.end();
  for (BB_LIST_ITER II = Next; II != IE; II = Next) {
    ++Next;
    G4_BB *BB = *II;

    // Record the JIP and UIP labels of the first non-label control flow
    // instruction.
    G4_INST *firstNoneLabelInst = BB->getFirstInst();
    if (firstNoneLabelInst && firstNoneLabelInst->isFlowControl()) {
      G4_Label *jip = firstNoneLabelInst->asCFInst()->getJip();
      G4_Label *uip = firstNoneLabelInst->asCFInst()->getUip();
      if (jip) {
        joinLabels.push_back(jip);
      }
      if (uip) {
        joinLabels.push_back(uip);
      }
    }

    G4_opcode lastop = BB->getLastOpcode();
    if (BB->Succs.size() == 1 && (lastop == G4_goto || lastop == G4_jmpi)) {
      G4_BB *SuccBB = BB->Succs.back();
      G4_BB *BB1 = nullptr;
      // Skip over empty BBs
      for (auto iter = Next; iter != IE; ++iter) {
        BB1 = *iter;
        if (BB1 != SuccBB &&
            (BB1->empty() ||
             (BB1->size() == 1 && BB1->getLastOpcode() == G4_label))) {
          continue;
        }
        break;
      }
      if (BB1 && SuccBB == BB1) {
        // Remove the goto; its fall-thru will be its succ.
        G4_BB *fallThruBB = *Next;
        if (fallThruBB != SuccBB) {
          // Reconnect succ/pred for BB
          removePredSuccEdges(BB, SuccBB);
          addPredSuccEdges(BB, fallThruBB);
        }
        BB->pop_back();
      }
    }
  }

  for (BB_LIST_ITER nextit = BBs.begin(), ite = BBs.end(); nextit != ite;) {
    BB_LIST_ITER it = nextit++;
    G4_BB *bb = *it;

    if (bb == getEntryBB()) {
      continue;
    }
    if (bb->Succs.size() == 0 && bb->Preds.size() == 0) {
      // leaving dangling BBs with return in for now.
      // workaround to handle an unreachable return,
      // for example a return after an infinite loop.
      if (bb->isEndWithFRet() ||
          (bb->size() > 0 && ((G4_INST *)bb->back())->isReturn())) {
        continue;
      }

      bb->clear();
      erase(it);

      continue;
    }

    vISA_ASSERT(bb->size() > 0 && bb->front()->isLabel(),
                "Every BB should at least have a label inst!");

    if (bb->getBBType() & (G4_BB_CALL_TYPE | G4_BB_EXIT_TYPE | G4_BB_INIT_TYPE |
                           G4_BB_RETURN_TYPE)) {
      // Keep those BBs
      continue;
    }

    //
    // The removal candidates will have a single successor and a single inst
    //
    if (bb->Succs.size() == 1 && bb->size() == 1) {
      G4_INST *removedBlockInst = bb->front();
      if (removedBlockInst->getLabel()->isSubroutine() ||
          bb->isSpecialEmptyBB()) {
        continue;
      }

      G4_Label *succ_label = bb->Succs.front()->front()->getLabel();

      // check if the last inst of a pred is a control flow inst
      for (auto pred : bb->Preds) {
        auto jt = std::find(pred->Succs.begin(), pred->Succs.end(), bb);
        if (jt == pred->Succs.end()) {
          // Just skip!
          //
          // Each unique pred is processed just once. If a pred appears
          // more than once in bb->Preds, it is only handled the first time
          // the pred is processed.
          // Note that if jt == end(), it means that this pred appears
          // more than once in the Preds list AND it has been handled
          // before (see code at the end of this loop). So, it is safe to
          // skip.
          continue;
        }

        G4_INST *i = pred->back();
        // replace label in instructions
|
|
if (i->isFlowControl()) {
|
|
if (isIndirectJmpTarget(i)) {
|
|
// due to the switchjmp we may have multiple jmpi
|
|
// at the end of a block.
|
|
[[maybe_unused]] bool foundMatchingJmp = false;
|
|
for (INST_LIST::iterator iter = --pred->end();
|
|
iter != pred->begin(); --iter) {
|
|
i = *iter;
|
|
if (i->opcode() == G4_jmpi) {
|
|
if (i->getSrc(0)->isLabel() &&
|
|
i->getSrc(0) == removedBlockInst->getLabel()) {
|
|
i->setSrc(succ_label, 0);
|
|
foundMatchingJmp = true;
|
|
break;
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
vISA_ASSERT(foundMatchingJmp,
|
|
"Can't find the matching jmpi to the given label");
|
|
} else if (i->opcode() == G4_jmpi || i->isCall()) {
|
|
if (i->getSrc(0)->isLabel()) {
|
|
if (i->getSrc(0) == removedBlockInst->getLabel()) {
|
|
i->setSrc(succ_label, 0);
|
|
}
|
|
}
|
|
} else if (i->opcode() == G4_if || i->opcode() == G4_while ||
|
|
i->opcode() == G4_else) {
|
|
if (i->asCFInst()->getJip()->isLabel()) {
|
|
if (i->asCFInst()->getJip() == removedBlockInst->getLabel()) {
|
|
if (i->opcode() == G4_else || i->opcode() == G4_while) {
|
|
// for G4_while, jump no matter predicate
|
|
i->asCFInst()->setJip(succ_label);
|
|
}
|
|
// for G4_if, jump only when it has predictate; if no predicate,
|
|
// no jump
|
|
else if (i->getPredicate()) {
|
|
i->asCFInst()->setJip(succ_label);
|
|
}
|
|
}
|
|
}
|
|
} else if (i->opcode() == G4_break || i->opcode() == G4_cont ||
|
|
i->opcode() == G4_halt) {
|
|
// JIP and UIP must have been computed at this point
|
|
vISA_ASSERT(i->asCFInst()->getJip() != NULL &&
|
|
i->asCFInst()->getUip() != NULL,
|
|
"null JIP or UIP for break/cont instruction");
|
|
if (i->asCFInst()->getJip() == removedBlockInst->getLabel()) {
|
|
i->asCFInst()->setJip(succ_label);
|
|
}
|
|
|
|
if (i->asCFInst()->getUip() == removedBlockInst->getLabel()) {
|
|
i->asCFInst()->setUip(succ_label);
|
|
}
|
|
} else if (i->opcode() == G4_goto) {
|
|
// UIP must have been computed at this point
|
|
vISA_ASSERT(i->asCFInst()->getUip() != NULL,
|
|
"null UIP for goto instruction");
|
|
if (i->asCFInst()->getUip() == removedBlockInst->getLabel()) {
|
|
i->asCFInst()->setUip(succ_label);
|
|
}
|
|
if (i->asCFInst()->getUip() == removedBlockInst->getLabel()) {
|
|
i->asCFInst()->setUip(succ_label);
|
|
}
|
|
}
|
|
}
|
|
|
|
pred->Succs.insert(jt, bb->Succs.front());
|
|
pred->Succs.erase(jt);
|
|
|
|
// [Bug1915]: In rare case the precessor may have more than one Succ
|
|
// edge pointing to the same BB, due to empty block being eliminated.
|
|
// For example, with BB1: (P) jmp BB3 BB2: BB3: BB4:
|
|
// ...
|
|
// After BB2 is eliminated BB1's two succ will both point to BB3.
|
|
// When we get rid of BB3,
|
|
// we have to make sure we update both Succ edges as we'd otherwise
|
|
// create an edge to a non-existing BB. Note that we don't just delete
|
|
// the edge because elsewhere there may be assumptions that if a BB ends
|
|
// with a jump it must have two successors
|
|
{
|
|
BB_LIST_ITER succs = pred->Succs.begin();
|
|
BB_LIST_ITER end = pred->Succs.end();
|
|
while (succs != end) {
|
|
BB_LIST_ITER iter = succs;
|
|
++succs;
|
|
if ((*iter) == bb) {
|
|
pred->Succs.insert(iter, bb->Succs.front());
|
|
pred->Succs.erase(iter);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
//
|
|
// Replace the unique successor's predecessor links with the removed
|
|
// block's predessors.
|
|
//
|
|
BB_LIST_ITER kt = std::find(bb->Succs.front()->Preds.begin(),
|
|
bb->Succs.front()->Preds.end(), bb);
|
|
|
|
BB_LIST_ITER mt = bb->Preds.begin();
|
|
|
|
for (; mt != bb->Preds.end(); ++mt) {
|
|
bb->Succs.front()->Preds.insert(kt, *mt);
|
|
}
|
|
|
|
bb->Succs.front()->Preds.erase(kt);
|
|
bb->Succs.front()->Preds.unique();
|
|
//
|
|
// Propagate the removed block's type to its unique successor.
|
|
//
|
|
bb->Succs.front()->setBBType(bb->getBBType());
|
|
|
|
bb->Succs.clear();
|
|
bb->Preds.clear();
|
|
bb->clear();
|
|
|
|
erase(it);
|
|
} else if (bb->Preds.size() == 1 && bb->Preds.front()->Succs.size() == 1) {
|
|
// Merge bb into singlePred and delete bb if all the following are true:
|
|
// 1. singlePred has no control-flow inst (at the end),
|
|
// 2. bb's label is not used at all.
|
|
//
|
|
// singlePred:
|
|
// ....
|
|
// bb:
|
|
// ....
|
|
//
|
|
// If singlePred does not end with a control-flow inst, bb's label is not
|
|
// used except bb ends with while. For while bb, we need further to check
|
|
// if any break uses label. As break should jump to while bb's fall-thru
|
|
// (not while bb), we need to follow all preds of while's fall-thru BB to
|
|
// see if any pred has a break.
|
|
//
|
|
G4_BB *singlePred = bb->Preds.front();
|
|
G4_INST *labelInst = bb->front();
|
|
vASSERT(labelInst->isLabel());
|
|
if (!singlePred->back()->isFlowControl() &&
|
|
singlePred->getPhysicalSucc() == bb /* sanity */ &&
|
|
!labelInst->getLabel()->isSubroutine() /* skip special bb */ &&
|
|
bb != singlePred /* [special] skip dead single-BB loop */) {
|
|
bool doMerging = true;
|
|
G4_INST *whileInst = bb->back();
|
|
if (whileInst->opcode() == G4_while) {
|
|
// If there is any break inst for this while, the break uses the
|
|
// label. No merging. Note that any break inst of this while will jump
|
|
// to the BB right after while BB (this BB is the fall-thru BB of
|
|
// while if while BB has fall-thru, or just its physical succ if it
|
|
// has no fall-thru).
|
|
G4_BB *whilePhySucc = bb->getPhysicalSucc();
|
|
if (whilePhySucc) {
|
|
for (auto breakPred : whilePhySucc->Preds) {
|
|
if (breakPred->getLastOpcode() == G4_break) {
|
|
doMerging = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// For the code like following, in which the first none-label
|
|
// instruction is join, and the JIP label of the join is the label of
|
|
// successor BB, we cannot just remove the label in successor.
|
|
//
|
|
// BB17 Preds: ... Succs: BB18
|
|
// _entry_020_id196_Labe:
|
|
// join(32) _entry_k0_5_cf
|
|
// add(16) id265_(0, 0)<1> : d id63_(0, 0) < 1;
|
|
//
|
|
// BB18 Preds: BB17 Succs: ...
|
|
//_entry_k0_5_cf:
|
|
// ....
|
|
if (std::find(joinLabels.begin(), joinLabels.end(),
|
|
labelInst->getLabel()) != joinLabels.end()) {
|
|
doMerging = false;
|
|
}
|
|
|
|
if (doMerging) {
|
|
removePredSuccEdges(singlePred, bb);
|
|
vASSERT(singlePred->Succs.size() == 0);
|
|
std::vector<G4_BB *> allSuccBBs(bb->Succs.begin(), bb->Succs.end());
|
|
for (auto S : allSuccBBs) {
|
|
removePredSuccEdges(bb, S);
|
|
addPredSuccEdges(singlePred, S, false);
|
|
}
|
|
|
|
// remove bb's label before splice
|
|
bb->remove(labelInst);
|
|
singlePred->getInstList().splice(singlePred->end(),
|
|
bb->getInstList());
|
|
|
|
bb->Succs.clear();
|
|
bb->Preds.clear();
|
|
|
|
erase(it);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
reassignBlockIDs();
|
|
}
|
|
|
|
//
|
|
// remove any mov with the same src and dst opnds
|
|
//
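// For instance (an illustrative sketch, not taken from a real kernel), a raw
// mov whose source and destination cover exactly the same bytes with a
// matching stride:
//   mov (8) V10(0,0)<1>:d V10(0,0)<1;1,0>:d
// writes every channel back to itself and can be dropped.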
void FlowGraph::removeRedundMov() {
  for (G4_BB *bb : BBs) {
    for (auto it = bb->begin(), ie = bb->end(), in = ie; it != ie; it = in) {
      in = std::next(it);
      G4_INST *inst = *it;
      // Do not try to remove the instruction that is marked as doNotDelete.
      if (inst->isDoNotDelete())
        continue;

      if (!inst->isRawMov())
        continue;

      G4_Operand *src = inst->getSrc(0);
      if (!src->isSrcRegRegion())
        continue;

      G4_SrcRegRegion *srcRgn = src->asSrcRegRegion();
      G4_DstRegRegion *dst = inst->getDst();

      if (dst->isIndirect() || srcRgn->isIndirect())
        continue;

      if (!dst->isGreg() || !src->isGreg())
        continue;

      if (dst->getLinearizedStart() != srcRgn->getLinearizedStart() ||
          dst->getLinearizedEnd() != srcRgn->getLinearizedEnd())
        continue;

      uint16_t stride = 0;
      const RegionDesc *rd = srcRgn->getRegion();
      unsigned ExSize = inst->getExecSize();
      if (ExSize == 1 ||
          (rd->isSingleStride(ExSize, stride) &&
           (dst->getHorzStride() == stride))) {
        auto ix = bb->erase(it);
        vASSERT(ix == in);
        (void)ix;
        continue;
      }
    }
  }
}

//
// Remove any placeholder empty blocks that could have been inserted to aid
// analysis
//
void FlowGraph::removeEmptyBlocks() {
  bool changed = true;

  while (changed) {
    changed = false;
    for (BB_LIST_ITER it = BBs.begin(); it != BBs.end();) {
      G4_BB *bb = *it;

      //
      // The removal candidates will have a unique successor and a single label
      // ending with LABEL__EMPTYBB as the only instruction besides a JMP.
      //
      if (bb->size() > 0 && bb->size() < 3) {
        INST_LIST::iterator removedBlockInst = bb->begin();
        if ((*removedBlockInst)->isLabel() == false ||
            !bb->isSpecialEmptyBB()) {
          ++it;
          continue;
        }

        ++removedBlockInst;

        if (removedBlockInst != bb->end()) {
          // if the BB is not empty, it must end with an unconditional jump
          if ((*removedBlockInst)->opcode() != G4_jmpi ||
              bb->Succs.size() > 1) {
            ++it;
            continue;
          }
        }

        // remove redundant succs and preds for the empty block
        bb->Succs.unique();
        bb->Preds.unique();

        for (auto predBB : bb->Preds) {
          //
          // Replace the predecessor's successor links with the removed
          // block's unique successor.
          //
          BB_LIST_ITER jt =
              std::find(predBB->Succs.begin(), predBB->Succs.end(), bb);

          for (auto succBB : bb->Succs) {
            predBB->Succs.insert(jt, succBB);
          }
          predBB->Succs.erase(jt);
          predBB->Succs.unique();
        }

        for (auto succBB : bb->Succs) {
          //
          // Replace the unique successor's predecessor links with the removed
          // block's predecessors.
          //
          BB_LIST_ITER kt =
              std::find(succBB->Preds.begin(), succBB->Preds.end(), bb);

          if (kt != succBB->Preds.end()) {
            for (auto predBB : bb->Preds) {
              succBB->Preds.insert(kt, predBB);
            }

            succBB->Preds.erase(kt);
            succBB->Preds.unique();

            //
            // Propagate the removed block's type to its unique successor.
            //
            succBB->setBBType(bb->getBBType());
          }
        }
        //
        // Remove the block to be removed.
        //
        bb->Succs.clear();
        bb->Preds.clear();
        bb->clear();

        BB_LIST_ITER rt = it++;
        erase(rt);
        changed = true;
      } else {
        ++it;
      }
    }
  }
}

//
// If multiple freturns exist in a flowgraph, create a new basic block
// with an freturn. Replace all freturns with jumps.
//
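// An illustrative sketch (block labels are hypothetical): with two fret
// blocks,
//   BB0: ... fret            BB0: ... jmpi MERGED_FRET_EXIT_BB
//   BB1: ... fret    ==>     BB1: ... jmpi MERGED_FRET_EXIT_BB
//                            MERGED_FRET_EXIT_BB: fret
// where the surviving fret block is either a reused single-fret block or a
// newly created one.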
void FlowGraph::mergeFReturns() {
  std::list<G4_BB *> exitBBs;
  G4_BB *candidateFretBB = NULL;
  G4_Label *dumLabel = NULL;

  for (G4_BB *cur : BBs) {
    if (cur->size() > 0 && cur->back()->isFReturn()) {
      exitBBs.push_back(cur);

      if (cur->size() == 2 && cur->front()->isLabel()) {
        // An fret already exists that can be shared among all
        // so skip creating a new block with fret.
        dumLabel = cur->front()->getSrc(0)->asLabel();
        candidateFretBB = cur;
      }
    }
  }

  if (exitBBs.size() > 1) {
    if (candidateFretBB == NULL) {
      G4_BB *newExit = createNewBB();
      vISA_ASSERT(!builder->getIsKernel(), "Not expecting fret in kernel");
      dumLabel = builder->createLocalBlockLabel("MERGED_FRET_EXIT_BB");
      G4_INST *label = createNewLabelInst(dumLabel);
      newExit->push_back(label);
      G4_INST *fret = builder->createInternalCFInst(
          nullptr, G4_pseudo_fret, g4::SIMD1, nullptr, nullptr, InstOpt_NoOpt);
      newExit->push_back(fret);
      BBs.push_back(newExit);
      candidateFretBB = newExit;
    }

    for (G4_BB *cur : exitBBs) {
      if (cur != candidateFretBB) {
        G4_INST *last = cur->back();
        addPredSuccEdges(cur, candidateFretBB);

        last->setOpcode(G4_jmpi);
        last->setSrc(dumLabel, 0);
        last->setExecSize(g4::SIMD1);
      }
    }
  }
}

//
// Re-assign block ID so that we can use id to determine the ordering of two
// blocks in the code layout
//
void FlowGraph::reassignBlockIDs() {
  //
  // re-assign block id so that we can use id to determine the ordering of
  // two blocks in the code layout; namely which one is ahead of the other.
  // Important: since we re-assign id, there MUST NOT exist any instruction
  // that depends on BB id. Or the code will be incorrect once we reassign id.
  //
  unsigned i = 0;
  for (G4_BB *bb : BBs) {
    bb->setId(i);
    i++;
    vISA_ASSERT(i <= getNumBB(), ERROR_FLOWGRAPH);
  }

  numBBId = i;
}

// Find the BB that has the given label from the range [StartIter, EndIter).
static G4_BB *findLabelBB(BB_LIST_ITER StartIter, BB_LIST_ITER EndIter,
                          G4_Label *Label) {
  for (BB_LIST_ITER it = StartIter; it != EndIter; ++it) {
    G4_BB *bb = *it;
    G4_INST *first = bb->empty() ? nullptr : bb->front();
    if (first && first->isLabel()) {
      if (Label == first->getLabel())
        return bb;
    }
  }
  return nullptr;
}

typedef enum {
  STRUCTURED_CF_IF = 0,
  STRUCTURED_CF_LOOP = 1
} STRUCTURED_CF_TYPE;

struct StructuredCF {
  STRUCTURED_CF_TYPE mType;
  // for if this is the block that ends with if
  // for while this is the loop block
  G4_BB *mStartBB;
  // for if this is the endif block
  // for while this is the block that ends with while
  G4_BB *mEndBB;
  // it's possible for a BB to have multiple endifs, so we need
  // to know which endif corresponds to this CF
  G4_INST *mEndInst;

  StructuredCF *enclosingCF;

  // ToDo: can add more information (else, break, cont, etc.) as needed later

  // endBB is set when we encounter the endif/while
  StructuredCF(STRUCTURED_CF_TYPE type, G4_BB *startBB)
      : mType(type), mStartBB(startBB), mEndBB(NULL), mEndInst(NULL),
        enclosingCF(NULL) {}

  void *operator new(size_t sz, vISA::Mem_Manager &m) { return m.alloc(sz); }

  void setEnd(G4_BB *endBB, G4_INST *endInst) {
    mEndBB = endBB;
    mEndInst = endInst;
  }
}; // StructuredCF

/*
 * This function sets the JIP for Structured CF (SCF) instructions,
 * Unstructured Control Flow (UCF) instructions (goto) are handled in
 * processGoto().
 *
 * Note: we currently do not consider goto/join when adding the JIP for endif,
 * since structure analysis should not allow goto into/out of if-endif. This
 * means we only need to set the JIP of the endif to its immediately
 * enclosing endif/while
 *
 * The simd control flow blocks must be well-structured
 *
 */
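// A small sketch of the JIP set by this pass (assumed shapes, not from a real
// kernel): for a nested if, the inner endif's JIP points to its immediately
// enclosing endif/while:
//   if (outer)
//     if (inner)
//     endif      // JIP = the outer endif
//   endif        // no enclosing SCF, so no JIP is set here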
void FlowGraph::processSCF() {
  std::stack<StructuredCF *> ifAndLoops;
  std::vector<StructuredCF *> structuredSimdCF;

  for (G4_BB *bb : BBs) {
    for (G4_INST *inst : *bb) {
      // check if first non-label inst is an endif
      if (inst->opcode() != G4_label && inst->opcode() != G4_join) {
        if (inst->opcode() == G4_endif) {

          vISA_ASSERT(ifAndLoops.size() > 0, "endif without matching if");
          StructuredCF *cf = ifAndLoops.top();
          vISA_ASSERT(cf->mType == STRUCTURED_CF_IF, "ill-formed if");
          cf->setEnd(bb, inst);
          ifAndLoops.pop();
          if (ifAndLoops.size() > 0) {
            cf->enclosingCF = ifAndLoops.top();
          }
        } else {
          // stop at the first non-endif instruction (there may be multiple
          // endifs)
          break;
        }
      }
    }

    // check if bb is SIMD loop head
    for (G4_BB *predBB : bb->Preds) {
      // check if one of the pred ends with a while
      if (predBB->getId() >= bb->getId()) {
        if (predBB->size() != 0 && predBB->back()->opcode() == G4_while) {
          StructuredCF *cf = new (mem) StructuredCF(STRUCTURED_CF_LOOP, bb);
          structuredSimdCF.push_back(cf);
          ifAndLoops.push(cf);
        }
      }
    }

    if (bb->size() > 0) {
      G4_INST *lastInst = bb->back();
      if (lastInst->opcode() == G4_if) {
        StructuredCF *cf = new (mem) StructuredCF(STRUCTURED_CF_IF, bb);
        structuredSimdCF.push_back(cf);
        ifAndLoops.push(cf);
      } else if (lastInst->opcode() == G4_while) {
        vISA_ASSERT(ifAndLoops.size() > 0, "while without matching do");
        StructuredCF *cf = ifAndLoops.top();
        vISA_ASSERT(cf->mType == STRUCTURED_CF_LOOP, "ill-formed while loop");
        cf->setEnd(bb, lastInst);
        ifAndLoops.pop();
        if (ifAndLoops.size() > 0) {
          cf->enclosingCF = ifAndLoops.top();
        }
      }
    }
  }

  vISA_ASSERT(ifAndLoops.size() == 0, "not well-structured SIMD CF");

  for (StructuredCF *cf : structuredSimdCF) {
    if (cf->mType == STRUCTURED_CF_IF && cf->enclosingCF != NULL) {
      setJIPForEndif(cf->mEndInst, cf->enclosingCF->mEndInst,
                     cf->enclosingCF->mEndBB);
    }
  }
}

// markDivergentBBs()
//    If BB is on the divergent path, mark it as divergent.
// Divergent:
//    If all active simd lanes on entry to shader/kernel are
//    active in a BB, that BB is NOT divergent; otherwise,
//    it is divergent.
//
//    Note that this does not require the number of all the
//    active simd lanes to equal the simd dispatch width!
//    For example, a simd8 kernel might have 4 active lanes
//    on entry to the kernel, thus if any BB of the kernel
//    has less than 4 active lanes, it is divergent! As a
//    matter of fact, the entry BB must not be divergent.
//
void FlowGraph::markDivergentBBs() {
  if (BBs.empty()) {
    // Sanity check
    return;
  }

  // fcall'ed Function: if AllLaneActive attribute is not set,
  // assume it is divergent on entry
  // (Note that each function has its own CFG)
  if (builder->getIsFunction() &&
      pKernel->getBoolKernelAttr(Attributes::ATTR_AllLaneActive) == false) {
    for (G4_BB *BB : BBs) {
      BB->setDivergent(true);
    }
    return;
  }

  // Now, handle kernel function.
  //   1. It would be preferred to have a single return/exit for the kernel
  //      and all its subroutines in the last BB (IGC does, cm does not),
  //      although the code can handle return in the middle of kernel.
  //   2. The entry function will appear first in the BB list, and if there
  //      is a call from A to B, subroutine A shall appear prior to
  //      subroutine B in BB list.
  //
  // Required: need to set BB id.
  //
  // Key variables:
  //   LastJoinBB:
  //     LastJoinBB is the farthest joinBB of any goto/break/if/while
  //     so far, as described below in the algorithm.
  //   LastJoinBBId: Id(LastJoinBB).
  //
  // The algorithm initializes LastJoinBBId to -1 and scans all BBs in order.
  // It checks control-flow instructions to see if it diverges (has join).
  // If so, the algorithm sets LastJoinBBId to be the larger one of its join BB
  // and LastJoinBBId; For a non-negative LastJoinBBId, it means that there is
  // an active join in that BB, and therefore, all BBs from the current BB to
  // that LastJoinBB (LastJoinBB not included) are divergent.
  //
  // The algorithm checks the following cases and their join BBs are:
  //   case 1: cf inst = goto
  //     <currBB>   [(p)] goto L
  //        ...
  //     <joinBB>  L:
  //   case 2: cf inst = if
  //     <currBB>   if
  //        ...
  //     <joinBB>   endif
  //
  //   case 3: cf inst = break
  //     <currBB>   break
  //        ...
  //                [(p)] while
  //     <joinBB>
  //   case 4:
  //     <currBB>  L:
  //        ....
  //                [(p)] while/goto L
  //     <joinBB>
  //
  // The presence of a loop makes the checking complicated. Before checking the
  // divergence of a loop head, we need to know if the loop has ANY NON-UNIFORM
  // OUT-OF-LOOP BRANCH or OUT-OF-LOOP BRANCH in DIVERGENT BB, which includes
  // checking the loop's backedge and any out-going branches inside the loop
  // body. For example,
  //   L0:
  //     B0: (uniform cond) goto B1
  //        ......
  //     B1:
  //     B2:
  //       L1:
  //         B3: goto OUT
  //         B4: (non-uniform cond) goto L1
  //     B5:
  //     B6: (uniform cond) goto L0
  //
  //   OUT: B7:
  //
  // At B0, we don't know whether B0 is divergent even though B0's goto is
  // uniform. Once we find out that B3 is divergent, "goto OUT" will make Loop
  // L0 divergent, which means any of L0's BBs, including B0, is divergent. In
  // order to know B3 is divergent, we need to check the back branch of loop
  // L1. The fact that L1's backedge is non-uniform indicates L1 is divergent,
  // therefore B3 is divergent. This means that WE NEED TO DO PRE_SCAN in order
  // to know whether any BB of a loop is divergent.
  //
  int LastJoinBBId;

  auto pushJoin = [&](G4_BB *joinBB) {
    LastJoinBBId = std::max(LastJoinBBId, (int)joinBB->getId());
  };
  auto popJoinIfMatch = [&](G4_BB *BB) {
    if ((int)BB->getId() == LastJoinBBId) {
      LastJoinBBId = -1;
    }
  };
  auto isPriorToLastJoin = [&](G4_BB *aBB) -> bool {
    return (int)aBB->getId() < LastJoinBBId;
  };
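
  // A worked example of the tracking above (hypothetical BB ids): a divergent
  // goto in BB2 targeting BB7 calls pushJoin(BB7), so LastJoinBBId becomes 7
  // and BB3..BB6 satisfy isPriorToLastJoin(), i.e. they are divergent; when
  // the scan reaches BB7, popJoinIfMatch(BB7) resets LastJoinBBId to -1.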

  reassignBlockIDs();

  // Analyze function in topological order. As there is no recursion
  // and no indirect call, a function will be analyzed only if all
  // its callers have been analyzed.
  //
  // If no subroutine, sortedFuncTable is empty. Here keep all functions
  // in a vector first (it works with and without subroutines), then scan
  // functions in topological order.
  struct StartEndIter {
    BB_LIST_ITER StartI;
    BB_LIST_ITER EndI;
    bool InvokedFromDivergentBB;
  };
  int numFuncs = (int)sortedFuncTable.size();
  std::vector<StartEndIter> allFuncs;
  std::unordered_map<FuncInfo *, uint32_t> funcInfoIndex;
  if (numFuncs == 0) {
    numFuncs = 1;
    allFuncs.resize(1);
    allFuncs[0].StartI = BBs.begin();
    allFuncs[0].EndI = BBs.end();
    allFuncs[0].InvokedFromDivergentBB = false;
  } else {
    allFuncs.resize(numFuncs);
    for (int i = numFuncs; i > 0; --i) {
      uint32_t ix = (uint32_t)(i - 1);
      FuncInfo *pFInfo = sortedFuncTable[ix];
      G4_BB *StartBB = pFInfo->getInitBB();
      G4_BB *EndBB = pFInfo->getExitBB();
      uint32_t ui = (uint32_t)(numFuncs - i);
      allFuncs[ui].StartI = std::find(BBs.begin(), BBs.end(), StartBB);
      auto nextI = std::find(BBs.begin(), BBs.end(), EndBB);
      vISA_ASSERT(nextI != BBs.end(), "ICE: subroutine's end BB not found!");
      allFuncs[ui].EndI = (++nextI);
      allFuncs[ui].InvokedFromDivergentBB = false;

      funcInfoIndex[pFInfo] = ui;
    }
  }

  // Update LastJoin for the backward branch of BB referred to by IT.
  auto updateLastJoinForBwdBrInBB = [&](BB_LIST_ITER &IT) {
    G4_BB *predBB = *IT;
    G4_INST *bInst = predBB->back();
    vISA_ASSERT(bInst->isCFInst() &&
                    (bInst->opcode() == G4_while ||
                     bInst->asCFInst()->isBackward()),
                "ICE: expected backward branch/while!");

    // joinBB of a loop is the BB right after tail BB
    BB_LIST_ITER loopJoinIter = IT;
    ++loopJoinIter;
    if (loopJoinIter == BBs.end()) {
      // Loop end is the last BB (no Join BB). This happens in the
      // following cases:
      //   1. For CM, CM's return is in the middle of code! For IGC,
      //      this never happens as a return, if present, must be in
      //      the last BB.
      //   2. The last segment of code is an infinite loop (static infinite
      //      loop so that the compiler knows it is an infinite loop).
      // In either case, no join is needed.
      return;
    }

    // Update LastJoin if the branch itself is divergent.
    // (todo: checking G4_jmpi is redundant as jmpi must have isUniform()
    //  return true.)
    if ((!bInst->asCFInst()->isUniform() && bInst->opcode() != G4_jmpi)) {
      G4_BB *joinBB = *loopJoinIter;
      pushJoin(joinBB);
    }
    return;
  };

  // Update LastJoin for BB referred to by IT. It is called either from normal
  // scan (setting BB's divergent field) or from loop pre-scan (no setting to
  // BB's divergent field). IsPreScan indicates whether it is called from
  // the pre-scan or not.
  //
  // In the normal scan (IsPreScan is false), this function also updates the
  // divergence info for any called subroutines.
  //
  auto updateLastJoinForFwdBrInBB = [&](BB_LIST_ITER &IT, bool IsPreScan) {
    G4_BB *aBB = *IT;
    if (aBB->size() == 0) {
      return;
    }

    // Using isPriorToLastJoin() works for both loop pre-scan and normal scan.
    // (aBB->isDivergent() works for normal scan only.)
    bool isBBDivergent = isPriorToLastJoin(aBB);

    G4_INST *lastInst = aBB->back();
    if (((lastInst->opcode() == G4_goto &&
          !lastInst->asCFInst()->isBackward()) ||
         lastInst->opcode() == G4_break) &&
        (!lastInst->asCFInst()->isUniform() || isBBDivergent)) {
      // forward goto/break : the last Succ BB is our target BB
      // For break, it should be the BB right after the while inst.
      G4_BB *joinBB = aBB->Succs.back();
      pushJoin(joinBB);
    } else if (lastInst->opcode() == G4_if &&
               (!lastInst->asCFInst()->isUniform() || isBBDivergent)) {
      G4_Label *UIPLabel = lastInst->asCFInst()->getUip();
      G4_BB *joinBB = findLabelBB(IT, BBs.end(), UIPLabel);
      vISA_ASSERT(joinBB, "ICE(vISA) : missing endif label!");
      pushJoin(joinBB);
    } else if (!IsPreScan && lastInst->opcode() == G4_call) {
      // If this function is already in a divergent branch, the callee
      // must be in a divergent branch!
      if (isBBDivergent || lastInst->getPredicate() != nullptr) {
        FuncInfo *calleeFunc = aBB->getCalleeInfo();
        if (funcInfoIndex.count(calleeFunc)) {
          int ix = funcInfoIndex[calleeFunc];
          allFuncs[ix].InvokedFromDivergentBB = true;
        }
      }
    }
    return;
  };

  // Now, scan all subroutines (kernels or functions)
  for (int i = 0; i < numFuncs; ++i) {
    // each function: [IT, IE)
    BB_LIST_ITER &IS = allFuncs[i].StartI;
    BB_LIST_ITER &IE = allFuncs[i].EndI;
    if (IS == IE) {
      // sanity check
      continue;
    }

    if (allFuncs[i].InvokedFromDivergentBB) {
      // subroutine's divergent on entry. Mark every BB as divergent
      for (auto IT = IS; IT != IE; ++IT) {
        G4_BB *BB = *IT;

        BB->setDivergent(true);
        if (BB->size() == 0) {
          // sanity check
          continue;
        }
        if (BB->isEndWithCall() || BB->isEndWithFCall()) {
          FuncInfo *calleeFunc = BB->getCalleeInfo();
          if (funcInfoIndex.count(calleeFunc)) {
            int ix = funcInfoIndex[calleeFunc];
            allFuncs[ix].InvokedFromDivergentBB = true;
          }
        }
      }
      // continue for next subroutine
      continue;
    }

    // Scanning all BBs of a single subroutine (or kernel, or function).
    LastJoinBBId = -1;
    for (auto IT = IS; IT != IE; ++IT) {
      G4_BB *BB = *IT;

      // join could be: explicit (join inst) or implicit (endif/while)
      popJoinIfMatch(BB);

      // Handle loop
      //   A loop needs to be scanned twice in order to get an accurate
      //   marking. In the pre-scan (1st scan), it finds out any divergent
      //   out-of-loop branch. If found, it updates LastJoin to the target of
      //   that out-of-loop branch and restarts the normal scan. If not, it
      //   restarts the normal scan with the original LastJoin unchanged.

      // BB could be the head of several loops, and the following does
      // pre-scan for every one of those loops.
      for (G4_BB *predBB : BB->Preds) {
        if (!isBackwardBranch(predBB, BB)) {
          G4_opcode t_opc = predBB->getLastOpcode();
          [[maybe_unused]] bool isBr = (t_opc == G4_goto || t_opc == G4_jmpi);
          vISA_ASSERT((!isBr || (BB->getId() > predBB->getId())),
                      "backward branch was not set correctly!");
          continue;
        }
        vISA_ASSERT(BB->getId() <= predBB->getId(),
                    "Branch incorrectly set to be backward!");

        BB_LIST_ITER LoopITEnd = std::find(BBs.begin(), BBs.end(), predBB);
        updateLastJoinForBwdBrInBB(LoopITEnd);

        // If lastJoin is already after the loop end, no need to scan the loop
        // twice as all BBs in the loop must be divergent
        if (isPriorToLastJoin(predBB)) {
          continue;
        }

        // pre-scan loop (BB, predBB)
        //
        //    Observation:
        //       pre-scan the loop once; as each BB is scanned, each backward
        //       branch to this BB and any forward branch from this BB are
        //       processed. Doing so finds out any divergent out-of-loop branch
        //       iff the loop has one. In addition, if a loop has more than one
        //       divergent out-of-loop branch, using any of those branches
        //       would get us precise divergence during the normal scan.
        //
        //    LastJoinBBId will be updated iff there is an out-of-loop branch
        //    that is also divergent. For example,
        //      a) "goto OUT" is an out-of-loop branch, but not divergent.
        //         Thus, LastJoinBB will not be updated.
        //
        //          L :
        //             B0
        //             if (uniform cond) goto OUT;
        //             if (non-uniform cond)
        //                B1
        //             else
        //                B2
        //             B3
        //             (p) jmpi L
        //          OUT:
        //
        //      b) "goto OUT" is an out-of-loop branch and divergent. Thus,
        //         update LastJoinBB.
        //         Note that "goto OUT" is divergent because loop L1 is
        //         divergent, which makes every BB in L1 divergent. And any
        //         branch from a divergent BB must be divergent.
        //
        //          L :
        //             B0
        //             L1:
        //                B1
        //                if (uniform cond) goto OUT;
        //                if (cond)
        //                   B2
        //                else
        //                   B3
        //                if (non-uniform cond) goto L1
        //             B4
        //             (uniform cond) while L
        //          OUT:
        //
        int orig_LastJoinBBId = LastJoinBBId;
        for (auto LoopIT = IT; LoopIT != LoopITEnd; ++LoopIT) {
          if (LoopIT != IT) {
            // Check loops that are fully inside the current loop.
            G4_BB *H = *LoopIT;
            for (G4_BB *T : H->Preds) {
              if (!isBackwardBranch(T, H)) {
                continue;
              }
              vISA_ASSERT(H->getId() <= T->getId(),
                          "Branch incorrectly set to be backward!");
              BB_LIST_ITER TIter = std::find(BBs.begin(), BBs.end(), T);
              updateLastJoinForBwdBrInBB(TIter);
            }
          }
          updateLastJoinForFwdBrInBB(LoopIT, true);
        }

        // After scan, if no branch out of loop, restore the original
        // LastJoinBBId
        if (!isPriorToLastJoin(predBB)) { // case a) above.
          LastJoinBBId = orig_LastJoinBBId;
        }
      }

      // normal scan of BB
      if (isPriorToLastJoin(BB)) {
        BB->setDivergent(true);
      }
      // Temporary for debugging, toBeDelete!
      // if (pKernel->getIntKernelAttribute(Attributes::ATTR_Target) == VISA_CM)
      // {
      //   vISA_ASSERT((BB->isDivergent() && BB->isInSimdFlow() ||
      //                (!BB->isDivergent() && !BB->isInSimdFlow())),
      //               " isDivergent != isInSimdFlow!");
      // }
      updateLastJoinForFwdBrInBB(IT, false);
    }
  }
  return;
}

G4_BB *FlowGraph::getUniqueReturnBlock() {
  // Return the block that has a return instruction.
  // Return NULL if multiple return instructions are found.
  G4_BB *uniqueReturnBlock = NULL;

  for (G4_BB *curBB : BBs) {
    if (!curBB->empty()) {
      G4_INST *last_inst = curBB->back();

      if (last_inst->opcode() == G4_pseudo_fret) {
        if (uniqueReturnBlock == NULL) {
          uniqueReturnBlock = curBB;
        } else {
          uniqueReturnBlock = NULL;
          break;
        }
      }
    }
  }

  return uniqueReturnBlock;
}

/*
 * Insert a join at the beginning of this basic block, immediately after the
 * label. If a join is already present, make sure the join will cover the given
 * 'execSize' and 'maskOffset'.
 */
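// For example (a sketch of the merge below): an existing 'join (8|M0)' that
// must also cover an incoming (8|M8) is widened to 'join (16|M0)' by
// ExecMaskInfo::mergeExecMask.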
void FlowGraph::insertJoinToBB(G4_BB *bb, G4_ExecSize execSize, G4_Label *jip,
                               uint8_t maskOffset) {
  vISA_ASSERT(bb->size() > 0, "empty block");
  INST_LIST_ITER iter = bb->begin();

  // Skip label if any.
  if ((*iter)->isLabel()) {
    iter++;
  }

  if (iter == bb->end()) {
    // insert join at the end
    G4_InstOption instMask =
        G4_INST::offsetToMask(execSize, maskOffset, builder->hasNibCtrl());
    G4_INST *jInst = builder->createInternalCFInst(NULL, G4_join, execSize, jip,
                                                   NULL, instMask);
    bb->push_back(jInst, false);
  } else {
    G4_INST *secondInst = *iter;

    if (secondInst->opcode() == G4_join) {
      G4_ExecSize origExSize = secondInst->getExecSize();
      uint8_t origMaskOffset = (uint8_t)secondInst->getMaskOffset();
      ExecMaskInfo joinEM{origExSize, origMaskOffset};
      joinEM.mergeExecMask(execSize, maskOffset);
      if (joinEM.getExecSize() > origExSize) {
        secondInst->setExecSize(G4_ExecSize{joinEM.getExecSize()});
      }
      if (joinEM.getMaskOffset() != origMaskOffset) {
        G4_InstOption nMask =
            G4_INST::offsetToMask(joinEM.getExecSize(), joinEM.getMaskOffset(),
                                  builder->hasNibCtrl());
        secondInst->setMaskOption(nMask);
      }
    } else {
      G4_InstOption instMask =
          G4_INST::offsetToMask(execSize, maskOffset, builder->hasNibCtrl());
      G4_INST *jInst = builder->createInternalCFInst(NULL, G4_join, execSize,
                                                     jip, NULL, instMask);
      bb->insertBefore(iter, jInst, false);
    }
  }
}

// For tracking execMask information of join.
typedef std::pair<G4_BB *, ExecMaskInfo> BlockSizePair;
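
// Insert bb into the active-join list, which addBBToActiveJoinList keeps
// sorted by block id so that the earliest pending join is always at the
// front; if bb is already present, its exec mask is merged instead of adding
// a duplicate entry.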
static void addBBToActiveJoinList(std::list<BlockSizePair> &activeJoinBlocks,
                                  G4_BB *bb, G4_ExecSize execSize,
                                  uint8_t maskOff) {
  // add goto target to list of active blocks that need a join
  std::list<BlockSizePair>::iterator listIter;
  for (listIter = activeJoinBlocks.begin(); listIter != activeJoinBlocks.end();
       ++listIter) {
    G4_BB *aBB = (*listIter).first;
    if (aBB->getId() == bb->getId()) {
      // block already in list, update exec size if necessary
      ExecMaskInfo &EM = (*listIter).second;
      EM.mergeExecMask(execSize, maskOff);
      break;
    } else if (aBB->getId() > bb->getId()) {
      (void)activeJoinBlocks.insert(
          listIter, BlockSizePair(bb, ExecMaskInfo(execSize, maskOff)));
      break;
    }
  }

  if (listIter == activeJoinBlocks.end()) {
    activeJoinBlocks.push_back(
        BlockSizePair(bb, ExecMaskInfo(execSize, maskOff)));
  }
}
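
// Link all BBs in layout order through their physical pred/succ pointers; the
// first BB gets a null physical pred and the last BB a null physical succ.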
void FlowGraph::setPhysicalPredSucc() {
  BB_LIST_CITER it = BBs.cbegin();
  BB_LIST_CITER cend = BBs.cend();
  if (it != cend) {
    // first, set up head BB
    G4_BB *pred = *it;
    pred->setPhysicalPred(NULL);

    for (++it; it != cend; ++it) {
      G4_BB *bb = *it;
      bb->setPhysicalPred(pred);
      pred->setPhysicalSucc(bb);
      pred = bb;
    }

    // last, set up the last BB
    pred->setPhysicalSucc(NULL);
  }
}

// This function sets the JIP of the endif to the target instruction (either
// endif or while)
void FlowGraph::setJIPForEndif(G4_INST *endif, G4_INST *target,
                               G4_BB *targetBB) {
  vISA_ASSERT(endif->opcode() == G4_endif, "must be an endif instruction");
  G4_Label *label = getLabelForEndif(target);
  if (label) {
    // see if there's another label before the inst that we can reuse
    // FIXME: we really should associate labels with inst instead of having
    // special label inst, so we can avoid ugly code like this
    G4_INST *prevInst = NULL;
    if (target->opcode() == G4_endif) {
      for (G4_INST *inst : *targetBB) {
        if (inst == target) {
          if (prevInst != NULL && prevInst->isLabel()) {
            label = prevInst->getLabel();
          }
          break;
        }
        prevInst = inst;
      }
    } else {
      vISA_ASSERT(target->opcode() == G4_while, "must be a while instruction");
      INST_LIST_RITER it = ++(targetBB->rbegin());
      if (it != targetBB->rend()) {
        G4_INST *inst = *it;
        if (inst->isLabel()) {
          label = inst->getLabel();
        }
      }
    }

    if (label == NULL) {
      label = builder->createLocalBlockLabel();
      endifWithLabels.emplace(target, label);
    }
  }
  endif->asCFInst()->setJip(label);

  VISA_DEBUG({
    std::cout << "set JIP for: \n";
    endif->emit(std::cout);
    std::cout << "\n";
  });
}
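
// Rewrite a goto in place as a scalar jump, e.g. (a sketch):
//   (p) goto (16|M0) L   ==>   (p) jmpi (1) L {NoMask}
// The goto's UIP becomes the jmpi's jump target (src0); JIP/UIP are cleared
// and the instruction becomes SIMD1 with WriteEnable.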
void FlowGraph::convertGotoToJmpi(G4_INST *gotoInst) {
  gotoInst->setOpcode(G4_jmpi);
  gotoInst->setSrc(gotoInst->asCFInst()->getUip(), 0);
  gotoInst->asCFInst()->setJip(NULL);
  gotoInst->asCFInst()->setUip(NULL);
  gotoInst->setExecSize(g4::SIMD1);
  gotoInst->setOptions(InstOpt_NoOpt | InstOpt_WriteEnable);
}

/*
 * This function generates UCF (unstructured control flow, that is,
 * goto/join/jmpi). This function inserts the join for each goto as well as
 * computes the JIP for the goto and join instructions. It additionally
 * converts uniform (simd1 or non-predicated) gotos into scalar jumps. If it's
 * a forward goto, it may be converted into a jmpi only if it is uniform and
 * it does not overlap with another divergent goto. All uniform backward gotos
 * may be converted into scalar jumps.
 *
 * This function assumes **scalar control flow**: meaning that given any goto
 * inst, its execsize should cover all active lanes used for all BBs dominated
 * by this goto.
 *
 * - This function does *not* alter the CFG.
 * - This function does *not* consider SCF (structured control flow), as a
 *   well-formed vISA program should not have overlapped goto and structured
 *   CF instructions.
 */
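// A rough sketch of the expected output shape (labels and sizes assumed): a
// divergent forward goto gets a join inserted at its target, and each
// goto/join's JIP points at the earliest still-pending join:
//   (p) goto (16|M0) L1      // JIP = L0, the first active join
//   ...
//   L0: join (16|M0) L1      // JIP = next active join
//   ...
//   L1: join (16|M0)         // no JIP: no pending join remains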
void FlowGraph::processGoto() {
  // For a given loop (StartBB, EndBB) (EndBB->StartBB is a backedge), this
  // function returns a BB in which an out-of-loop join will be inserted.
  //
  // For all BBs in [StartBB, EndBB) (including StartBB, but not including
  // EndBB) that jump after EndBB (forward jump), return the earliest (closest
  // to EndBB). If no such BB, return nullptr. Assumptions: 1) BB's id is in
  // increasing order; and 2) physical succ has been set.
  auto getEarliestJmpOutBB = [](const std::list<BlockSizePair> &activeJoins,
                                const G4_BB *StartBB,
                                const G4_BB *EndBB) -> G4_BB * {
    // Existing active joins (passed in as "activeJoins") must be considered.
    // For example,
    //     goto L0  (non-uniform)
    //     ...
    //   Loop:
    //     ...
    //     goto L1  (uniform goto)
    //     ...
    //   L0:
    //     ...
    //     goto Loop
    //     ...
    //   L1:
    // Since 'goto L0' is non-uniform, 'goto L1' must be translated into a
    // goto and a join must be inserted at L1. This "goto L0" is outside of
    // the loop (outside of [StartBB, EndBB)). If it is not considered, this
    // function thinks there are no active joins at L0 and 'goto L1' would be
    // converted into jmpi incorrectly.

    // list of BBs that will have a join at the beginning of each BB.
    // The list is in increasing order of BB id.
    std::list<G4_BB *> tmpActiveJoins;
    // initialize tmpActiveJoins with activeJoins
    for (auto II = activeJoins.begin(), IE = activeJoins.end(); II != IE;
         ++II) {
      tmpActiveJoins.push_back(II->first);
    }

    auto orderedInsert = [](std::list<G4_BB *> &tmpActiveJoins, G4_BB *aBB) {
      auto II = tmpActiveJoins.begin(), IE = tmpActiveJoins.end();
      for (; II != IE; ++II) {
        G4_BB *bb = *II;
        if (bb->getId() == aBB->getId()) {
          break;
        } else if (bb->getId() > aBB->getId()) {
          tmpActiveJoins.insert(II, aBB);
          break;
        }
      }
      if (II == IE) {
        tmpActiveJoins.push_back(aBB);
      }
    };

    const G4_BB *bb = StartBB;
    while (bb != EndBB) {
      const G4_BB *currBB = bb;
      if (!tmpActiveJoins.empty()) {
        // adjust active join lists if a join is reached.
        if (bb == tmpActiveJoins.front()) {
          tmpActiveJoins.pop_front();
        }
      }

      bb = bb->getPhysicalSucc();
      if (currBB->empty()) {
        continue;
      }

      const G4_INST *lastInst = currBB->back();
      if (lastInst->opcode() != G4_goto || lastInst->asCFInst()->isBackward()) {
        continue;
      }
      G4_BB *targetBB = currBB->Succs.back();
      vASSERT(lastInst->asCFInst()->getUip() == targetBB->getLabel());
      if (lastInst->asCFInst()->isUniform() &&
          (tmpActiveJoins.empty() ||
           tmpActiveJoins.front()->getId() >= targetBB->getId())) {
        // Non-crossing uniform goto will be jmpi, and thus no join needed.
        //
        // For the crossing gotos, a uniform goto cannot be translated to
        // jmpi. For example:
        //     goto L0:        // non-uniform goto (need a join)
        //      ....
        //     uniform-goto L1:
        //      ...
        //     L0:
        //      ...
        //     L1:
        // Here, uniform-goto L1 is uniform. Since it crosses the non-uniform
        // join at L0, it must be translated to goto, not jmpi. Thus, a join
        // is needed at L1.
        continue;
      }
      orderedInsert(tmpActiveJoins, targetBB);
    }

    // Need to remove join at EndBB if present (looking for joins after EndBB)
    if (!tmpActiveJoins.empty() && tmpActiveJoins.front() == EndBB) {
      tmpActiveJoins.pop_front();
    }

    if (tmpActiveJoins.empty()) {
      // no new join
      return nullptr;
    }
    return tmpActiveJoins.front();
  };

  // list of active blocks where a join needs to be inserted, sorted in
  // lexical order
  std::list<BlockSizePair> activeJoinBlocks;
  bool doScalarJmp = !builder->noScalarJmp();

  for (G4_BB *bb : BBs) {
    if (bb->size() == 0) {
      continue;
    }

    if (activeJoinBlocks.size() > 0) {
      if (bb == activeJoinBlocks.front().first) {
        // This block is the target of one or more forward gotos,
        // or the fall-thru of a backward goto, so a join needs to be inserted
        ExecMaskInfo &EM = activeJoinBlocks.front().second;
        uint8_t eSize = EM.getExecSize();
        uint8_t mOff = EM.getMaskOffset();
        G4_Label *joinJIP = NULL;

        activeJoinBlocks.pop_front();
        if (activeJoinBlocks.size() > 0) {
          // set join JIP to the next active join
          G4_BB *joinBlock = activeJoinBlocks.front().first;
          joinJIP = joinBlock->getLabel();
        }

        insertJoinToBB(bb, G4_ExecSize{eSize}, joinJIP, mOff);
      }
    }

    // check to see if this block is the target of one (or more) backward goto
    // If so, we process the backward goto and push its fall-thru block to the
    // active join list
    for (std::list<G4_BB *>::iterator iter = bb->Preds.begin(),
                                      iterEnd = bb->Preds.end();
         iter != iterEnd; ++iter) {
      G4_BB *predBB = *iter;
      G4_INST *lastInst = predBB->back();
      if (lastInst->opcode() == G4_goto && lastInst->asCFInst()->isBackward() &&
          lastInst->asCFInst()->getUip() == bb->getLabel()) {
        // backward goto
        G4_ExecSize eSize = lastInst->getExecSize() > g4::SIMD1
                                ? lastInst->getExecSize()
                                : pKernel->getSimdSize();
        bool isUniform = lastInst->getExecSize() == g4::SIMD1 ||
                         lastInst->getPredicate() == NULL;
        if (isUniform && doScalarJmp) {
          // can always convert a uniform backward goto into a jmp
          convertGotoToJmpi(lastInst);

          // we still have to add a join at the BB immediately after the back
          // edge, since there may be subsequent loop breaks that are waiting
          // there. Example:
          //   L1:
          //     (P1) goto exit
          //     (P2) goto L2
          //     goto L1
          //   L2:   <-- earliest after-loop goto target
          //   ...
          //   exit:
          //
          // In this case, L2 shall be the 1st after-loop join (not
          // necessarily the one immediately after the backward BB). The
          // 'goto exit's JIP should be set to L2.
          //
          // Here, we will add the 1st after-loop join so that any out-loop
          // JIP (either goto or join) within the loop body will have its JIP
          // set to this join.
          if (G4_BB *afterLoopJoinBB =
                  getEarliestJmpOutBB(activeJoinBlocks, bb, predBB)) {
            // conservatively use maskoffset = 0.
            addBBToActiveJoinList(activeJoinBlocks, afterLoopJoinBB, eSize, 0);
          }
        } else {
          if (lastInst->getExecSize() ==
              g4::SIMD1) { // For simd1 goto, convert it to a goto with the
                           // right execSize.
            lastInst->setExecSize(eSize);
            // This should have noMask removed if any
            lastInst->setOptions(InstOpt_M0);
          }
          // add join to the fall-thru BB
          if (G4_BB *fallThruBB = predBB->getPhysicalSucc()) {
            addBBToActiveJoinList(activeJoinBlocks, fallThruBB, eSize,
                                  (uint8_t)lastInst->getMaskOffset());
            lastInst->asCFInst()->setJip(fallThruBB->getLabel());
          }
        }
      }
    }

    G4_INST *lastInst = bb->back();
    if (lastInst->opcode() == G4_goto && !lastInst->asCFInst()->isBackward()) {
      // forward goto
      // the last Succ BB is our goto target
      G4_BB *gotoTargetBB = bb->Succs.back();
      bool isUniform = lastInst->getExecSize() == g4::SIMD1 ||
                       lastInst->getPredicate() == NULL;

      if (isUniform && doScalarJmp &&
          (activeJoinBlocks.size() == 0 ||
           activeJoinBlocks.front().first->getId() > gotoTargetBB->getId())) {
        // can convert goto into a scalar jump to UIP, if the jmp will not
        // make us skip any joins. The CFG itself does not need to be updated.
        convertGotoToJmpi(lastInst);
      } else {
        // set goto JIP to the first active block
        G4_ExecSize eSize = lastInst->getExecSize() > g4::SIMD1
                                ? lastInst->getExecSize()
                                : pKernel->getSimdSize();
        addBBToActiveJoinList(activeJoinBlocks, gotoTargetBB, eSize,
                              (uint8_t)lastInst->getMaskOffset());
        G4_BB *joinBlock = activeJoinBlocks.front().first;
        if (lastInst->getExecSize() ==
            g4::SIMD1) { // For simd1 goto, convert it to a goto with the
                         // right execSize.
          lastInst->setExecSize(eSize);
          lastInst->setOptions(InstOpt_M0);
        }
        lastInst->asCFInst()->setJip(joinBlock->getLabel());

        if (!builder->gotoJumpOnTrue()) {
          // For BDW/SKL goto, the false channels are the ones that actually
          // will take the jump, and we thus have to flip the predicate
          G4_Predicate *pred = lastInst->getPredicate();
          if (pred != NULL) {
            pred->setState(pred->getState() == PredState_Plus ? PredState_Minus
                                                              : PredState_Plus);
          } else {
            // if there is no predicate, generate a predicate with all 0s.
            // if predicate is SIMD32, we have to use a :ud dst type for the
            // move
            G4_ExecSize execSize =
                lastInst->getExecSize() > g4::SIMD16 ? g4::SIMD2 : g4::SIMD1;
            G4_Declare *tmpFlagDcl = builder->createTempFlag(execSize);
            G4_DstRegRegion *newPredDef =
                builder->createDst(tmpFlagDcl->getRegVar(), 0, 0, 1,
                                   execSize == g4::SIMD2 ? Type_UD : Type_UW);
            G4_INST *predInst = builder->createMov(
                g4::SIMD1, newPredDef, builder->createImm(0, Type_UW),
                InstOpt_WriteEnable, false);
            INST_LIST_ITER iter = bb->end();
            iter--;
            bb->insertBefore(iter, predInst);

            pred = builder->createPredicate(PredState_Plus,
                                            tmpFlagDcl->getRegVar(), 0);
            lastInst->setPredicate(pred);
          }
        }
      }
    }
  }
}

// TGL NoMask WA : to identify which BB needs WA
//
//    nestedDivergentBBs[BB] = 2;
//        BB is in a nested divergent branch
//    nestedDivergentBBs[BB] = 1;
//        BB is not in a nested divergent branch, but in a divergent branch.
void FlowGraph::findNestedDivergentBBs(
    std::unordered_map<G4_BB *, int> &nestedDivergentBBs) {
  // Control-Flow state
  //    Used for keeping the current state of control flow during
  //    traversing BBs in the layout order. Assuming the current
  //    BB is 'currBB', this class shows whether currBB is in any
  //    divergent branch/nested divergent branch.
  class CFState {
    // If currBB is the head of a loop or currBB has a cf inst such as
    // goto/break/if, the code will diverge from currBB to the joinBB. The
    // following cases show where joinBB is:
    //   case 1: cf inst = goto
    //     <currBB>   [(p)] goto L
    //        ...
    //     <joinBB>  L:
    //   case 2: cf inst = if
    //     <currBB>   if
    //        ...
    //     <joinBB>   endif
    //
    //   Note that else does not increase nor decrease divergence level.
    //   case 3: cf inst = break
    //     <currBB>   break
    //        ...
    //                [(p)] while
    //     <joinBB>
    //   case 4:
    //     <currBB>  L:
    //        ....
    //                [(p)] while/goto L
    //     <joinBB>
    //   Case 1/2/3 will increase divergence level, while case 4 does not.
    //
    // The following are used for tracking nested divergence
    //    LastDivergentBBId: Id(LastDivergentBB).
    //       LastDivergentBB is the farthest joinBB of any
    //       goto/break/if/while so far, as described above. That is, [currBB,
    //       LastDivergentBB) is a divergent code path. LastDivergentBB is not
    //       included in this divergent path.
    //    LastNestedBBId: Id(LastNestedBB).
    //       LastNestedBB is the current farthest join BB that is inside
    //       [currBB, LastDivergentBB). We have LastNestedBB if a goto/break/if
    //       (no loop) is encountered inside an already divergent code path. It
    //       is also called nested divergent (or just nested).
    //
    // The following is always true:
    //    LastDivergentBBId >= LastNestedBBId
    int LastDivergentBBId = -1;
    int LastNestedBBId = -1;

    void setLastDivergentBBId(G4_BB *toBB) {
      LastDivergentBBId = std::max(LastDivergentBBId, (int)toBB->getId());
    }

    void setLastNestedBBId(G4_BB *toBB) {
      LastNestedBBId = std::max(LastNestedBBId, (int)toBB->getId());
      setLastDivergentBBId(toBB);
    }

    // Increase divergence level
    void incDivergenceLevel(G4_BB *toBB) {
      if (isInDivergentBranch()) {
        setLastNestedBBId(toBB);
      } else {
        setLastDivergentBBId(toBB);
      }
    }

  public:
    CFState() {}

    bool isInDivergentBranch() const { return (LastDivergentBBId > 0); }
    bool isInNestedDivergentBranch() const { return LastNestedBBId > 0; }

    // pushLoop: for case 4
    // pushJoin: for case 1/2/3
    void pushLoop(G4_BB *joinBB) { setLastDivergentBBId(joinBB); }
    void pushJoin(G4_BB *joinBB) { incDivergenceLevel(joinBB); }
    void popJoinIfMatching(G4_BB *currBB) {
      if ((int)currBB->getId() == LastNestedBBId) {
        LastNestedBBId = -1;
      }
      if ((int)currBB->getId() == LastDivergentBBId) {
        LastDivergentBBId = -1;
      }
      vISA_ASSERT(!(LastDivergentBBId == -1 && LastNestedBBId >= 0),
                  "ICE: something wrong in setting divergent BB Id!");
    }
  };
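
  // A brief usage sketch of CFState (hypothetical BB ids): an 'if' ending BB2
  // whose endif sits in BB7 calls pushJoin(BB7), so BB3..BB6 are divergent.
  // A second 'if' inside [BB2, BB7) raises LastNestedBBId too, making those
  // BBs nested divergent, which is what level 2 in nestedDivergentBBs denotes
  // (see the comment atop this function).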
|
|
|
|
// For loop with backedge Tail->Head, if Tail is divergent, its divergence
|
|
// should be propagated to the entire loop as Tail jumps to head, which could
|
|
// go all BBs in the loop.
|
|
//
|
|
// In another word, the whole loop's divergence level should be at least the
|
|
// same as Tail's. Once the entire loop has been handled and Tail's divergence
|
|
// is known, invoking this lambda function to carry out propagation.
|
|
//
|
|
// An example to show WA is needed (nested divergence for Tail):
|
|
// Head: // initial fuseMask = 11; 2nd iter: fuseMask
|
|
// = 11 (should be 01)
|
|
// // Need to propagae nested divergence to the
|
|
// entire loop!
|
|
// if (...) goto Tail; // fuseMask = 11
|
|
// // After if, fusedMask = 10 (bigEU is Off)
|
|
// ...
|
|
// goto out // fuseMask = 10 (BigEU off, SmallEU on)
|
|
// // after goto, fuseMask = 00, but HW remains
|
|
// 10
|
|
// Tail:
|
|
// join // after join, bigEU is on again. fusedMask
|
|
// == 11. It should be 01 goto Head // fuseMask should 01,
|
|
// but HW remains 11, and jump to Head at 2nd iter
|
|
// out:
|
|
// join
|
|
auto propLoopDivergence = [&](G4_BB *LoopTail) {
|
|
// LoopTail must be divergent.
|
|
std::vector<G4_BB *> workset;
|
|
workset.push_back(LoopTail);
|
|
while (!workset.empty()) {
|
|
std::vector<G4_BB *> newWorkset;
|
|
for (auto iter : workset) {
|
|
G4_BB *Tail = iter;
|
|
G4_InstCF *cfInst = Tail->back()->asCFInst();
|
|
|
|
vISA_ASSERT(nestedDivergentBBs.count(Tail) > 0,
|
|
"Only divergent Tail shall invoke this func!");
|
|
|
|
// Find loop head
|
|
G4_BB *Head = nullptr;
|
|
for (G4_BB *succBB : Tail->Succs) {
|
|
if ((cfInst->opcode() == G4_goto &&
|
|
cfInst->getUip() == succBB->getLabel()) ||
|
|
(cfInst->opcode() == G4_while &&
|
|
cfInst->getJip() == succBB->getLabel()) ||
|
|
(cfInst->opcode() == G4_jmpi &&
|
|
cfInst->getSrc(0) == succBB->getLabel())) {
|
|
Head = succBB;
|
|
break;
|
|
}
|
|
}
|
|
vASSERT(Head != nullptr);
|
|
|
|
// If Head's divergence level is already higher than Tail, no
|
|
// propagation needed as Head's divergence has been propagated already.
|
|
if (nestedDivergentBBs.count(Head) > 0 &&
|
|
nestedDivergentBBs[Head] >= nestedDivergentBBs[Tail]) {
|
|
continue;
|
|
}
|
|
|
|
// Follow physical succs to visit all BBs in this loop
|
|
for (G4_BB *tBB = Head; tBB != nullptr && tBB != Tail;
|
|
tBB = tBB->getPhysicalSucc()) {
|
|
auto miter = nestedDivergentBBs.find(tBB);
|
|
if (miter == nestedDivergentBBs.end() ||
|
|
miter->second < nestedDivergentBBs[Tail]) {
|
|
// Propagate Tail's divergence. If the new BB is a tail (another
|
|
// loop), add it to workset for the further propagation.
|
|
nestedDivergentBBs[tBB] = nestedDivergentBBs[Tail];
|
|
auto lastOp = tBB->getLastOpcode();
|
|
if (lastOp == G4_while ||
|
|
((lastOp == G4_goto || lastOp == G4_jmpi) &&
|
|
tBB->back()->asCFInst()->isBackward())) {
|
|
newWorkset.push_back(tBB);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
workset = std::move(newWorkset);
|
|
}
|
|
};
|
|
|
|
if (BBs.empty()) {
|
|
// Sanity check
|
|
return;
|
|
}
|
|
|
|
// If -noMaskWAOnStackCall is prsent, all BBs inside stack functions are
|
|
// assumed to need NoMaskWA.
|
|
if (builder->getOption(vISA_noMaskWAOnFuncEntry) &&
|
|
builder->getIsFunction()) {
|
|
for (auto bb : BBs) {
|
|
nestedDivergentBBs[bb] = 2;
|
|
bb->setBBType(G4_BB_NM_WA_TYPE);
|
|
}
|
|
return;
|
|
}
|
|
|
|
  // Analyze subroutines in topological order. As there is no recursion
  // and no indirect call, a subroutine will be analyzed only if all
  // its callers have been analyzed.
  //
  // If there are no subroutines, sortedFuncTable is empty. Here, keep the
  // entry and subroutines in a vector first (this works with and without
  // subroutines), then scan the subroutines in topological order.
  struct StartEndIter {
    BB_LIST_ITER StartI;
    BB_LIST_ITER EndI;

    // When a subroutine is entered, the entry could be in either a divergent
    // branch (correct fused mask) or a nested divergent branch (possibly
    // wrong fused mask).
    bool isInDivergentBranch;
    bool isInNestedDivergentBranch;
  };
  int numFuncs = (int)sortedFuncTable.size();
  std::vector<StartEndIter> allFuncs;
  std::unordered_map<FuncInfo *, uint32_t> funcInfoIndex;
  if (numFuncs == 0) {
    numFuncs = 1;
    allFuncs.resize(1);
    allFuncs[0].StartI = BBs.begin();
    allFuncs[0].EndI = BBs.end();
    allFuncs[0].isInDivergentBranch = false;
    allFuncs[0].isInNestedDivergentBranch = false;
  } else {
    allFuncs.resize(numFuncs);
    for (int i = numFuncs; i > 0; --i) {
      uint32_t ix = (uint32_t)(i - 1);
      FuncInfo *pFInfo = sortedFuncTable[ix];
      G4_BB *StartBB = pFInfo->getInitBB();
      G4_BB *EndBB = pFInfo->getExitBB();
      uint32_t ui = (uint32_t)(numFuncs - i);
      allFuncs[ui].StartI = std::find(BBs.begin(), BBs.end(), StartBB);
      auto nextI = std::find(BBs.begin(), BBs.end(), EndBB);
      vISA_ASSERT(nextI != BBs.end(), "ICE: subroutine's end BB not found!");
      allFuncs[ui].EndI = (++nextI);
      allFuncs[ui].isInDivergentBranch = false;
      allFuncs[ui].isInNestedDivergentBranch = false;

      funcInfoIndex[pFInfo] = ui;
    }
  }

  for (int i = 0; i < numFuncs; ++i) {
    // each function: [IB, IE)
    BB_LIST_ITER &IB = allFuncs[i].StartI;
    BB_LIST_ITER &IE = allFuncs[i].EndI;
    if (IB == IE) {
      // Sanity check
      continue;
    }

    // The main code for each subroutine, including the kernel, assumes that
    // the fused mask is always correct on entry to the subroutine/kernel.
    //
    // This is true for the kernel entry function, but the fused mask could be
    // incorrect on entry to a subroutine. Here, handle this case specially.
    // This case happens if the subroutine's isInNestedDivergentBranch is true.
    if (i > 0 && allFuncs[i].isInNestedDivergentBranch) {
      for (BB_LIST_ITER IT = IB; IT != IE; ++IT) {
        G4_BB *BB = *IT;
        nestedDivergentBBs[BB] = 2;

        if (BB->size() > 0) {
          G4_INST *lastInst = BB->back();
          if (lastInst->opcode() == G4_call) {
            FuncInfo *calleeFunc = BB->getCalleeInfo();
            if (funcInfoIndex.count(calleeFunc)) {
              int ix = funcInfoIndex[calleeFunc];
              allFuncs[ix].isInDivergentBranch = true;
              allFuncs[ix].isInNestedDivergentBranch = true;
            }
          }
        }
      }
      // go to next subroutine
      continue;
    }

    CFState cfs;
    if (allFuncs[i].isInDivergentBranch) {
      // The subroutine is divergent on entry. Mark [StartBB, EndBB) as
      // divergent.
      auto prevI = IE;
      --prevI;
      G4_BB *EndBB = *prevI;
      cfs.pushJoin(EndBB);
    }

    // FusedMask does not correct itself. Once the fusedMask goes wrong, it
    // stays wrong until the control-flow reaches a non-divergent BB. Thus,
    // some BBs, which are not nested-divergent, may need NoMask WA as they
    // might inherit the wrong fusedMask from the previous nested-divergent
    // BBs.
    //
    // Here, we make an adjustment for this reason.
    bool seenNestedDivergent = false;
    for (BB_LIST_ITER IT = IB; IT != IE; ++IT) {
      G4_BB *BB = *IT;

      // This handles cases in which BB has endif/while/join as well as others
      // so we don't need to explicitly check whether BB has endif/while/join,
      // etc.
      cfs.popJoinIfMatching(BB);

      G4_INST *firstInst = BB->getFirstInst();
      if (firstInst == nullptr) {
        // empty BB or BB with only a label inst
        continue;
      }

      // Handle loop
      for (G4_BB *predBB : BB->Preds) {
        G4_INST *lastInst = predBB->back();
        if ((lastInst->opcode() == G4_while &&
             lastInst->asCFInst()->getJip() == BB->getLabel()) ||
            (lastInst->opcode() == G4_goto &&
             lastInst->asCFInst()->isBackward() &&
             lastInst->asCFInst()->getUip() == BB->getLabel())) {
          // joinBB is the BB right after goto/while
          BB_LIST_ITER predIter = std::find(BBs.begin(), BBs.end(), predBB);
          ++predIter;
          // if predBB is the last BB then joinBB is not available
          if (predIter == BBs.end())
            continue;
          G4_BB *joinBB = *predIter;
          cfs.pushLoop(joinBB);
        }
      }

      if (cfs.isInNestedDivergentBranch()) {
        nestedDivergentBBs[BB] = 2;
        seenNestedDivergent = true;
      } else if (cfs.isInDivergentBranch()) {
        if (seenNestedDivergent) {
          // Divergent and might inherit the wrong fusedMask;
          // set it to nested divergent so we can apply the WA.
          nestedDivergentBBs[BB] = 2;
        } else {
          nestedDivergentBBs[BB] = 1;
        }
      } else {
        // Reached a non-divergent BB
        seenNestedDivergent = false;
      }

      G4_INST *lastInst = BB->back();

      // Need to check whether to propagate WA marking to the entire loop!
      if (nestedDivergentBBs.count(BB) > 0 && nestedDivergentBBs[BB] >= 2) {
        if (lastInst->opcode() == G4_while ||
            ((lastInst->opcode() == G4_goto || lastInst->opcode() == G4_jmpi) &&
             lastInst->asCFInst()->isBackward())) {
          propLoopDivergence(BB);
        }
      }

      if ((lastInst->opcode() == G4_goto &&
           !lastInst->asCFInst()->isBackward()) ||
          lastInst->opcode() == G4_break) {
        // forward goto/break : the last Succ BB is our target BB
        // For break, it should be the BB right after the while inst.
        G4_BB *joinBB = BB->Succs.back();
        cfs.pushJoin(joinBB);
      } else if (lastInst->opcode() == G4_if) {
        G4_Label *UIPLabel = lastInst->asCFInst()->getUip();
        G4_BB *joinBB = findLabelBB(IT, IE, UIPLabel);
        vISA_ASSERT(joinBB, "ICE(vISA) : missing endif label!");
        cfs.pushJoin(joinBB);
      } else if (lastInst->opcode() == G4_call) {
        // If this function is already in a divergent branch, the callee
        // must be in a divergent branch!
        bool divergent = (allFuncs[i].isInDivergentBranch ||
                          cfs.isInDivergentBranch());
        bool nestedDivergent =
            (cfs.isInNestedDivergentBranch() ||
             (divergent && lastInst->getPredicate() != nullptr));
        if (divergent || nestedDivergent) {
          FuncInfo *calleeFunc = BB->getCalleeInfo();
          if (funcInfoIndex.count(calleeFunc)) {
            int ix = funcInfoIndex[calleeFunc];
            allFuncs[ix].isInDivergentBranch = true;
            allFuncs[ix].isInNestedDivergentBranch = nestedDivergent;
          }
        }
      }
    }

    // Once all BBs are processed, cfs should be clear
    vISA_ASSERT((!cfs.isInDivergentBranch()),
                "ICE(vISA): there is an error in divergence tracking!");
  }

  for (const auto &MI : nestedDivergentBBs) {
    G4_BB *bb = MI.first;
    if (MI.second >= 2) {
      bb->setBBType(G4_BB_NM_WA_TYPE);
    }
  }
  return;
}

//
// Add declares for the stack and frame pointers.
//
void FlowGraph::addFrameSetupDeclares(IR_Builder &builder,
                                      PhyRegPool &regPool) {
  if (framePtrDcl == NULL) {
    framePtrDcl = builder.getBEFP();
  }
  if (stackPtrDcl == NULL) {
    stackPtrDcl = builder.getBESP();
  }
  if (scratchRegDcl == NULL) {
    scratchRegDcl = builder.createDeclare(
        "SR", G4_GRF, builder.numEltPerGRF<Type_UD>(), 1, Type_UD);
    scratchRegDcl->getRegVar()->setPhyReg(
        regPool.getGreg(getKernel()->stackCall.getSpillHeaderGRF()), 0);
    // Note that the scratch reg dcl assignment could change later in the code
    // based on platform/config (e.g., stackcall).
  }
}

//
// Insert pseudo dcls to represent the caller-save and callee-save registers.
// This is only required when there is more than one graph cut due to the
// presence of function calls using a stack calling convention.
//
void FlowGraph::addSaveRestorePseudoDeclares(IR_Builder &builder) {
  //
  // VCE_SAVE (r60.0-r125.0) [r125-127 is reserved]
  //
  if (pseudoVCEDcl == NULL) {
    unsigned numRowsVCE = getKernel()->stackCall.getNumCalleeSaveRegs();
    pseudoVCEDcl = builder.createDeclare(
        "VCE_SAVE", G4_GRF, builder.numEltPerGRF<Type_UD>(),
        static_cast<unsigned short>(numRowsVCE), Type_UD);
    pseudoDcls.insert(pseudoVCEDcl);
  } else {
    pseudoVCEDcl->getRegVar()->setPhyReg(NULL, 0);
  }

  //
  // Insert caller-save decls for VCA/A0/Flag (one per call site)
  // VCA_SAVE (r1.0-r60.0) [r0 is reserved] - one required per stack call,
  // but will be reused across cuts.
  //
  INST_LIST callSites;
  for (auto bb : builder.kernel.fg) {
    if (bb->isEndWithFCall()) {
      callSites.push_back(bb->back());
    }
  }

  unsigned i = 0;
  for (auto callSite : callSites) {
    // Use an array so that sizeof(nameBase) is the string size (9, including
    // the terminating 0) rather than the size of a pointer.
    const char nameBase[] = "VCA_SAVE";
    const int maxIdLen = 10;
    const char *name = builder.getNameString(sizeof(nameBase) + maxIdLen,
                                             "%s_%d", nameBase, i);
    G4_Declare *VCA = builder.createDeclare(
        name, G4_GRF, builder.numEltPerGRF<Type_UD>(),
        getKernel()->stackCall.getCallerSaveLastGRF(), Type_UD);
    name = builder.getNameString(50, "SA0_%d", i);
    G4_Declare *saveA0 = builder.createDeclare(
        name, G4_ADDRESS, (uint16_t)builder.getNumAddrRegisters(), 1, Type_UW);
    name = builder.getNameString(64, "SFLAG_%d", i);
    G4_Declare *saveFLAG = builder.createDeclare(
        name, G4_FLAG, (uint16_t)builder.getNumFlagRegisters(), 1, Type_UW);
    fillPseudoDclMap(callSite->asCFInst(), VCA, saveA0, saveFLAG);
    i++;
  }
}

void FlowGraph::fillPseudoDclMap(G4_InstCF *cfInst, G4_Declare *VCA,
                                 G4_Declare *saveA0, G4_Declare *saveFlag) {
  fcallToPseudoDclMap[cfInst] = {VCA, saveA0, saveFlag};
  pseudoDcls.insert({VCA, saveA0, saveFlag});
  pseudoVCADcls.insert(VCA);
  pseudoA0Dcls.insert(saveA0);
  vISA_ASSERT((3 * fcallToPseudoDclMap.size() + 1) == pseudoDcls.size(),
              "Found inconsistency between fcallToPseudoDclMap and pseudoDcls");
  vISA_ASSERT(fcallToPseudoDclMap.size() == pseudoVCADcls.size(),
              "Found inconsistency between fcallToPseudoDclMap and pseudoVCADcls");
  vISA_ASSERT(fcallToPseudoDclMap.size() == pseudoA0Dcls.size(),
              "Found inconsistency between fcallToPseudoDclMap and pseudoA0Dcls");
}

//
// Since we don't do SIMD augmentation in RA for CM, we have to add an edge
// between the then and else block of an if branch to ensure that liveness is
// computed correctly, if conservatively. This also applies to any goto BB and
// its JIP join block.
//
void FlowGraph::addSIMDEdges() {
  std::map<G4_Label *, G4_BB *> joinBBMap;
  for (auto bb : BBs) {
    if (bb->size() > 0 && bb->back()->opcode() == G4_else) {
      addUniquePredSuccEdges(bb, bb->getPhysicalSucc());
    } else {
      // check goto case
      auto instIter = std::find_if(bb->begin(), bb->end(), [](G4_INST *inst) {
        return !inst->isLabel();
      });
      if (instIter != bb->end() && (*instIter)->opcode() == G4_join) {
        G4_INST *firstInst = bb->front();
        if (firstInst->isLabel()) {
          joinBBMap[firstInst->getLabel()] = bb;
        }
      }
    }
  }

  if (!joinBBMap.empty()) {
    for (auto bb : BBs) {
      if (bb->isEndWithGoto()) {
        G4_INST *gotoInst = bb->back();
        auto iter = joinBBMap.find(gotoInst->asCFInst()->getJip()->asLabel());
        if (iter != joinBBMap.end()) {
          addUniquePredSuccEdges(bb, iter->second);
        }
      }
    }
  }
}

//
// Perform DFS traversal on the flow graph (do not enter subroutines, but mark
// subroutine blocks so that they will be processed independently later)
//
void FlowGraph::DFSTraverse(G4_BB *startBB, unsigned &preId, unsigned &postId,
                            FuncInfo *fn, BBPrePostIDMap &BBPrePostId) {
  vISA_ASSERT(fn, "Invalid func info");

  // clear fields that will be reset by this function.
  fn->clear();

  std::stack<G4_BB *> traversalStack;
  traversalStack.push(startBB);

  constexpr int PRE_ID = 0, POST_ID = 1;

  auto getPreId = [&](G4_BB *bb) {
    auto iter = BBPrePostId.find(bb);
    return iter != BBPrePostId.end() ? iter->second[PRE_ID] : UINT_MAX;
  };
  auto getPostId = [&](G4_BB *bb) {
    auto iter = BBPrePostId.find(bb);
    return iter != BBPrePostId.end() ? iter->second[POST_ID] : UINT_MAX;
  };

  while (!traversalStack.empty()) {
    G4_BB *bb = traversalStack.top();
    if (getPreId(bb) != UINT_MAX) {
      // Already pre-processed; continue to the next one.
      // Before doing so, set postId if not set before.
      traversalStack.pop();
      if (getPostId(bb) == UINT_MAX) {
        // All of bb's succs have been visited (PreId is set) at this time.
        // If any of its succs has not been finished (PostId not set),
        // bb->succ forms a backedge.
        //
        // Note: originally, CALL and EXIT will not check back-edges; here
        // we skip checking for them as well. (Should INIT & RETURN
        // be checked as well?)
        if (!(bb->getBBType() & (G4_BB_CALL_TYPE | G4_BB_EXIT_TYPE))) {
          for (auto succBB : bb->Succs) {
            if (getPostId(succBB) == UINT_MAX) {
              backEdges.push_back(Edge(bb, succBB));
            }
          }
        }

        // Need to keep this after backedge checking so that a self-backedge
        // (single-bb loop) will not be missed.
        BBPrePostId[bb][POST_ID] = postId++;
      }
      continue;
    }

    fn->addBB(bb);
    BBPrePostId[bb][PRE_ID] = preId++;

    if (bb->getBBType() & G4_BB_CALL_TYPE) {
      G4_BB *returnBB = bb->BBAfterCall();
      // If the call is predicated, the first item in Succs is the physically
      // consecutive BB and the second (or last) item is the subroutine entry
      // BB.
      vISA_ASSERT(bb->Succs.back()->getBBType() & G4_BB_INIT_TYPE,
                  ERROR_FLOWGRAPH);
      // bb->Succs size may be 2 if the call is predicated.
      vISA_ASSERT(bb->Succs.size() == 1 || bb->Succs.size() == 2,
                  ERROR_FLOWGRAPH);

      {
        bool found = false;
        for (auto func : fn->getCallees()) {
          if (func == bb->getCalleeInfo())
            found = true;
        }
        if (!found) {
          fn->addCallee(bb->getCalleeInfo());
        }
      }

      if (getPreId(returnBB) == UINT_MAX) {
        traversalStack.push(returnBB);
      } else {
        vISA_ASSERT(false, ERROR_FLOWGRAPH);
      }
    } else if (bb->getBBType() & G4_BB_EXIT_TYPE) {
      // Skip
    } else {
      // To be consistent with previous behavior, use a reverse iterator.
      BB_LIST_RITER RIE = bb->Succs.rend();
      for (BB_LIST_RITER rit = bb->Succs.rbegin(); rit != RIE; ++rit) {
        G4_BB *succBB = *rit;
        if (getPreId(succBB) == UINT_MAX) {
          traversalStack.push(succBB);
        }
      }
    }
  }
}

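// Illustrative sketch (not from the source): for a single-BB loop
//   L:  ...  (p) goto L
// the BB is popped with its PreId set while its PostId is still UINT_MAX;
// its successor L also has PostId == UINT_MAX, so (L, L) is recorded as a
// back-edge before L's PostId is assigned. This is why the PostId assignment
// above must come after the back-edge check.
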
void vISA::FlowGraph::markStale() {
  // Invoked whenever the CFG structure changes.
  // Structural changes include addition/removal of a BB or an edge.
  // Mark analysis passes as stale so getters lazily re-run
  // analysis when queried.
  immDom.setStale();
  pDom.setStale();
  loops.setStale();

  // Any other analysis that becomes stale when FlowGraph changes
  // should be marked as stale here.
}

//
// Find back-edges in the flow graph.
//
void FlowGraph::findBackEdges() {
  vISA_ASSERT(numBBId == BBs.size(), ERROR_FLOWGRAPH);
  BBPrePostIDMap BBPrePostID;

  for (auto bb : BBs) {
    BBPrePostID[bb] = {UINT_MAX, UINT_MAX};
  }

  unsigned preId = 0;
  unsigned postID = 0;
  backEdges.clear();

  setPhysicalPredSucc();
  DFSTraverse(getEntryBB(), preId, postID, kernelInfo, BBPrePostID);

  for (auto fn : funcInfoTable) {
    DFSTraverse(fn->getInitBB(), preId, postID, fn, BBPrePostID);
  }
}

//
// Find natural loops in the flow graph.
// If the CFG is irreducible, we give up completely (i.e., no natural loops
// are detected).
//
void FlowGraph::findNaturalLoops() {
  // findNaturalLoops may be called multiple times to get loop info when there
  // is a CFG change. The old natural loop info is invalid and needs to be
  // cleared.
  naturalLoops.clear();

  setPhysicalPredSucc();
  std::unordered_set<G4_BB *> visited;
  for (auto &&backEdge : backEdges) {
    G4_BB *head = backEdge.second;
    G4_BB *tail = backEdge.first;
    std::list<G4_BB *> loopBlocks;
    Blocks loopBody;
    loopBlocks.push_back(tail);
    loopBody.insert(tail);

    while (!loopBlocks.empty()) {
      G4_BB *loopBlock = loopBlocks.front();
      loopBlocks.pop_front();
      visited.emplace(loopBlock);
      loopBlock->setNestLevel();

      if ((loopBlock == head) || (loopBlock->getBBType() & G4_BB_INIT_TYPE)) {
        // Skip
      } else if (loopBlock->getBBType() & G4_BB_RETURN_TYPE) {
        auto callBB = loopBlock->BBBeforeCall();
        if (!visited.count(callBB)) {
          loopBlocks.push_front(callBB);
          loopBody.insert(callBB);
        }
      } else {
        auto entryBB = getEntryBB();
        for (auto predBB : loopBlock->Preds) {
          if (!visited.count(predBB)) {
            if (predBB == entryBB && head != entryBB) {
              // The graph is irreducible; punt natural loop detection for the
              // entire CFG.
              this->reducible = false;
              naturalLoops.clear();
              for (auto BB : BBs)
                BB->resetNestLevel();
              return;
            }
            vISA_ASSERT(predBB != entryBB || head == entryBB, ERROR_FLOWGRAPH);
            loopBlocks.push_front(predBB);
            loopBody.insert(predBB);
          }
        }
      }
    }
    visited.clear();

    naturalLoops.emplace(backEdge, loopBody);
  }
}

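// Illustrative sketch (not from the source): given a back-edge (T -> H), the
// loop body is grown backward from T: unvisited predecessors are added until
// the walk reaches H, so for H -> A -> B -> T -> H the natural loop of
// (T, H) is {T, B, A, H}. A RETURN-type block pulls in its matching call
// block instead of its predecessors, so a called subroutine's body is not
// dragged into the loop.
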
void FlowGraph::traverseFunc(FuncInfo *func, unsigned *ptr) {
  func->setPreID((*ptr)++);
  func->setVisited();
  for (auto callee : func->getCallees()) {
    if (!(callee->getVisited())) {
      traverseFunc(callee, ptr);
    }
  }
  sortedFuncTable.push_back(func);
  func->setPostID((*ptr)++);
}

//
// Sort subroutines in topological order based on DFS.
// A topological order is guaranteed as recursion is not allowed for
// subroutine calls. Results are stored in sortedFuncTable in reverse
// topological order.
//
void FlowGraph::topologicalSortCallGraph() {
  unsigned visitID = 1;
  traverseFunc(kernelInfo, &visitID);
}

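// Illustrative sketch (not from the source): if the kernel calls f1 and f1
// calls f2, the post-order push in traverseFunc yields
//   sortedFuncTable = [f2, f1, kernel]
// i.e., callees always precede their callers and the kernel comes last.
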
//
// This should be called only after pre-/post-visit IDs are set
//
static bool checkVisitID(FuncInfo *func1, FuncInfo *func2) {
  if (func1->getPreID() < func2->getPreID() &&
      func1->getPostID() > func2->getPostID()) {
    return true;
  } else {
    return false;
  }
}

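// Illustrative sketch (not from the source): this is a DFS interval
// containment test. With visit IDs kernel {pre=1, post=6}, f1 {pre=2,
// post=5}, f2 {pre=3, post=4}, checkVisitID(kernel, f2) is true (kernel's
// interval encloses f2's), while checkVisitID(f2, f1) is false.
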
//
// Find dominators for each function
//
void FlowGraph::findDominators(
    std::map<FuncInfo *, std::set<FuncInfo *>> &domMap) {
  std::map<FuncInfo *, std::set<FuncInfo *>> predMap;

  for (auto func : sortedFuncTable) {
    if (func == kernelInfo) {
      std::set<FuncInfo *> initSet;
      initSet.insert(kernelInfo);
      domMap.insert(std::make_pair(kernelInfo, initSet));
    } else {
      std::set<FuncInfo *> initSet;
      for (auto funcTmp : sortedFuncTable) {
        initSet.insert(funcTmp);
      }
      domMap.insert(std::make_pair(func, initSet));
    }

    for (auto callee : func->getCallees()) {
      std::map<FuncInfo *, std::set<FuncInfo *>>::iterator predMapIter =
          predMap.find(callee);
      if (predMapIter == predMap.end()) {
        std::set<FuncInfo *> initSet;
        initSet.insert(func);
        predMap.insert(std::make_pair(callee, initSet));
      } else {
        predMapIter->second.insert(func);
      }
    }
  }

  bool changed = false;
  do {
    changed = false;

    unsigned funcTableSize = static_cast<unsigned>(sortedFuncTable.size());
    unsigned funcID = funcTableSize - 1;
    do {
      funcID--;
      FuncInfo *func = sortedFuncTable[funcID];

      std::map<FuncInfo *, std::set<FuncInfo *>>::iterator predMapIter =
          predMap.find(func);
      if (predMapIter != predMap.end()) {
        std::set<FuncInfo *> &domSet = (*domMap.find(func)).second;
        std::set<FuncInfo *> oldDomSet = domSet;
        domSet.clear();
        domSet.insert(func);

        std::vector<unsigned> funcVec(funcTableSize);
        for (unsigned i = 0; i < funcTableSize; i++) {
          funcVec[i] = 0;
        }

        std::set<FuncInfo *> &predSet = (*predMapIter).second;
        for (auto pred : predSet) {
          for (auto predDom : (*domMap.find(pred)).second) {
            unsigned domID = (predDom->getScopeID() == UINT_MAX)
                                 ? funcTableSize - 1
                                 : predDom->getScopeID() - 1;
            funcVec[domID]++;
          }
        }

        unsigned predSetSize = static_cast<unsigned>(predSet.size());
        for (unsigned i = 0; i < funcTableSize; i++) {
          if (funcVec[i] == predSetSize) {
            FuncInfo *newFunc = sortedFuncTable[i];
            domSet.insert(newFunc);
            if (oldDomSet.find(newFunc) == oldDomSet.end())
              changed = true;
          }
        }

        if (oldDomSet.size() != domSet.size()) {
          changed = true;
        }
      }
    } while (funcID != 0);

  } while (changed);
}

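// Illustrative sketch (not from the source): the loop above is the standard
// iterative dataflow solution of
//   Dom(kernel) = {kernel}
//   Dom(f)      = {f} U (intersection of Dom(p) over all callers p of f)
// where the intersection is computed by counting: a function d is kept
// exactly when funcVec[d] == predSet.size(), i.e., d appears in every
// caller's dominator set.
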
//
// Check if func1 is a dominator of func2
//
static bool checkDominator(FuncInfo *func1, FuncInfo *func2,
                           std::map<FuncInfo *, std::set<FuncInfo *>> &domMap) {
  std::map<FuncInfo *, std::set<FuncInfo *>>::iterator domMapIter =
      domMap.find(func2);

  if (domMapIter != domMap.end()) {
    std::set<FuncInfo *> domSet = (*domMapIter).second;
    std::set<FuncInfo *>::iterator domSetIter = domSet.find(func1);

    if (domSetIter != domSet.end()) {
      return true;
    }
  }

  return false;
}

//
// Determine the scope of a variable based on different contexts
//
unsigned FlowGraph::resolveVarScope(G4_Declare *dcl, FuncInfo *func) {
  unsigned oldID = dcl->getScopeID();
  unsigned newID = func->getScopeID();

  if (oldID == newID) {
    return oldID;
  } else if (oldID == 0) {
    return newID;
  } else if (oldID == UINT_MAX || newID == UINT_MAX) {
    return UINT_MAX;
  } else if (builder->getOption(vISA_EnableGlobalScopeAnalysis)) {
    // This is safe if the global variable usage is
    // self-contained under the calling function
    std::map<FuncInfo *, std::set<FuncInfo *>> domMap;

    findDominators(domMap);

    FuncInfo *oldFunc = sortedFuncTable[oldID - 1];

    if (checkVisitID(func, oldFunc) && checkDominator(func, oldFunc, domMap)) {
      return newID;
    } else if (checkVisitID(oldFunc, func) &&
               checkDominator(oldFunc, func, domMap)) {
      return oldID;
    } else {
      unsigned start = (newID > oldID) ? newID : oldID;
      unsigned end = static_cast<unsigned>(sortedFuncTable.size());

      for (unsigned funcID = start; funcID != end; funcID++) {
        FuncInfo *currFunc = sortedFuncTable[funcID];
        if (checkVisitID(currFunc, func) &&
            checkDominator(currFunc, func, domMap) &&
            checkVisitID(currFunc, oldFunc) &&
            checkDominator(currFunc, oldFunc, domMap)) {
          return currFunc->getScopeID();
        }
      }
    }
  }

  return UINT_MAX;
}

//
// Visit all operands referenced in a function and update the variable scope
//
void FlowGraph::markVarScope(std::vector<G4_BB *> &BBList, FuncInfo *func) {
  for (auto bb : BBList) {
    for (auto it = bb->begin(), end = bb->end(); it != end; it++) {
      G4_INST *inst = (*it);

      G4_DstRegRegion *dst = inst->getDst();

      if (dst && !dst->isAreg() && dst->getBase()) {
        G4_Declare *dcl = GetTopDclFromRegRegion(dst);
        unsigned scopeID = resolveVarScope(dcl, func);
        dcl->updateScopeID(scopeID);
      }

      for (int i = 0, numSrc = inst->getNumSrc(); i < numSrc; i++) {
        G4_Operand *src = inst->getSrc(i);

        if (src && !src->isAreg()) {
          if (src->isSrcRegRegion() && src->asSrcRegRegion()->getBase()) {
            G4_Declare *dcl = GetTopDclFromRegRegion(src);
            unsigned scopeID = resolveVarScope(dcl, func);
            dcl->updateScopeID(scopeID);
          } else if (src->isAddrExp() && src->asAddrExp()->getRegVar()) {
            G4_Declare *dcl =
                src->asAddrExp()->getRegVar()->getDeclare()->getRootDeclare();
            unsigned scopeID = resolveVarScope(dcl, func);
            dcl->updateScopeID(scopeID);
          }
        }
      }
    }
  }
}

//
// Traverse the call graph and mark variable scope
//
void FlowGraph::markScope() {
  if (funcInfoTable.size() == 0) {
    // no subroutines
    return;
  }
  unsigned id = 1;
  std::vector<FuncInfo *>::iterator kernelIter = sortedFuncTable.end();
  kernelIter--;
  for (std::vector<FuncInfo *>::iterator funcIter = sortedFuncTable.begin();
       funcIter != sortedFuncTable.end(); ++funcIter) {
    if (funcIter == kernelIter) {
      id = UINT_MAX;
    }

    FuncInfo *func = (*funcIter);
    func->setScopeID(id);

    for (auto bb : func->getBBList()) {
      bb->setScopeID(id);
    }

    id++;
  }

  for (auto func : sortedFuncTable) {
    markVarScope(func->getBBList(), func);
  }
}

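// Illustrative sketch (not from the source): with sortedFuncTable =
// [f2, f1, kernel], markScope assigns scope IDs f2 = 1, f1 = 2, and
// kernel = UINT_MAX. resolveVarScope then widens a dcl's scope on each use:
// a dcl first seen in f2 and later used in f1 resolves to a common
// dominating scope if one exists, or to UINT_MAX (global) otherwise.
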
// Return the mask for anyH or allH predicate control in the goto to be
// emitted.
static uint32_t getFlagMask(G4_Predicate_Control pCtrl) {
  switch (pCtrl) {
  case G4_Predicate_Control::PRED_ALL2H:
    return 0xFFFFFFFC;
  case G4_Predicate_Control::PRED_ALL4H:
    return 0xFFFFFFF0;
  case G4_Predicate_Control::PRED_ALL8H:
    return 0xFFFFFF00;
  case G4_Predicate_Control::PRED_ALL16H:
    return 0xFFFF0000;
  case G4_Predicate_Control::PRED_ALL32H:
    return 0x00000000;
  case G4_Predicate_Control::PRED_ANY2H:
    return 0x00000003;
  case G4_Predicate_Control::PRED_ANY4H:
    return 0x0000000F;
  case G4_Predicate_Control::PRED_ANY8H:
    return 0x000000FF;
  case G4_Predicate_Control::PRED_ANY16H:
    return 0x0000FFFF;
  case G4_Predicate_Control::PRED_ANY32H:
    return 0xFFFFFFFF;
  default:
    vISA_ASSERT_UNREACHABLE("only for AllH or AnyH predicate control");
    break;
  }
  return 0;
}

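// Illustrative sketch (not from the source): for a 4-wide anyH predicate,
// getFlagMask(PRED_ANY4H) == 0x0000000F, so "and (1) P P 0xF" clears every
// flag bit outside the low 4 channels before the jmpi becomes a wider goto;
// for PRED_ALL4H, the mask 0xFFFFFFF0 is OR'ed in instead, forcing the
// unused upper channels to 1 so they cannot break an "all" test.
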
// Given a predicate ctrl for jmpi, return the adjusted predicate ctrl in a new
// simd size.
static G4_Predicate_Control getPredCtrl(unsigned simdSize,
                                        G4_Predicate_Control pCtrl,
                                        bool predCtrlHasWidth) {
  if (!predCtrlHasWidth) {
    return G4_Predicate::isAllH(pCtrl) ? PRED_ALL_WHOLE : PRED_ANY_WHOLE;
  }

  if (G4_Predicate::isAllH(pCtrl))
    return (simdSize == 8)    ? PRED_ALL8H
           : (simdSize == 16) ? PRED_ALL16H
                              : G4_Predicate_Control::PRED_ALL32H;

  // Any or default
  return (simdSize == 8)    ? PRED_ANY8H
         : (simdSize == 16) ? PRED_ANY16H
                            : G4_Predicate_Control::PRED_ANY32H;
}

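// Illustrative sketch (not from the source): when a SIMD16 kernel converts
// "(!P1.any2h) jmpi" into a goto, getPredCtrl(16, PRED_ANY2H, true) returns
// PRED_ANY16H, matching the flag widened by the mask fix-up above; with
// predCtrlHasWidth == false it falls back to PRED_ANY_WHOLE.
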
// If BB has only one predecessor, return it;
// if BB has two predecessors and one is ExcludedPred, return the other
// predecessor; otherwise, return nullptr.
G4_BB *FlowGraph::getSinglePredecessor(G4_BB *BB, G4_BB *ExcludedPred) const {
  if (BB->Preds.size() != 1) {
    if (BB->Preds.size() == 2) {
      if (BB->Preds.front() == ExcludedPred)
        return BB->Preds.back();
      if (BB->Preds.back() == ExcludedPred)
        return BB->Preds.front();
    }
    return nullptr;
  }
  return BB->Preds.front();
}

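// Illustrative sketch (not from the source): for a BB with Preds = {A, B},
// getSinglePredecessor(BB, A) returns B, while getSinglePredecessor(BB, C)
// returns nullptr; with three or more predecessors it always returns
// nullptr.
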
// Convert jmpi to goto. E.g.
//
// Case1:
//   .decl P1 v_type=P num_elts=2
//
//   cmp.ne (M1, 2) P1 V33(0,0)<2;2,1> 0x0:f
//   (!P1.any) jmpi (M1, 1) BB1
//
// ===>
//
//   cmp.ne (M1, 2) P1 V33(0,0)<2;2,1> 0x0:f
//   and (1) P1 P1 0b00000011
//   (!P1.any) goto (M1, 8) BB1
//
// Case2:
//   .decl P1 v_type=P num_elts=2
//
//   cmp.ne (M1, 2) P1 V33(0,0)<2;2,1> 0x0:f
//   (!P1.all) jmpi (M1, 1) BB1
//
// ===>
//
//   cmp.ne (M1, 2) P1 V33(0,0)<2;2,1> 0x0:f
//   or (1) P1 P1 0b11111100
//   (!P1.all) goto (M1, 8) BB1
//
bool FlowGraph::convertJmpiToGoto() {
  bool Changed = false;
  for (auto bb : BBs) {
    for (auto I = bb->begin(), IEnd = bb->end(); I != IEnd; ++I) {
      G4_INST *inst = *I;
      if (inst->opcode() != G4_jmpi)
        continue;

      unsigned predSize = pKernel->getSimdSize();
      G4_Predicate *newPred = nullptr;

      if (G4_Predicate *pred = inst->getPredicate()) {
        // The number of bool elements in vISA decl.
        unsigned nElts = pred->getTopDcl()->getNumberFlagElements();

        // Since we need to turn this into goto, set high bits properly.
        if (nElts != predSize) {
          // The underlying dcl type is either uw or ud.
          G4_Type SrcTy = pred->getTopDcl()->getElemType();
          G4_Type DstTy = (predSize > 16) ? Type_UD : Type_UW;

          G4_Predicate_Control pCtrl = pred->getControl();
          vISA_ASSERT(nElts == 1 || G4_Predicate::isAnyH(pCtrl) ||
                          G4_Predicate::isAllH(pCtrl),
                      "predicate control not handled yet");

          // Common dst and src0 operand for flag.
          G4_Declare *newDcl = builder->createTempFlag(predSize > 16 ? 2 : 1);
          auto pDst = builder->createDst(newDcl->getRegVar(), 0, 0, 1, DstTy);
          auto pSrc0 = builder->createSrc(pred->getBase(), 0, 0,
                                          builder->getRegionScalar(), SrcTy);

          auto truncMask = [](uint32_t mask, G4_Type Ty) -> uint64_t {
            return (Ty == Type_UW) ? uint16_t(mask) : mask;
          };

          if (pCtrl == G4_Predicate_Control::PRED_DEFAULT) {
            // P = P & 1
            auto pSrc1 = builder->createImm(1, Type_UW);
            auto pInst =
                builder->createBinOp(G4_and, g4::SIMD1, pDst, pSrc0, pSrc1,
                                     InstOpt_M0 | InstOpt_WriteEnable, false);
            bb->insertBefore(I, pInst);
          } else if (G4_Predicate::isAnyH(pCtrl)) {
            // P = P & mask
            uint32_t mask = getFlagMask(pCtrl);
            auto pSrc1 = builder->createImm(truncMask(mask, DstTy), DstTy);
            auto pInst =
                builder->createBinOp(G4_and, g4::SIMD1, pDst, pSrc0, pSrc1,
                                     InstOpt_M0 | InstOpt_WriteEnable, false);
            bb->insertBefore(I, pInst);
          } else {
            // AllH
            // P = P | mask
            uint32_t mask = getFlagMask(pCtrl);
            auto pSrc1 = builder->createImm(truncMask(mask, DstTy), DstTy);
            auto pInst =
                builder->createBinOp(G4_or, g4::SIMD1, pDst, pSrc0, pSrc1,
                                     InstOpt_M0 | InstOpt_WriteEnable, false);
            bb->insertBefore(I, pInst);
          }

          // Adjust pred control to the new execution size and build the
          // new predicate.
          pCtrl = getPredCtrl(predSize, pCtrl, builder->predCtrlHasWidth());
          newPred = builder->createPredicate(pred->getState(),
                                             newDcl->getRegVar(), 0, pCtrl);
        }
      }

      // (!P) jmpi L
      // becomes:
      // P = P & MASK
      // (!P.anyN) goto (N) L
      inst->setOpcode(G4_goto);
      inst->setExecSize(G4_ExecSize(predSize));
      if (newPred)
        inst->setPredicate(newPred);
      inst->asCFInst()->setUip(inst->getSrc(0)->asLabel());
      inst->setSrc(nullptr, 0);
      inst->setOptions(InstOpt_M0);
      Changed = true;
    }
  }
  return Changed;
}

// Changes a predicated call to an unpredicated call like the following:
//    BB:
//      (p) call (execSize) ...
//    NextBB:
//
// It is changed to:
//    BB:
//      p1 = simdsize > execSize ? (p & ((1<<execSize) - 1)) : p
//      (~p1) goto (simdsize) target_lbl
//    newCallBB:
//      call (execSize) ...
//    newNextBB:
//    NextBB:
// where target_lbl is newTargetBB.
//
bool FlowGraph::convertPredCall(
    std::unordered_map<G4_Label *, G4_BB *> &aLabelMap) {
  bool changed = false;
  // Add BB0 and BB1 into the subroutine in which aAnchorBB is.
  auto updateSubroutineTab = [&](G4_BB *aAnchorBB, G4_BB *BB0, G4_BB *BB1) {
    for (auto &MI : subroutines) {
      std::vector<G4_BB *> &bblists = MI.second;
      auto BI = std::find(bblists.begin(), bblists.end(), aAnchorBB);
      if (BI == bblists.end()) {
        continue;
      }
      bblists.push_back(BB0);
      bblists.push_back(BB1);
    }
  };

  const G4_ExecSize SimdSize = pKernel->getSimdSize();
  G4_Type PredTy = SimdSize > 16 ? Type_UD : Type_UW;
  auto NextBI = BBs.begin();
  for (auto BI = NextBI, BE = BBs.end(); BI != BE; BI = NextBI) {
    ++NextBI;
    G4_BB *BB = *BI;
    if (BB->empty()) {
      continue;
    }
    G4_BB *NextBB = (NextBI == BE ? nullptr : *NextBI);

    G4_INST *Inst = BB->back();
    G4_Predicate *Pred = Inst->getPredicate();
    if (!(Pred && (Inst->isCall() || Inst->isFCall()))) {
      continue;
    }

    const bool hasFallThru =
        (NextBB && BB->Succs.size() >= 1 && BB->Succs.front() == NextBB);

    G4_BB *newCallBB = createNewBBWithLabel("predCallWA");
    insert(NextBI, newCallBB);
    G4_Label *callBB_lbl = newCallBB->getLabel();
    vASSERT(callBB_lbl);
    aLabelMap.insert(std::make_pair(callBB_lbl, newCallBB));

    G4_BB *targetBB = createNewBBWithLabel("predCallWA");
    insert(NextBI, targetBB);
    G4_Label *target_lbl = targetBB->getLabel();
    aLabelMap.insert(std::make_pair(target_lbl, targetBB));

    // relink call's succs
    if (hasFallThru) {
      removePredSuccEdges(BB, NextBB);
    }
    auto SI = BB->Succs.begin(), SE = BB->Succs.end();
    while (SI != SE) {
      G4_BB *Succ = *SI;
      ++SI;
      removePredSuccEdges(BB, Succ);
      addPredSuccEdges(newCallBB, Succ, false);
    }
    addPredSuccEdges(BB, newCallBB, true);
    addPredSuccEdges(BB, targetBB, false);
    addPredSuccEdges(newCallBB, targetBB, true);
    if (hasFallThru) {
      addPredSuccEdges(targetBB, NextBB, true);
    }

    // delink call inst
    BB->pop_back();

    G4_Predicate *newPred;
    const G4_ExecSize ExecSize = Inst->getExecSize();
    if (SimdSize == ExecSize) {
      // negate predicate
      newPred = builder->createPredicate(
          Pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus,
          Pred->getBase()->asRegVar(), 0, Pred->getControl());
    } else {
      // Common dst and src0 operand for flag.
      G4_Type oldPredTy = ExecSize > 16 ? Type_UD : Type_UW;
      G4_Declare *newDcl = builder->createTempFlag(PredTy == Type_UD ? 2 : 1);
      auto pDst = builder->createDst(newDcl->getRegVar(), 0, 0, 1, PredTy);
      auto pSrc0 = builder->createSrc(Pred->getBase(), 0, 0,
                                      builder->getRegionScalar(), oldPredTy);
      auto pSrc1 = builder->createImm((1 << ExecSize) - 1, PredTy);
      auto pInst =
          builder->createBinOp(G4_and, g4::SIMD1, pDst, pSrc0, pSrc1,
                               InstOpt_M0 | InstOpt_WriteEnable, false);
      BB->push_back(pInst);

      newPred = builder->createPredicate(
          Pred->getState() == PredState_Plus ? PredState_Minus : PredState_Plus,
          newDcl->getRegVar(), 0, Pred->getControl());
    }

    G4_INST *gotoInst = builder->createGoto(newPred, SimdSize, target_lbl,
                                            InstOpt_NoOpt, false);
    BB->push_back(gotoInst);

    Inst->setPredicate(nullptr);
    newCallBB->push_back(Inst);

    updateSubroutineTab(BB, newCallBB, targetBB);
    changed = true;
  }
  // if changed is true, it means a goto has been added.
  return changed;
}

void FlowGraph::print(std::ostream &OS) const {
  const char *kname = nullptr;
  if (getKernel()) {
    kname = getKernel()->getName();
  }
  kname = kname ? kname : "unnamed";
  OS << "\n\nCFG: " << kname
     << (builder->getIsKernel() ? " [kernel]" : " [non-kernel function]")
     << "\n\n";
  for (auto BB : BBs) {
    BB->print(OS);
  }
}

void FlowGraph::dump() const { print(std::cerr); }

FlowGraph::~FlowGraph() {
  // Even though G4_BBs are allocated in a mem pool and freed in one shot,
  // we must call each BB's destructor explicitly to free up the memory used
  // by the STL objects (list, vector, etc.) in each BB.
  for (G4_BB *bb : BBAllocList) {
    bb->~G4_BB();
  }
  BBAllocList.clear();
  globalOpndHT.clearHashTable();
  for (auto funcInfo : funcInfoTable) {
    funcInfo->~FuncInfo();
  }
  if (kernelInfo) {
    kernelInfo->~FuncInfo();
  }
}

RelocationEntry &
RelocationEntry::createRelocation(G4_Kernel &kernel, G4_INST &inst, int opndPos,
                                  const std::string &symbolName,
                                  RelocationType type) {
  // Force NoCompact on instructions carrying relocations so that the
  // relocation target offset (RelocationEntry::getTargetOffset) is correct.
  inst.setOptionOn(InstOpt_NoCompact);
  kernel.getRelocationTable().emplace_back(
      RelocationEntry(&inst, opndPos, type, symbolName));
  auto &entry = kernel.getRelocationTable().back();
  inst.addComment(std::string(entry.getTypeString()) + ": " + symbolName);
  return entry;
}

void RelocationEntry::doRelocation(const G4_Kernel &kernel, void *binary,
                                   uint32_t binarySize) {
  // FIXME: nothing to do here.
  // We have only dynamic relocations now, which cannot be resolved at
  // compilation time.
}

uint32_t RelocationEntry::getTargetOffset(const IR_Builder &builder) const {
  // Instructions must not be compacted so the offsets below are correct.
  vASSERT(inst->isNoCompactedInst());

  switch (inst->opcode()) {
  case G4_mov: {
    // When src0 type is 64 bits:
    //   On Pre-Xe:
    //     Src0.imm[63:0] mapped to Instruction [127:64]   // R_SYM_ADDR
    //   On Xe+:
    //     Src0.imm[31:0] mapped to Instruction [127:96]   // R_SYM_ADDR_32
    //     Src0.imm[63:32] mapped to Instruction [95:64]   // R_SYM_ADDR_32_HI
    //   Note that we cannot use the "R_SYM_ADDR" relocation on Xe+ imm64
    //   since the low32 and high32 bits of imm64 are flipped in the
    //   instruction bit fields.
    //
    // When src0 type is 32 bits:
    //   Src0.imm[31:0] mapped to instruction [127:96]
    G4_Operand *target_operand = inst->getSrc(opndPos);
    vASSERT(target_operand->isRelocImm());

    if (target_operand->getTypeSize() == 8) {
      if (relocType == RelocationType::R_SYM_ADDR) {
        vASSERT(builder.getPlatform() < GENX_TGLLP);
        return 8;
      } else {
        vASSERT(builder.getPlatform() >= GENX_TGLLP);
        return relocType == RelocationType::R_SYM_ADDR_32_HI ? 8 : 12;
      }
    } else if (target_operand->getTypeSize() == 4) {
      return 12;
    }
    // Unreachable: mov with relocation must have a 32b or 64b type.
    break;
  }
  case G4_add:
    // add instruction cannot have a 64-bit imm.
    vASSERT(relocType != R_SYM_ADDR && relocType != R_SYM_ADDR_32_HI);
    vASSERT(opndPos == 1);
    // Src1.imm[31:0] mapped to Instruction [127:96]
    return 12;
  case G4_mad:
    vASSERT(relocType == R_SYM_ADDR_16);
    // Src0.imm[15:0] mapped to Instruction [79:64]
    // Src2.imm[15:0] mapped to Instruction [127:112]
    if (opndPos == 1)
      return 8;
    else if (opndPos == 3)
      return 14;
    break;
  case G4_send:
  case G4_sendc:
  case G4_sends:
  case G4_sendsc:
    vISA_ASSERT(relocType == R_SEND,
                "Invalid relocation entry type for send instruction");
    // The R_SEND relocation symbol is the send instruction's starting offset.
    // This type of symbol is used to patch the imm descriptor contents,
    // e.g., the render target BTI.
    return 0;
  default:
    break;
  }

  vISA_ASSERT_UNREACHABLE("Invalid RelocationEntry");
  return 0;
}

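// Illustrative sketch (not from the source): on Xe+, a 64-bit relocated
//   mov (1) r10.0<1>:uq 0x0:uq {NoCompact}
// is patched via two entries: R_SYM_ADDR_32 at byte offset 12 (imm low 32
// bits) and R_SYM_ADDR_32_HI at byte offset 8 (imm high 32 bits). Pre-Xe, a
// single R_SYM_ADDR entry patches all 64 bits at offset 8.
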
const char *RelocationEntry::getTypeString(RelocationType relocType) {
  switch (relocType) {
  case RelocationType::R_SYM_ADDR:
    return "R_SYM_ADDR";
  case RelocationType::R_SYM_ADDR_32:
    return "R_SYM_ADDR_32";
  case RelocationType::R_SYM_ADDR_32_HI:
    return "R_SYM_ADDR_32_HI";
  case RelocationType::R_PER_THREAD_PAYLOAD_OFFSET_32:
    return "R_PER_THREAD_PAYLOAD_OFFSET_32";
  case RelocationType::R_GLOBAL_IMM_32:
    return "R_GLOBAL_IMM_32";
  case RelocationType::R_SEND:
    return "R_SEND";
  case RelocationType::R_SYM_ADDR_16:
    return "R_SYM_ADDR_16";
  default:
    vISA_ASSERT_UNREACHABLE("unhandled relocation type");
    return "??";
  }
}

void RelocationEntry::dump(std::ostream &os) const {
  os << "Relocation entry: " << getTypeString(relocType) << "\n";
  os << "\t";
  inst->dump();
  os << " symbol = " << symName << "; src" << opndPos << "\n";
}

void FuncInfo::dump(std::ostream &os) const {
  os << "subroutine " << getId() << "("
     << getInitBB()->front()->getLabelStr() << ")\n";
  os << "\tentryBB=" << getInitBB()->getId()
     << ", exitBB=" << getExitBB()->getId() << "\n";
  os << "\tCallees: ";
  for (auto callee : callees) {
    os << callee->getId() << " ";
  }
  os << "\n\tBB list: ";
  for (auto bb : BBList) {
    os << bb->getId() << " ";
  }
  os << "\n";
}