mirror of
https://github.com/intel/intel-graphics-compiler.git
synced 2025-10-30 08:18:26 +08:00
1657 lines
58 KiB
C++
1657 lines
58 KiB
C++
/*========================== begin_copyright_notice ============================
|
|
|
|
Copyright (C) 2017-2021 Intel Corporation
|
|
|
|
SPDX-License-Identifier: MIT
|
|
|
|
============================= end_copyright_notice ===========================*/
|
|
|
|
/*========================== begin_copyright_notice ============================
|
|
|
|
This file is distributed under the University of Illinois Open Source License.
|
|
See LICENSE.TXT for details.
|
|
|
|
============================= end_copyright_notice ===========================*/
|
|
|
|
//===-------- DeSSA.cpp - divide phi variables into congruent class -------===//
|
|
//
|
|
// Intel LLVM Extension
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// This pass is originated from the StrongPHIElimination on the machine-ir.
|
|
// We have adapted it to work on llvm-ir. Also note that we have changed it
|
|
// from a transformation to an analysis, meaning that it only divides phi-vars
|
|
// into congruent classes, and does NOT insert the copies. A separate code-gen
|
|
// pass can use this analysis to emit non-ssa target code.
|
|
//
|
|
// Algorithm and References:
|
|
//
|
|
// This pass considers how to eliminate PHI instructions by aggressively
|
|
// coalescing the copies that would otherwise be inserted by a naive algorithm
|
|
// and only inserting the copies that are necessary. The coalescing technique
|
|
// initially assumes that all registers appearing in a PHI instruction do not
|
|
// interfere. It then eliminates proven interferences, using dominators to only
|
|
// perform a linear number of interference tests instead of the quadratic number
|
|
// of interference tests that this would naively require.
|
|
// This is a technique derived from:
|
|
//
|
|
// Budimlic, et al. Fast copy coalescing and live-range identification.
|
|
// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
|
|
// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
|
|
// PLDI '02. ACM, New York, NY, 25-32.
|
|
//
|
|
// The original implementation constructs a data structure they call a dominance
|
|
// forest for this purpose. The dominance forest was shown to be unnecessary,
|
|
// as it is possible to emulate the creation and traversal of a dominance forest
|
|
// by directly using the dominator tree, rather than actually constructing the
|
|
// dominance forest. This technique is explained in:
|
|
//
|
|
// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
|
|
// Quality and Efficiency,
|
|
// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
|
|
// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
|
|
// CGO '09. IEEE, Washington, DC, 114-125.
|
|
//
|
|
// Careful implementation allows for all of the dominator forest interference
|
|
// checks to be performed at once in a single depth-first traversal of the
|
|
// dominator tree, which is what is implemented here.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "Compiler/CISACodeGen/DeSSA.hpp"
|
|
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
|
|
#include "Compiler/CISACodeGen/PatternMatchPass.hpp"
|
|
#include "Compiler/MetaDataApi/MetaDataApi.h"
|
|
#include "common/debug/Debug.hpp"
|
|
#include "common/debug/Dump.hpp"
|
|
#include "Compiler/IGCPassSupport.h"
|
|
#include "common/LLVMWarningsPush.hpp"
|
|
#include "llvmWrapper/IR/Instructions.h"
|
|
#include <llvm/IR/InstIterator.h>
|
|
#include <llvm/IR/InlineAsm.h>
|
|
#include <llvmWrapper/IR/DerivedTypes.h>
|
|
#include "common/LLVMWarningsPop.hpp"
|
|
#include <algorithm>
|
|
#include "Probe/Assertion.h"
|
|
|
|
using namespace llvm;
|
|
using namespace IGC;
|
|
using namespace IGC::Debug;
|
|
using namespace IGC::IGCMD;
|
|
|
|
// Pass registration boilerplate. The four PASS_* macros are the arguments
// forwarded to IGC_INITIALIZE_PASS_BEGIN / IGC_INITIALIZE_PASS_END below.
#define PASS_FLAG "DeSSA"
|
|
#define PASS_DESCRIPTION "coalesce moves coming from phi nodes"
|
|
#define PASS_CFG_ONLY true
|
|
#define PASS_ANALYSIS true
|
|
IGC_INITIALIZE_PASS_BEGIN(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
|
|
// Analyses declared as dependencies; each of them is fetched with
// getAnalysis<> in DeSSA::runOnFunction.
// NOTE(review): runOnFunction also calls getAnalysis<CodeGenContextWrapper>(),
// which is not listed here -- confirm whether that is intentional.
IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(LiveVarsAnalysis)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenPatternMatch)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
|
|
IGC_INITIALIZE_PASS_END(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
|
|
|
|
char DeSSA::ID = 0;
|
|
|
|
DeSSA::DeSSA() : FunctionPass(ID) { initializeDeSSAPass(*PassRegistry::getPassRegistry()); }
|
|
|
|
/// Prints the current DeSSA state to \p OS in four sections:
///   1. AliasMap, grouped by aliasee;
///   2. InsEltMap, grouped by root value;
///   3. the InsEltMap groups again, printed as operands and tagged with
///      "[aliasee]" for every value for which isAliasee() holds;
///   4. the phi congruence classes held in RegNodeMap.
///
/// When m_F is set, groups and their members are ordered by the position of
/// each value among m_F's arguments followed by its instructions, so that two
/// dumps can be compared. When m_F is null no sorting is done and the order
/// is whatever the underlying containers yield.
///
/// Printing must not alter the analysis result, so leaders are found by
/// walking parent links by hand rather than through Node::getLeader().
void DeSSA::print(raw_ostream &OS, const Module *) const {
  // Give every argument and instruction a unique, increasing number.
  DenseMap<const Value *, int> Val2IntMap;
  if (m_F) {
    int id = 0;
    for (const Argument &Arg : m_F->args()) {
      Val2IntMap[&Arg] = ++id;
    }
    for (auto II = inst_begin(m_F), IE = inst_end(m_F); II != IE; ++II) {
      Val2IntMap[&*II] = ++id;
    }
  }

  const bool doSort = !Val2IntMap.empty();

  // lookup() yields 0 for a value that was never numbered and, unlike
  // operator[], does not insert into the map while std::sort is running.
  auto valCmp = [&Val2IntMap](const Value *V0, const Value *V1) {
    return Val2IntMap.lookup(V0) < Val2IntMap.lookup(V1);
  };

  SmallVector<Value *, 64> ValKeyVec;
  DenseMap<Value *, SmallVector<Value *, 8>> output;

  // Rebuilds `output` (root -> members other than the root itself) from a
  // value -> root map, and `ValKeyVec` as the list of roots. A root gets an
  // entry even when it has no other member. Both levels are sorted when
  // sorting is enabled.
  auto groupByRoot = [&](const auto &Map) {
    output.clear();
    ValKeyVec.clear();
    for (auto &Entry : Map) {
      Value *member = Entry.first;
      Value *root = Entry.second;
      SmallVector<Value *, 8> &members = output[root];
      if (member != root) {
        members.push_back(member);
      }
    }
    for (auto &Entry : output) {
      ValKeyVec.push_back(Entry.first);
    }
    if (doSort) {
      std::sort(ValKeyVec.begin(), ValKeyVec.end(), valCmp);
      for (Value *root : ValKeyVec) {
        SmallVector<Value *, 8> &members = output[root];
        std::sort(members.begin(), members.end(), valCmp);
      }
    }
  };

  // Prints every group as a labelled root line followed by one line per
  // member, then the blank lines that end the section.
  auto printGroups = [&](const char *rootLabel) {
    for (Value *root : ValKeyVec) {
      OS << rootLabel;
      root->print(OS);
      OS << "\n";
      for (Value *member : output[root]) {
        OS << " ";
        member->print(OS);
        OS << "\n";
      }
    }
    OS << "\n\n";
  };

  OS << "---- AliasMap ----\n\n";
  groupByRoot(AliasMap);
  printGroups(" Aliasee: ");

  OS << "---- InsEltMap ----\n\n";
  groupByRoot(InsEltMap);
  printGroups(" Root Value : ");

  OS << "---- Alias for composite/vector values"
     << " (value in both AliasMap & InsEltMap)----\n";
  // `output` / `ValKeyVec` still hold the (already sorted) InsEltMap groups.
  for (Value *rootV : ValKeyVec) {
    OS << " Root Value: ";
    rootV->printAsOperand(OS);
    if (isAliasee(rootV)) {
      OS << " [aliasee]";
    }

    // Only members that are aliasees are listed, eight per output line.
    int num = 0;
    for (Value *val : output[rootV]) {
      if (!isAliasee(val))
        continue;
      if ((num % 8) == 0) {
        OS << "\n ";
      }
      val->printAsOperand(OS);
      OS << " [aliasee] ";
      ++num;
    }
    OS << "\n";
  }
  OS << "\n\n";

  OS << "---- Phi-Var Isolations ----\n";
  SmallVector<Node *, 64> NodeKeyVec;
  std::map<Node *, SmallVector<Node *, 8>> nodeOutput;
  for (auto &Entry : RegNodeMap) {
    Node *N = Entry.second;
    if (!N) {
      // A value can end up mapped to a null node (operator[] on RegNodeMap
      // default-inserts); there is nothing to print for it.
      continue;
    }
    // We don't want to change the behavior of DeSSA by invoking a
    // dumping/printing function, so getLeader() is not used here: it has a
    // side effect (path halving).
    Node *Leader = N->parent;
    while (Leader != Leader->parent) {
      Leader = Leader->parent;
    }

    SmallVector<Node *, 8> &allNodes = nodeOutput[Leader];
    if (N != Leader) {
      allNodes.push_back(N);
    }
  }

  auto nodeCmp = [&valCmp](const Node *N0, const Node *N1) { return valCmp(N0->value, N1->value); };

  for (auto &Entry : nodeOutput) {
    NodeKeyVec.push_back(Entry.first);
  }
  if (doSort) {
    std::sort(NodeKeyVec.begin(), NodeKeyVec.end(), nodeCmp);
  }
  for (Node *Leader : NodeKeyVec) {
    SmallVector<Node *, 8> &allNodes = nodeOutput[Leader];
    if (doSort) {
      std::sort(allNodes.begin(), allNodes.end(), nodeCmp);
    }

    if (isIsolated(Leader)) {
      IGC_ASSERT_MESSAGE(allNodes.size() == 0, "ICE: isolated node still have other in its CC!");
      OS << "\nVar isolated : ";
      Leader->value->print(OS);
      OS << "\n";
    } else {
      OS << "\nLeader : ";
      Leader->value->print(OS);
      OS << "\n";
      for (Node *N : allNodes) {
        OS << " ";
        N->value->print(OS);
        OS << "\n";
      }
    }
  }
}
|
|
|
|
void DeSSA::dump() const { print(dbgs()); }
|
|
|
|
/// Builds the phi congruence classes for \p MF.
///
/// Always returns false. Does nothing beyond clearing LV/WIA when the
/// EnableDeSSA flag is disabled, and returns early when \p MF has no entry
/// in the metadata's functions-info table.
bool DeSSA::runOnFunction(Function &MF) {
  if (IGC_IS_FLAG_DISABLED(EnableDeSSA)) {
    // getRootValue(), isIsolated(), getLiveVars(), etc. still work.
    LV = nullptr;
    WIA = nullptr;
    return false;
  }

  m_F = &MF;
  CurrColor = 0;
  MetaDataUtils *pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
  if (pMdUtils->findFunctionsInfoItem(&MF) == pMdUtils->end_FunctionsInfo()) {
    return false;
  }
  CTX = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  WIA = &getAnalysis<WIAnalysis>();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  CG = &getAnalysis<CodeGenPatternMatch>();
  DL = &MF.getParent()->getDataLayout();
  LV = &getAnalysis<LiveVarsAnalysis>().getLiveVars();

  // WIAnalysis must not be re-run between CodeGen and DeSSA, so that
  // m_program's Uniform Helper stays valid; it is used indirectly in
  // DeSSA::GetPhiTemp(). If that cannot be maintained, we should do
  //   m_program->SetUniformHelper(WIA);

  //
  // The DeSSA/coalescing procedure:
  //
  // 1. Walk the dominator tree to build the alias map. While doing so, update
  //    the aliasee's liveness so that it is the sum of all its aliasers.
  //    Aliaser/aliasee means:
  //        aliaser = bitcast aliasee
  //    (Most aliasing comes from bitcast; some comes from other casts such as
  //    inttoptr/ptrtoint, and it can also come from insElt/extElt.)
  //
  //    A depth-first (DF) walk of the dominator tree guarantees that a def is
  //    visited before its uses, except for PHI nodes. A PHI is never an alias
  //    candidate, so the def of an aliasee is always visited before the
  //    aliaser instruction. For example, for
  //        x = bitcast y
  //    the def of y is visited before this bitcast. With alias(v0, v1)
  //    meaning "v0 is an alias of v1", the DF walk never sees
  //        alias(v0, v1)
  //        alias(v1, v2)
  //    but always
  //        alias(v1, v2)
  //        alias(v0, v1)
  //
  // 2. Build InsEltMap, which coalesces the vector values used by
  //    InsertElement instructions. It is treated like an alias: the root
  //    value's liveness is the sum of all its non-root values. AliasMap is a
  //    pure alias (all aliasers hold the same value as their aliasee, of
  //    primitive type) whereas InsElt holds composite/vector values. The
  //    difference does not matter in dessa, but it does for sub-vector
  //    aliasing after dessa (VariableReuseAnalysis has experimental
  //    sub-vector aliasing under a key that is off by default).
  //
  //    InsEltMap/AliasMap could be removed by adding every value to a DeSSA
  //    node. That would require the algorithm to isolate such values
  //    together when isolation is needed, plus several subtle implementation
  //    changes, and it would enlarge each CC and possibly compile time. So we
  //    stay with InsEltMap (and AliasMap).
  //
  // 3. Make sure a DeSSA node only uses the 'node value'. Given value V:
  //        V_aliasee  = AliasMap[V] if V is in the map, V otherwise
  //        node_value = InsEltMap[V_aliasee] if in InsEltMap, V_aliasee
  //                     otherwise
  //    Because an aliasee's type may differ from its aliaser's, values in the
  //    same CC may have different types. Keep this in mind when creating a
  //    CVariable in GetSymbol().
  //
  // The algorithm forces coalescing of alias and InsertElement instructions
  // before PHI coalescing, i.e. it favors them. Values in AliasMap/InsEltMap
  // are therefore guaranteed to be coalesced together at the end of DeSSA.
  // PHI coalescing may extend those maps with further values.
  //
  CoalesceAliasInst();
  CoalesceInsertElements();

  // checkPHILoopInput
  //  PreHeader:
  //      x = ...
  //  Header:
  //      phi0 = [x, PreHeader], [t0, End]
  //      phi1 = [x, PreHeader], [t1, End]
  //      phi2 = [x, PreHeader], [t2, End]
  //      ...
  //  End:
  //      ...
  //      goto Header
  //
  // The algorithm below starts from the largest congruent class possible,
  // unioning all phis that share a source operand. Here that gives one class
  // containing every phi that has x as a source. The phis are then isolated
  // as they turn out to interfere, which produces mov instructions inside the
  // loop, at BB End.
  //
  // Since all these phis are live at the same time they can never be
  // coalesced, so there is no point in putting them in one class to begin
  // with. A Value-to-int map therefore counts how often a value is used by
  // the header's phis; once the count reaches a threshold, the source operand
  // is isolated and not unioned with its phi. The phi's dst is then likely to
  // coalesce with the other source (the one defined in the loop), removing
  // the movs from the loop.
  //
  // Isolating a value costs an extra copy, hence the threshold: a heuristic
  // that tries to make sure the benefit outweighs the cost.
  enum { PHI_SRC_USE_THRESHOLD = 3 }; // arbitrary number
  DenseMap<Value *, int> PHILoopPreHeaderSrcs;

  // Build the initial congruent classes using union-find.
  for (BasicBlock &BB : MF) {
    BasicBlock *MBB = &BB;

    // First, (re)compute PHILoopPreHeaderSrcs for this block. It is only
    // populated when the block is a loop header that has a loop predecessor.
    Loop *LP = LI ? LI->getLoopFor(MBB) : nullptr;
    BasicBlock *PreHeader = LP ? LP->getLoopPredecessor() : nullptr;
    const bool checkPHILoopInput = LP && (LP->getHeader() == MBB) && PreHeader;
    PHILoopPreHeaderSrcs.clear();
    if (checkPHILoopInput) {
      for (Instruction &Inst : BB) {
        PHINode *PHI = dyn_cast<PHINode>(&Inst);
        if (!PHI) {
          break; // past the leading run of phis
        }

        const int srcIx = PHI->getBasicBlockIndex(PreHeader);
        if (srcIx < 0) {
          continue;
        }
        Value *SrcVal = PHI->getOperand(srcIx);
        if (isa<Constant>(SrcVal)) {
          continue;
        }
        // A missing entry is value-initialized to zero before the increment.
        ++PHILoopPreHeaderSrcs[SrcVal];
      }
    }

    for (Instruction &Inst : BB) {
      PHINode *PHI = dyn_cast<PHINode>(&Inst);
      if (!PHI) {
        break; // past the leading run of phis
      }

      e_alignment DefAlign = GetPreferredAlignment(PHI, WIA, CTX);
      IGC_ASSERT(PHI == getNodeValue(PHI));

      addReg(PHI, DefAlign);
      PHISrcDefs[MBB].push_back(PHI);

      for (unsigned i = 0, NumOps = PHI->getNumOperands(); i != NumOps; ++i) {
        Value *OrigSrcVal = PHI->getOperand(i);
        // Constants never take part in a congruent class.
        if (isa<Constant>(OrigSrcVal))
          continue;

        // Condition for preheader-src-isolation. lookup() gives 0 for a
        // value that was not counted, which is below the threshold.
        const bool PreheaderSrcIsolation =
            checkPHILoopInput && !isa<InsertElementInst>(OrigSrcVal) && !isa<PHINode>(OrigSrcVal) &&
            PHI->getIncomingBlock(i) == PreHeader &&
            PHILoopPreHeaderSrcs.lookup(OrigSrcVal) >= PHI_SRC_USE_THRESHOLD;

        // Add the source's node value to the union.
        Value *SrcVal = getNodeValue(OrigSrcVal);
        e_alignment SrcAlign = GetPreferredAlignment(OrigSrcVal, WIA, CTX);
        Instruction *DefMI = dyn_cast<Instruction>(SrcVal);
        if (DefMI && CG->SIMDConstExpr(DefMI)) {
          continue; // special case, simdSize becomes a constant in vISA
        }

        if (DefMI || isa<Argument>(SrcVal)) {
          addReg(SrcVal, SrcAlign);
          if (DefMI) {
            PHISrcDefs[DefMI->getParent()].push_back(DefMI);
          } else {
            PHISrcArgs.insert(SrcVal);
          }
          if (WIA->whichDepend(PHI) == WIA->whichDepend(SrcVal) && !PreheaderSrcIsolation) {
            unionRegs(PHI, SrcVal);
          }
        }

        // Cases in which the source has to be isolated.
        if (CG->IsForceIsolated(SrcVal) || PreheaderSrcIsolation) {
          isolateReg(SrcVal);
        }
      } // end of source-operand loop

      // Isolate complex types that IGC does not handle.
      Type *PhiTy = PHI->getType();
      if (PhiTy->isStructTy() || PhiTy->isArrayTy()) {
        isolateReg(PHI);
      }
    }
  }

  // \todo The original paper talks about a quick isolation step done
  // beforehand. The idea is to identify the essential splits first, in order
  // to avoid unnecessary splitting in the next loop.

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  if (!RegNodeMap.empty()) {
    DenseMap<int, Value *> CurrentDominatingParent;
    DenseMap<Value *, Value *> ImmediateDominatingParent;
    // Function arguments first ...
    SplitInterferencesForArgument(CurrentDominatingParent, ImmediateDominatingParent);
    // ... then all the blocks.
    for (DomTreeNode *DTN : depth_first(DT->getRootNode())) {
      SplitInterferencesForBasicBlock(DTN->getBlock(), CurrentDominatingParent, ImmediateDominatingParent);
    }
  }

  // Handle values that have a specific alignment requirement.
  SplitInterferencesForAlignment();

  if (IGC_IS_FLAG_ENABLED(DumpDeSSA)) {
    const char *fname = MF.getName().data();
    auto name = DumpName(GetShaderOutputName())
                    .Hash(CTX->hash)
                    .Type(CTX->type)
                    .Pass("dessa")
                    .PostFix(fname)
                    .Retry(CTX->m_retryManager.GetRetryId())
                    .Extension("txt");

    Dump dessaDump(name, DumpType::DBG_MSG_TEXT);

    DumpLock();
    print(dessaDump.stream());
    DumpUnlock();
  } else if (IGC_IS_FLAG_ENABLED(PrintToConsole)) {
    print(ods());
  }

  m_F = nullptr;
  return false;
}
|
|
|
|
void DeSSA::CoalesceAliasInst() {
|
|
for (df_iterator<DomTreeNode *> DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
|
|
BasicBlock *Blk = DI->getBlock();
|
|
|
|
for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end(); BBI != BBE; ++BBI) {
|
|
Instruction *I = &(*BBI);
|
|
|
|
// We are after patternmatch, would it make more sense to
|
|
// iterate all patterns instead of instructions ?
|
|
if (!CG->NeedInstruction(*I)) {
|
|
continue;
|
|
}
|
|
|
|
if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
|
|
if (isa<UndefValue>(I->getOperand(0))) {
|
|
SmallVector<Value *, 16> AllIEIs;
|
|
int nelts = checkInsertElementAlias(IEI, AllIEIs);
|
|
if (nelts > 1) {
|
|
// Consider the following as an alias if all
|
|
// Vi, i=0, n-1 (except Vn) has a single use.
|
|
// V0 = InsElt undef, S0, 0
|
|
// V1 = InsElt V0, S1, 1
|
|
// ...
|
|
// Vn = InsElt Vn-1, Sn, n
|
|
//
|
|
// AliasMap has the following:
|
|
// alias(V0, V0)
|
|
// alias(V1, V0)
|
|
// alias(V2, V0)
|
|
// ......
|
|
// alias(Vn, V0) <-- V0 is the root!
|
|
//
|
|
// Note that elements could be sparse like
|
|
// V0 = InsElt Undef, S1, 1
|
|
// V1 = InsElt V0, s3, 2
|
|
//
|
|
Value *aliasee = AllIEIs[0];
|
|
AddAlias(aliasee);
|
|
for (int i = 1; i < nelts; ++i) {
|
|
Value *V = AllIEIs[i];
|
|
AliasMap[V] = aliasee;
|
|
|
|
// union liveness info
|
|
LV->mergeUseFrom(aliasee, V);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (InsertValueInst *IVI = dyn_cast<InsertValueInst>(I)) {
|
|
// Handle insertValue to struct.
|
|
//
|
|
// insertvalue exists when
|
|
// 1. there are calls to functions with struct-type arguments
|
|
// and struct is simple (for example, no struct type as its
|
|
// field type) and its size is small (<= 16 bytes ?) so that
|
|
// it will be passed as struct. (Normally, FE will turn a
|
|
// larger/not-simple struct arg into a "byval" pointer.)
|
|
// 2. igc generates it internally
|
|
coalesceAliasInsertValue(IVI);
|
|
} else if (CastInst *CastI = dyn_cast<CastInst>(I)) {
|
|
Value *D = CastI;
|
|
Value *S = CastI->getOperand(0);
|
|
// For vector bitcase, if both D and S are uniform, they have
|
|
// the same layout in GRF and can be aliased.
|
|
//
|
|
// For now, only do it if S's element type is larger; otherwise
|
|
// visa variable's alignment might use the smaller element type
|
|
// and it is not clear what is a clean way to fix that.
|
|
// For example:
|
|
// b = bitcast i64 to <2xi32>
|
|
// it is okay, but the following isn't
|
|
// b = bitcast <2xi32> to i64
|
|
Type *dTy = D->getType();
|
|
Type *sTy = S->getType();
|
|
IGCLLVM::FixedVectorType *dVTy = dyn_cast<IGCLLVM::FixedVectorType>(dTy);
|
|
IGCLLVM::FixedVectorType *sVTy = dyn_cast<IGCLLVM::FixedVectorType>(sTy);
|
|
int d_nelts = dVTy ? (int)dVTy->getNumElements() : 1;
|
|
int s_nelts = sVTy ? (int)sVTy->getNumElements() : 1;
|
|
const bool canAliasVecCast = (!dVTy || sVTy) && d_nelts > s_nelts && // S's element type is larger
|
|
!isa<PointerType>(dTy->getScalarType()) && !isa<PointerType>(sTy->getScalarType());
|
|
|
|
if (isArgOrNeededInst(S) && WIA->whichDepend(D) == WIA->whichDepend(S) &&
|
|
(isNoOpInst(CastI, CTX) || (WIA->isUniform(D) && canAliasVecCast))) {
|
|
if (AliasMap.count(D) == 0) {
|
|
AddAlias(S);
|
|
Value *aliasee = AliasMap[S];
|
|
AliasMap[D] = aliasee;
|
|
|
|
// D will be deleted due to aliasing
|
|
NoopAliasMap[D] = 1;
|
|
|
|
// union liveness info
|
|
LV->mergeUseFrom(aliasee, D);
|
|
} else {
|
|
// Only src operands of a phi can be visited before
|
|
// operands' definition. For other instructions such
|
|
// as castInst, this shall never happen
|
|
IGC_ASSERT_MESSAGE(0, "ICE: Use visited before definition!");
|
|
}
|
|
}
|
|
} else if (isa<CallInst>(I)) {
|
|
if (GenIntrinsicInst *GII = dyn_cast<GenIntrinsicInst>(I)) {
|
|
auto GIIid = GII->getIntrinsicID();
|
|
if ((GIIid == GenISAIntrinsic::GenISA_bitcastfromstruct ||
|
|
GIIid == GenISAIntrinsic::GenISA_bitcasttostruct) &&
|
|
!isa<Constant>(GII->getOperand(0))) {
|
|
// special cast just for load/store.
|
|
Value *D = GII;
|
|
Value *S = GII->getOperand(0);
|
|
|
|
if (GIIid == GenISAIntrinsic::GenISA_bitcastfromstruct) {
|
|
// D must be int or int vector type; S must be struct type.
|
|
IGC_ASSERT(D->getType()->getScalarType()->isIntegerTy());
|
|
IGC_ASSERT(S->getType()->isStructTy());
|
|
} else if (GIIid == GenISAIntrinsic::GenISA_bitcasttostruct) {
|
|
// S must be int or int vector type; D must be struct type.
|
|
IGC_ASSERT(S->getType()->getScalarType()->isIntegerTy());
|
|
IGC_ASSERT(D->getType()->isStructTy());
|
|
}
|
|
|
|
AddAlias(S);
|
|
Value *aliasee = AliasMap[S];
|
|
AliasMap[D] = aliasee;
|
|
|
|
// D will be deleted due to aliasing
|
|
NoopAliasMap[D] = 1;
|
|
|
|
// union liveness info
|
|
LV->mergeUseFrom(aliasee, D);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Registers Val in RegNodeMap with a freshly allocated Node that takes the
// next color and the given alignment. A value that is already registered is
// left untouched (and no color is consumed).
void DeSSA::addReg(Value *Val, e_alignment Align) {
  if (RegNodeMap.find(Val) != RegNodeMap.end()) {
    return;
  }
  Node *Fresh = new (Allocator) Node(Val, ++CurrColor, Align);
  RegNodeMap[Val] = Fresh;
}
|
|
// Using Path Halving in union-find
|
|
DeSSA::Node *DeSSA::Node::getLeader() {
|
|
Node *N = this;
|
|
Node *Parent = parent;
|
|
Node *Grandparent = Parent->parent;
|
|
|
|
while (Parent != Grandparent) {
|
|
N->parent = Grandparent;
|
|
N = Grandparent;
|
|
Parent = N->parent;
|
|
Grandparent = Parent->parent;
|
|
}
|
|
|
|
return Parent;
|
|
}
|
|
|
|
// Returns the value held by the leader of Val's congruent class, or nullptr
// when Val has no node in RegNodeMap or its node is isolated. When pAlign is
// non-null and a leader is returned, the leader's alignment is stored
// through it.
Value *DeSSA::getRegRoot(Value *Val, e_alignment *pAlign) const {
  const auto Entry = RegNodeMap.find(Val);
  if (Entry == RegNodeMap.end() || isIsolated(Entry->second)) {
    return nullptr;
  }

  Node *Leader = Entry->second->getLeader();
  if (pAlign != nullptr) {
    *pAlign = Leader->alignment;
  }
  return Leader->value;
}
|
|
|
|
// Returns the color of the leader of V's congruent class, or 0 when V has no
// node in RegNodeMap or its node is isolated.
int DeSSA::getRootColor(Value *V) {
  const auto Entry = RegNodeMap.find(V);
  if (Entry == RegNodeMap.end() || isIsolated(Entry->second)) {
    return 0;
  }
  return Entry->second->getLeader()->color;
}
|
|
|
|
// Merges the congruent classes of Nd1 and Nd2 (union by rank). Does nothing
// when both already share a leader.
void DeSSA::unionRegs(Node *Nd1, Node *Nd2) {
  Node *Root1 = Nd1->getLeader();
  Node *Root2 = Nd2->getLeader();
  if (Root1 == Root2)
    return;

  // The higher-ranked leader stays leader. On a tie the first one wins and
  // its rank grows by one.
  Node *NewLeader = Root1;
  Node *Leadee = Root2;
  if (Root1->rank < Root2->rank) {
    NewLeader = Root2;
    Leadee = Root1;
  } else if (Root1->rank == Root2->rank) {
    NewLeader->rank++;
  }

  IGC_ASSERT_MESSAGE(nullptr != NewLeader, "ICE: both leader and leadee shall not be null!");
  IGC_ASSERT_MESSAGE(nullptr != Leadee, "ICE: both leader and leadee shall not be null!");
  Leadee->parent = NewLeader;

  // Splice Leadee's circular list into NewLeader's, right before NewLeader.
  Node *LeadeeTail = Leadee->prev;
  Node *LeaderTail = NewLeader->prev;
  LeaderTail->next = Leadee;
  Leadee->prev = LeaderTail;
  LeadeeTail->next = NewLeader;
  NewLeader->prev = LeadeeTail;
}
|
|
|
|
// Splits Val's node out of its congruent class so that it forms a class of
// its own.
//
// A value without a node in RegNodeMap is left alone: isIsolated(Value *)
// already reports such a value as isolated. The lookup uses find() rather
// than operator[] because operator[] would insert a null Node* for an
// unregistered value, and splitNode() dereferences its argument.
void DeSSA::isolateReg(Value *Val) {
  auto RI = RegNodeMap.find(Val);
  if (RI == RegNodeMap.end() || RI->second == nullptr) {
    return;
  }
  splitNode(RI->second);
}
|
|
|
|
// Returns true when V has no node in RegNodeMap; otherwise defers to the
// Node-based isIsolated() overload for V's node.
bool DeSSA::isIsolated(Value *V) const {
  const auto Entry = RegNodeMap.find(V);
  return Entry == RegNodeMap.end() || isIsolated(Entry->second);
}
|
|
|
|
// Split node ND from its existing congurent class, and the
|
|
// node ND itself becomes a new single-value congruent class.
|
|
void DeSSA::splitNode(Node *ND) {
|
|
Node *N = ND->next;
|
|
if (N == ND) {
|
|
// ND is already in a single-value congruent class
|
|
return;
|
|
}
|
|
|
|
Node *Leader = ND->getLeader();
|
|
|
|
// Remove ND from the congruent class
|
|
Node *P = ND->prev;
|
|
N->prev = P;
|
|
P->next = N;
|
|
|
|
// ND : a new single-value congruent class
|
|
ND->parent = ND;
|
|
ND->next = ND;
|
|
ND->prev = ND;
|
|
ND->rank = 0;
|
|
|
|
// If leader is removed, need to have a new leader.
|
|
if (Leader == ND) {
|
|
// P will be the new leader. Also swap ND's color with P's
|
|
// so that the original congruent class still have the original
|
|
// color (this is important as Dom traversal assumes that the
|
|
// color of any congruent class remains unchanged).
|
|
int t = P->color;
|
|
P->color = ND->color;
|
|
ND->color = t;
|
|
|
|
// New leader
|
|
Leader = P;
|
|
}
|
|
|
|
// If ND has children, those children need to set their parent.
|
|
// Since we don't know if ND has children, we conservatively set
|
|
// parent for all remaining nodes using "a path compression", so
|
|
// that all nodes remains in the same rooted tree.
|
|
N = Leader->next;
|
|
Leader->parent = Leader;
|
|
Leader->rank = (Leader == N) ? 0 : 1;
|
|
while (N != Leader) {
|
|
N->parent = Leader;
|
|
N->rank = 0;
|
|
N = N->next;
|
|
}
|
|
}
|
|
|
|
/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
|
|
/// interferences found between registers in the same congruence class. It
|
|
/// takes two DenseMaps as arguments that it also updates:
|
|
///
|
|
/// 1) CurrentDominatingParent, which maps a color to the register in that
|
|
/// congruence class whose definition was most recently seen.
|
|
///
|
|
/// 2) ImmediateDominatingParent, which maps a register to the register in the
|
|
/// same congruence class that most immediately dominates it.
|
|
///
|
|
/// This function assumes that it is being called in a depth-first traversal
|
|
/// of the dominator tree.
|
|
///
|
|
/// The algorithm used here is a generalization of the dominance-based SSA test
|
|
/// for two variables. If there are variables a_1, ..., a_n such that
|
|
///
|
|
/// def(a_1) dom ... dom def(a_n),
|
|
///
|
|
/// then we can test for an interference between any two a_i by only using O(n)
|
|
/// interference tests between pairs of variables. If i < j and a_i and a_j
|
|
/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
|
|
/// Thus, in order to test for an interference involving a_i, we need only check
|
|
/// for a potential interference with a_i+1.
|
|
///
|
|
/// This method can be generalized to arbitrary sets of variables by performing
|
|
/// a depth-first traversal of the dominator tree. As we traverse down a branch
|
|
/// of the dominator tree, we keep track of the current dominating variable and
|
|
/// only perform an interference test with that variable. However, when we go to
|
|
/// another branch of the dominator tree, the definition of the current dominating
|
|
/// variable may no longer dominate the current block. In order to correct this,
|
|
/// we need to use a stack of past choices of the current dominating variable
|
|
/// and pop from this stack until we find a variable whose definition actually
|
|
/// dominates the current block.
|
|
///
|
|
/// There will be one push on this stack for each variable that has become the
|
|
/// current dominating variable, so instead of using an explicit stack we can
|
|
/// simply associate the previous choice for a current dominating variable with
|
|
/// the new choice. This works better in our implementation, where we test for
|
|
/// interference in multiple distinct sets at once.
|
|
void DeSSA::SplitInterferencesForBasicBlock(BasicBlock *MBB, DenseMap<int, Value *> &CurrentDominatingParent,
                                            DenseMap<Value *, Value *> &ImmediateDominatingParent) {
  // Finds the operand slot of a successor PHI whose incoming block is MBB.
  // Yields the operand count when MBB is not an incoming block of the PHI.
  auto findSlotFedByMBB = [MBB](PHINode *Phi) {
    unsigned Slot = 0;
    while (Slot < Phi->getNumOperands() && Phi->getIncomingBlock(Slot) != MBB)
      ++Slot;
    return Slot;
  };

  // ---------------------------------------------------------------------
  // Part 1: PHI-source definitions that live in MBB.
  // ---------------------------------------------------------------------
  // The walk below assumes definitions are visited in dominance order, so
  // order them by their position in the original block first.
  std::vector<Instruction *> &Defs = PHISrcDefs[MBB];
  std::sort(Defs.begin(), Defs.end(), MIIndexCompare(LV));

  for (Instruction *Def : Defs) {
    // A zero color means the value is not used by any PHI or has already
    // been isolated: nothing left to check for it.
    const int Color = getRootColor(Def);
    if (Color == 0)
      continue;

    // The input is sometimes not strictly SSA within a block (values may be
    // redefined). This is handled conservatively, possibly at the price of
    // false interferences: a definition that already is the current
    // dominating parent of its color is simply skipped.
    Value *Dominator = CurrentDominatingParent[Color];
    if (Dominator == Def)
      continue;

    // ImmediateDominatingParent encodes a stack of earlier choices. Pop
    // until an entry is found whose definition dominates Def.
    for (; Dominator; Dominator = ImmediateDominatingParent[Dominator]) {
      if (!getRootColor(Dominator))
        continue;
      if (isa<Argument>(Dominator) || DT->dominates(cast<Instruction>(Dominator), Def))
        break;
      // DT::dominates cannot order two PHI nodes ("It is not possible to
      // determine dominance between two PHI nodes based on their ordering"),
      // so two PHIs of MBB are accepted explicitly.
      if (cast<Instruction>(Dominator)->getParent() == MBB && isa<PHINode>(Def) && isa<PHINode>(Dominator))
        break;
    }

    // A surviving Dominator dominates Def, therefore testing its liveness at
    // Def is sufficient to detect an interference.
    if (Dominator && LV->isLiveAt(Dominator, Def)) {
      // Always isolate the new value. A heuristic choosing which of the two
      // values to isolate could do better.
      isolateReg(Def);
      CurrentDominatingParent[Color] = Dominator;
    } else {
      // No interference: link Def to its dominating parent and make Def the
      // current dominating parent of the color.
      ImmediateDominatingParent[Def] = Dominator;
      CurrentDominatingParent[Color] = Def;
    }
  }

  // ---------------------------------------------------------------------
  // Part 2: PHIs of the successor blocks.
  // ---------------------------------------------------------------------
  // The uses made by a PHI's operands logically belong to the predecessor
  // block, so they are checked here. The PHI's own definition is handled with
  // the other definitions of its block.
  CurrentPHIForColor.clear();

  for (succ_iterator SI = succ_begin(MBB), SE = succ_end(MBB); SI != SE; ++SI) {
    BasicBlock *Succ = *SI;

    for (BasicBlock::iterator It = Succ->begin(), End = Succ->end(); It != End; ++It) {
      PHINode *Phi = dyn_cast<PHINode>(&*It);
      if (!Phi)
        break;

      int Color = getRootColor(Phi);
      // [todo] delete this code
      // Workaround: for compute shaders, a PHI without a color borrows the
      // color of its first non-constant operand that has one.
      if (IGC_IS_FLAG_ENABLED(EnableDeSSAWA) && !Color && CTX->type == ShaderType::COMPUTE_SHADER) {
        for (unsigned Op = 0; !Color && Op < Phi->getNumOperands(); ++Op) {
          Value *Src = Phi->getOperand(Op);
          if (isa<Constant>(Src))
            continue;
          Color = getRootColor(getNodeValue(Src));
        }
      }
      if (Color == 0)
        continue;

      // Operand of this PHI that arrives through MBB.
      const unsigned Slot = findSlotFedByMBB(Phi);
      IGC_ASSERT(Slot < Phi->getNumOperands());
      Value *Incoming = getNodeValue(Phi->getOperand(Slot));

      // Two PHIs of one color do not interfere when they receive the same
      // operand from every shared predecessor. Otherwise the current PHI is
      // isolated (isolating the PHI with the fewest operands might be
      // better).
      std::pair<Instruction *, Value *> &Seen = CurrentPHIForColor[Color];
      if (Seen.first && Seen.second != Incoming) {
        isolateReg(Phi);
        continue;
      }
      Seen = std::make_pair(Phi, Incoming);

      // Live-out interference check: pop the parent stack until an entry is
      // found whose defining block dominates MBB.
      Value *Dominator = CurrentDominatingParent[Color];
      for (; Dominator; Dominator = ImmediateDominatingParent[Dominator]) {
        if (!getRootColor(Dominator))
          continue;
        if (isa<Argument>(Dominator) || DT->dominates(cast<Instruction>(Dominator)->getParent(), MBB))
          break;
      }
      CurrentDominatingParent[Color] = Dominator;

      // On interference isolate the value rather than the PHI. Isolating the
      // PHI is possible too, but it introduces copies for every value
      // involved in that PHI.
      if (Dominator && Dominator != Incoming && LV->isLiveOut(Dominator, *MBB))
        isolateReg(Dominator);
    }

    // Fix the situation in which a uniform PHI sits in a divergent join.
    // Example of a 3-way join produced by a nested if:
    //
    //   %22 = phi i32 [ 13, blk19], [%16, blk0], [ 13, blk17]
    //   %23 = phi i32 [%16, blk19], [  0, blk0], [%16, blk17]
    //     blk0  comes from the uniform if
    //     blk17 comes from the divergent if
    //     blk19 comes from the divergent if
    //
    // If %16 and %22 are coalesced into the same register V5, the following
    // moves are still needed on the divergent edges:
    //   blk-17:  mov(W) v5, 13
    //   blk-19:  mov(W) v5, 13
    //
    // %16 and %23 are not coalesced, which adds two more moves:
    //   blk-17:  mov(W) v6, v5
    //            mov(W) v5, 13   NOTE: this move changes v5, so the v6
    //                            written afterwards is wrong
    //   blk-19:  mov(W) v6, v5
    //            mov(W) v5, 13
    // Because control flow is divergent, both blk-17 and blk-19 execute,
    // which causes the problem.
    //
    // Reaching the code below means the PHI block is a divergent join.
    if (!WIA->insideDivergentCF(MBB->getTerminator()))
      continue;

    for (BasicBlock::iterator It = Succ->begin(), End = Succ->end(); It != End; ++It) {
      PHINode *Phi = dyn_cast<PHINode>(&*It);
      if (!Phi)
        break;
      if (!WIA->isUniform(Phi))
        continue;
      const auto Color = getRootColor(Phi);
      if (!Color)
        continue;

      // From here on: a uniform PHI that has not been isolated.
      const unsigned Slot = findSlotFedByMBB(Phi);
      IGC_ASSERT(Slot < Phi->getNumOperands());
      Value *Incoming = Phi->getOperand(Slot);

      // A constant or isolated operand requires a phi-copy in MBB, and that
      // copy interferes with whatever is currently live-out for the color.
      const bool NeedsPhiCopy = isa<Constant>(Incoming) || isIsolated(Incoming);
      if (NeedsPhiCopy && CurrentDominatingParent[Color])
        isolateReg(Phi);
    }
  }
}
|
|
|
|
// Ensures that no two PHI-source arguments remain in the same congruent
// class: the first argument seen for a color becomes that color's current
// dominating parent, every later one is isolated.
// ImmediateDominatingParent is not consulted here.
void DeSSA::SplitInterferencesForArgument(DenseMap<int, Value *> &CurrentDominatingParent,
                                          DenseMap<Value *, Value *> &ImmediateDominatingParent) {
  for (auto ArgIt = PHISrcArgs.begin(), ArgEnd = PHISrcArgs.end(); ArgIt != ArgEnd; ++ArgIt) {
    Value *Arg = *ArgIt;

    // Color 0: not used by any PHI, or already isolated.
    const int Color = getRootColor(Arg);
    if (Color == 0)
      continue;

    if (CurrentDominatingParent[Color]) {
      // The color already has a dominating parent; this argument cannot
      // share the class.
      isolateReg(Arg);
      continue;
    }
    CurrentDominatingParent[Color] = Arg;
  }
}
|
|
|
|
// [todo] get rid of alignment-based isolation in dessa.
// Using alignment in isolation seems overkill. The right approach
// would be one that avoids adding values with conflicting alignment
// requirements to the same congruent class in the first place, rather
// than adding them to the same congruent class and trying to isolate
// them later.
|
|
void DeSSA::SplitInterferencesForAlignment() {
|
|
for (auto I = RegNodeMap.begin(), E = RegNodeMap.end(); I != E; ++I) {
|
|
// Find a root Node
|
|
Node *rootNode = I->second;
|
|
if (rootNode->parent != rootNode) {
|
|
continue;
|
|
}
|
|
|
|
e_alignment Align = EALIGN_AUTO;
|
|
// Find the most restrictive alignment, i.e. GRF aligned ones.
|
|
Node *N = rootNode;
|
|
Node *Curr;
|
|
do {
|
|
Curr = N;
|
|
N = Curr->next;
|
|
if (Curr->alignment == EALIGN_GRF) {
|
|
Align = EALIGN_GRF;
|
|
break;
|
|
}
|
|
} while (N != rootNode);
|
|
|
|
if (Align != EALIGN_GRF)
|
|
continue;
|
|
|
|
// Isolate any mis-aligned value.
|
|
// Start with Curr node as it cannot be isolated
|
|
// (rootNode could be isolated), therefore, it remains
|
|
// in the linked list and can be used to test stop looping.
|
|
Node *Head = Curr;
|
|
N = Head;
|
|
do {
|
|
Curr = N;
|
|
N = N->next;
|
|
if (Curr->alignment != EALIGN_AUTO && Curr->alignment != EALIGN_GRF) {
|
|
IGC_ASSERT(nullptr != Curr);
|
|
IGC_ASSERT_MESSAGE((Curr != Head), "Head Node cannot be isolated, something wrong!");
|
|
isolateReg(Curr->value);
|
|
}
|
|
} while (N != Head);
|
|
|
|
// Update root's alignment.
|
|
Head->getLeader()->alignment = Align;
|
|
}
|
|
}
|
|
|
|
// Returns the value InsEltMap associates with Val, or Val itself when Val
// has no entry in InsEltMap.
Value *DeSSA::getInsEltRoot(Value *Val) const {
  const auto Found = InsEltMap.find(Val);
  if (Found != InsEltMap.end()) {
    return Found->second;
  }
  return Val;
}
|
|
|
|
/// <summary>
|
|
/// Identify if an instruction has partial write semantics
|
|
/// </summary>
|
|
/// <param name="Inst"></param>
|
|
/// <returns> the index of the source partial-write operand</returns>
|
|
static int getPartialWriteSource(Value *Inst) {
|
|
if (isa<InsertElementInst>(Inst))
|
|
return 0; // source 0 is the original value
|
|
if (auto CI = dyn_cast<CallInst>(Inst)) {
|
|
// only handle inline-asm with simple destination
|
|
if (CI->isInlineAsm() && !CI->getType()->isStructTy()) {
|
|
InlineAsm *IA = cast<InlineAsm>(IGCLLVM::getCalledValue(CI));
|
|
StringRef constraintStr(IA->getConstraintString());
|
|
SmallVector<StringRef, 8> constraints;
|
|
constraintStr.split(constraints, ',');
|
|
for (int i = 0; i < (int)constraints.size(); i++) {
|
|
unsigned destID = 0;
|
|
if (constraints[i].getAsInteger(10, destID) == 0) {
|
|
// constraint-string indicates that source(i-1) and
|
|
// destination should be the same vISA variable
|
|
if (i > 0 && destID == 0)
|
|
return (i - 1);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
void DeSSA::CoalesceInsertElements() {
|
|
for (df_iterator<DomTreeNode *> DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
|
|
BasicBlock *Blk = DI->getBlock();
|
|
|
|
for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end(); BBI != BBE; ++BBI) {
|
|
Instruction *Inst = &(*BBI);
|
|
|
|
if (!CG->NeedInstruction(*Inst)) {
|
|
continue;
|
|
}
|
|
// Only Aliasee needs to be handled.
|
|
if (getAliasee(Inst) != Inst) {
|
|
continue;
|
|
}
|
|
|
|
// For keeping the existing behavior of InsEltMap unchanged
|
|
auto PWSrcIdx = getPartialWriteSource(Inst);
|
|
if (PWSrcIdx >= 0) {
|
|
Value *origSrcV = Inst->getOperand(PWSrcIdx);
|
|
Value *SrcV = getAliasee(origSrcV);
|
|
if (SrcV != Inst && isArgOrNeededInst(origSrcV)) {
|
|
// union them
|
|
e_alignment InstAlign = GetPreferredAlignment(Inst, WIA, CTX);
|
|
e_alignment SrcVAlign = GetPreferredAlignment(SrcV, WIA, CTX);
|
|
if (!LV->isLiveAt(SrcV, Inst) && !alignInterfere(InstAlign, SrcVAlign) &&
|
|
(WIA->whichDepend(SrcV) == WIA->whichDepend(Inst))) {
|
|
InsEltMapAddValue(SrcV);
|
|
InsEltMapAddValue(Inst);
|
|
|
|
Value *SrcVRoot = getInsEltRoot(SrcV);
|
|
Value *InstRoot = getInsEltRoot(Inst);
|
|
|
|
// union them and their liveness info
|
|
InsEltMapUnionValue(SrcV, Inst);
|
|
LV->mergeUseFrom(SrcVRoot, InstRoot);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Resolves Val through AliasMap, then InsEltMap, then getRegRoot().
// Returns the getRegRoot() result when it is non-null; otherwise returns the
// last value obtained from AliasMap/InsEltMap, which is nullptr when Val has
// no entry in either map.
Value *DeSSA::getRootValue(Value *Val, e_alignment *pAlign) const {
  Value *Resolved = nullptr;

  // Step 1: aliaser -> aliasee.
  const auto AliasIt = AliasMap.find(Val);
  if (AliasIt != AliasMap.end())
    Resolved = AliasIt->second;

  // Step 2: InsEltMap lookup on whatever step 1 produced.
  const auto InsEltIt = InsEltMap.find(Resolved ? Resolved : Val);
  if (InsEltIt != InsEltMap.end())
    Resolved = InsEltIt->second;

  // Step 3: root of the PHI congruent class, if there is one.
  if (Value *PhiRoot = getRegRoot(Resolved ? Resolved : Val, pAlign))
    return PhiRoot;
  return Resolved;
}
|
|
|
|
// Appends to ValsInCC the node value of V followed by every other value
// linked in the same circular node list of RegNodeMap.
void DeSSA::getAllValuesInCongruentClass(Value *V, SmallVector<Value *, 16> &ValsInCC) {
  // InsertElement is special: only the root value of a sequence of
  // insertElement instructions is in the congruent class. That root value has
  // its liveness modified to cover all InsertElements grouped together.
  Value *Representative = getNodeValue(V);

  IGC_ASSERT_MESSAGE(nullptr != Representative, "ICE: Node value should not be nullptr!");
  ValsInCC.push_back(Representative);

  const auto NodeIt = RegNodeMap.find(Representative);
  if (NodeIt == RegNodeMap.end())
    return;

  // Follow the ring until it comes back to the starting node.
  Node *const Start = NodeIt->second;
  Node *Cursor = Start->next;
  while (Cursor != Start) {
    ValsInCC.push_back(Cursor->value);
    Cursor = Cursor->next;
  }
}
|
|
|
|
// All values that are coalesced together, including values that are
|
|
// handled specially, such as ones in aliasMap and insEltMap.
|
|
void DeSSA::getAllCoalescedValues(Value *V, SmallVector<Value *, 16> &Vals) {
|
|
getAllValuesInCongruentClass(V, Vals);
|
|
IGC_ASSERT_MESSAGE(Vals.size() > 0, "ICE: Vals should not be empty!");
|
|
|
|
// First, add values from InsEltMap
|
|
for (int i = 0, sz = (int)Vals.size(); i < sz; ++i) {
|
|
Value *ccVal = Vals[i];
|
|
for (const auto &II : InsEltMap) {
|
|
Value *R = II.second;
|
|
if (R != ccVal) {
|
|
continue;
|
|
}
|
|
Value *A = II.first;
|
|
if (A != R) {
|
|
Vals.push_back(A);
|
|
}
|
|
}
|
|
}
|
|
|
|
// second, add aliasers from AliasMap
|
|
for (int i = 0, sz = (int)Vals.size(); i < sz; ++i) {
|
|
Value *aliasee = Vals[i];
|
|
for (const auto &II : AliasMap) {
|
|
Value *R = II.second;
|
|
if (R != aliasee) {
|
|
continue;
|
|
}
|
|
Value *aliaser = II.first;
|
|
if (aliaser != aliasee) {
|
|
Vals.push_back(aliaser);
|
|
}
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
// Sets up aliasing among the InsertValueInst chains rooted at theIVI.
// Nothing is done unless theIVI produces a struct and its aggregate operand
// (operand 0) is a Constant, an Argument or a PHINode.
void DeSSA::coalesceAliasInsertValue(InsertValueInst *theIVI) {
  // Collects into IVIs the chain of insertvalue instructions starting at
  // aIVI and returns the lead (last one). The chain is extended while the
  // current instruction has exactly one use, that user is an InsertValueInst,
  // and the user's uniformity matches the uniformity of aIVI.
  auto getInsValChain = [this](InsertValueInst *aIVI, SmallVector<Value *, 8> &IVIs) {
    IGC_ASSERT(aIVI);
    InsertValueInst *lead = aIVI;
    const bool isUniform = WIA->isUniform(lead);
    IVIs.push_back(lead);
    while (lead->hasOneUse()) {
      auto next = dyn_cast<InsertValueInst>(lead->user_back());
      if (!next || isUniform != WIA->isUniform(next)) {
        break;
      }
      lead = next;
      IVIs.push_back(lead);
    }
    return lead;
  };

  // Makes every element of IVIs (beyond the first) an aliaser of the first
  // element and merges its liveness uses into the aliasee. Chains of a
  // single instruction are left alone.
  auto setInsValAlias = [this](SmallVector<Value *, 8> &IVIs) {
    const int nelts = (int)IVIs.size();
    if (nelts <= 1) {
      return;
    }
    Value *aliasee = IVIs[0];
    AddAlias(aliasee);
    for (int i = 1; i < nelts; ++i) {
      Value *V = IVIs[i];
      AliasMap[V] = aliasee;

      // union liveness info
      LV->mergeUseFrom(aliasee, V);
    }
  };

  // Get all fields' indices if there are at most 2 level fields.
  // Using int = <i16 level_1-ix, i16 level_0-ix> to represent two
  // level indices. If any instruction has a different number of indices,
  // aFields is emptied and the scan stops.
  auto getIndices = [](DenseSet<int> &aFields, SmallVector<Value *, 8> &IVIs) {
    const int nelts = (int)IVIs.size();
    for (int i = 0; i < nelts; ++i) {
      InsertValueInst *tI = cast<InsertValueInst>(IVIs[i]);
      switch (tI->getNumIndices()) {
      case 1: {
        uint16_t ix = (uint16_t)tI->getIndices().front();
        aFields.insert((int)ix);
        break;
      }
      case 2: {
        ArrayRef<unsigned> ids = tI->getIndices();
        // Truncate each index to 16 bits, then widen to uint32_t BEFORE
        // shifting. Shifting a uint16_t directly would operate on a promoted
        // signed int and could shift into the sign bit.
        const uint32_t lvl0_ix = (uint16_t)ids[0];
        const uint32_t lvl1_ix = (uint16_t)ids[1];
        const uint32_t ix = ((lvl1_ix << 16) | lvl0_ix);
        aFields.insert((int)ix);
        break;
      }
      default:
        aFields.clear();
        return;
      }
    }
  };

  // True when no element of LHS is present in RHS.
  auto isDisjoin = [](DenseSet<int> &RHS, DenseSet<int> &LHS) {
    for (auto II : LHS) {
      int e = II;
      if (RHS.find(e) != RHS.end()) {
        return false;
      }
    }
    return true;
  };

  // InsertValueInst chain:
  //     V0 = InsVal undef/const, S0, 0
  //     V1 = InsVal V0, S1, 1
  //     ...
  //     Vn = InsVal Vn-1, Sn, n
  //   where all Vi, i=0, n-1 have a single use. This sequence is called an
  //   InsertValueInst chain (IVI Chain). And they (all Vi) are set to alias
  //   each other.
  //
  // Start finding IVI chains from an inst that cannot be an operand of
  // the other InsertValueInst. It's possible to have several IVI chains.
  // For example,
  //   Chain0:  V0 = insval undef
  //            V1 = insVal V0
  //   Chain1:  V2 = insVal V1
  //            V3 = insVal V2
  //   Chain2:  V4 = insVal V1
  //   Chain3:  V5 = insVal V4
  //   Chain4:  V6 = insVal V4
  // This can be viewed as a tree of IVI chains, rooted at V0, with edge
  // denoting def-use relation between two chains. The tree for the above
  // is as below:
  //
  //   Chain0
  //     +-- Chain1
  //     +-- Chain2
  //           +-- Chain3
  //           +-- Chain4
  //
  // The algorithm does the following:
  //   1. Find all chains. All insts (Vs) of each chain are aliased to
  //      each other in the same chain.
  //   2. Do aggressive aliasing : alias several chains along one path from
  //      root to a leaf node, as long as no struct field is defined by
  //      more than one chain, which is a strong condition and implies that
  //      the root inst should start like the following:
  //           V0 = insval undef, s, 10
  //
  const Value *Oprd0 = theIVI->getOperand(0);
  const bool isChainStart =
      theIVI->getType()->isStructTy() && (isa<Constant>(Oprd0) || isa<Argument>(Oprd0) || isa<PHINode>(Oprd0));
  if (!isChainStart) {
    return;
  }

  SmallVector<Value *, 8> IVIChain;
  InsertValueInst *LeadInst = getInsValChain(theIVI, IVIChain);
  setInsValAlias(IVIChain);

  // For aggressive aliasing. lead inst is used to identify its
  // successor chain in the chain tree.
  bool aggressiveAliasing = false;
  DenseSet<int> commonFields;
  const bool isChainUniform = WIA->isUniform(LeadInst);
  Value *theAliasee = getAliasee(LeadInst);
  if (isa<UndefValue>(Oprd0)) {
    getIndices(commonFields, IVIChain);
    // Fields inserted are known, can do aggressive coalescing.
    aggressiveAliasing = !commonFields.empty();
  }

  // Make sure every sub-chain rooted at theIVI is processed.
  std::list<InsertValueInst *> worklist;
  worklist.push_back(LeadInst);
  while (!worklist.empty()) {
    InsertValueInst *aI = worklist.front();
    worklist.pop_front();
    for (auto UI = aI->user_begin(), UE = aI->user_end(); UI != UE; ++UI) {
      Value *v = *UI;
      InsertValueInst *I = dyn_cast<InsertValueInst>(v);
      if (!I) {
        continue;
      }
      IVIChain.clear();
      InsertValueInst *nextLead = getInsValChain(I, IVIChain);
      setInsValAlias(IVIChain);
      worklist.push_back(nextLead);

      // Aggressive aliasing only extends the path whose current end is aI,
      // and only with a chain of matching uniformity.
      if (!(aggressiveAliasing && LeadInst == aI && isChainUniform == WIA->isUniform(nextLead))) {
        continue;
      }

      DenseSet<int> fields;
      getIndices(fields, IVIChain);
      if (fields.empty() || !isDisjoin(commonFields, fields)) {
        continue;
      }

      // The sub-chain writes only fields not yet written along this path:
      // fold it into the root chain's alias set.
      LeadInst = nextLead;
      commonFields.insert(fields.begin(), fields.end());

      // update alias
      AddAlias(theAliasee);
      for (auto II : IVIChain) {
        Value *V = II;
        AliasMap[V] = theAliasee;

        // union liveness info
        LV->mergeUseFrom(theAliasee, V);
      }
    }
  }
}
|
|
|
|
// Find the alias pattern:
//     V0 = IEI UndefValue, S0, 0
//     V1 = IEI V0, S1, 1
//     V2 = IEI V1, S2, 2
//     ......
//     Vn = IEI Vn_1, Sn_1, n
// All Vi (i=0,n_1, except i=n) have a single use.
//
// IEI must be the first instruction of the pattern (operand 0 is undef).
// On return the matched instructions are packed at the front of AllIEIs, in
// increasing order of the element index they write.
//
// Returns the number of instructions found, or 0 when an insert index is
// not a constant or lies outside the vector.
// NOTE(review): AllIEIs is resized, not cleared, so this presumably expects
// the caller to pass an empty vector - confirm against the call sites.
int DeSSA::checkInsertElementAlias(InsertElementInst *IEI, SmallVector<Value *, 16> &AllIEIs) {
  IGC_ASSERT(nullptr != IEI);
  IGC_ASSERT_MESSAGE(isa<UndefValue>(IEI->getOperand(0)), "ICE: need to pass first IEI as the argument");

  IGCLLVM::FixedVectorType *VTy = cast<IGCLLVM::FixedVectorType>(IEI->getType());
  IGC_ASSERT(nullptr != VTy);
  const int nelts = (int)VTy->getNumElements();
  AllIEIs.resize(nelts, nullptr);

  InsertElementInst *Inst = IEI;
  IGC_ASSERT(nullptr != WIA);
  WIAnalysis::WIDependancy Dep = WIA->whichDepend(Inst);
  while (Inst) {
    // Check if Inst has constant index, stop if not.
    // (This is for catching a common case, a variable index
    //  can be handled as well if needed.)
    ConstantInt *CI = dyn_cast<ConstantInt>(Inst->getOperand(2));
    if (!CI) {
      return 0;
    }

    // A constant index beyond the vector is legal IR, but it must never be
    // used to index AllIEIs. Treat it like any other unsupported pattern.
    const uint64_t ix = CI->getZExtValue();
    if (ix >= (uint64_t)nelts) {
      return 0;
    }
    AllIEIs[(int)ix] = Inst;

    if (!Inst->hasOneUse() || Dep != WIA->whichDepend(Inst)) {
      break;
    }
    Inst = dyn_cast<InsertElementInst>(Inst->user_back());
  }

  // Pack the entries found to the front and return how many there are.
  int num = 0;
  for (int i = 0; i < nelts; ++i) {
    if (AllIEIs[i] == nullptr)
      continue;
    if (num < i) {
      // Pack them
      AllIEIs[num] = AllIEIs[i];
      AllIEIs[i] = nullptr;
    }
    ++num;
  }
  return num;
}
|
|
|
|
// Returns the value AliasMap associates with V, or V itself when V has no
// entry in AliasMap.
Value *DeSSA::getAliasee(Value *V) const {
  const auto Found = AliasMap.find(V);
  if (Found != AliasMap.end()) {
    return Found->second;
  }
  return V;
}
|
|
|
|
// True when V has an AliasMap entry that maps it to a value other than
// itself.
bool DeSSA::isAliaser(Value *V) const {
  const auto Found = AliasMap.find(V);
  return Found != AliasMap.end() && Found->first != Found->second;
}
|
|
|
|
// True when V has an entry in NoopAliasMap.
bool DeSSA::isNoopAliaser(Value *V) const {
  const bool Present = NoopAliasMap.count(V) > 0;
  return Present;
}
|
|
|
|
// True when V has an AliasMap entry that maps it to itself.
bool DeSSA::isAliasee(Value *V) const {
  const auto Found = AliasMap.find(V);
  return Found != AliasMap.end() && Found->first == Found->second;
}
|
|
|
|
// Single-valued:
|
|
//   If a variable takes one and only one value during its entire lifetime,
//   it is called single-valued. If a variable is a vector or a struct,
//   single-valued means that all of its components (vector elements or
//   struct members) are single-valued.
//
//   This concept is useful when setting up aliasing. For example,
//        a = bitcast b to i32
//   If both a and b are single-valued, a will be set to alias b without
//   doing further checking. However, if either isn't single-valued, deciding
//   whether a can be aliased to b requires interference checking. For example
|
|
// dessa CC : [b, c] // b and c are coalesced into a single variable.
|
|
//
|
|
// b = 1
|
|
// a = bitcast b to i32
|
|
// ...
|
|
// c = 2
|
|
// ...
|
|
// L: = a
|
|
// = b
|
|
//
|
|
// In this case, if a is aliased to b, a would get 2 at L, but the correct
|
|
// value should be 1. In order to find out if a can be aliased to b, it
|
|
// requires to do interference checking with c.
|
|
bool DeSSA::isSingleValued(llvm::Value *V) const {
  Value *Target = getAliasee(V);
  Value *TargetRoot = getInsEltRoot(Target);

  // V is single-valued only if its aliasee is absent from InsEltMap and the
  // aliasee's InsElt root is isolated. Presence in InsEltMap, or a
  // non-isolated root, means not single-valued.
  return InsEltMap.count(Target) == 0 && isIsolated(TargetRoot);
}
|
|
|
|
// The following paper explains an approach to check if two
|
|
// congruent classes interfere using a linear approach.
|
|
//
|
|
// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness,
|
|
// Code Quality and Efficiency,
|
|
// In Proceedings of the 7th annual IEEE/ACM International Symposium
|
|
// on Code Generation and Optimization (Seattle, Washington,
|
|
// March 22 - 25, 2009). CGO '09. IEEE, Washington, DC, 114-125.
|
|
//
|
|
// Here, we simply use a naive pair-wise comparison.
|
|
//
|
|
// TODO: check if using linear approach described in the paper is
|
|
// necessary; To do so, it needs to get PN (preorder number of BB)
|
|
// and sort congruent classes before doing interference checking.
|
|
bool DeSSA::interfere(llvm::Value *V0, llvm::Value *V1) {
  SmallVector<Value *, 16> Class0;
  SmallVector<Value *, 16> Class1;
  getAllValuesInCongruentClass(V0, Class0);
  getAllValuesInCongruentClass(V1, Class1);

  // Naive pair-wise test: the classes interfere as soon as one member of the
  // first has a liveness interference with one member of the second.
  for (Value *Lhs : Class0) {
    for (Value *Rhs : Class1) {
      if (LV->hasInterference(Lhs, Rhs))
        return true;
    }
  }
  return false;
}
|
|
|
|
// Alias interference checking.
|
|
// The caller is trying to check if V0 can alias to V1. For example,
|
|
// V0 = bitcast V1, or
|
|
// V0 = extractElement V1, ...
|
|
// As V0 and V1 hold the same value, the interference between these two
|
|
// does not matter. Thus, this function is a variant of interfere()
|
|
// with V0 and V1 interference ignored.
|
|
bool DeSSA::aliasInterfere(llvm::Value *V0, llvm::Value *V1) {
  SmallVector<Value *, 16> Class0;
  SmallVector<Value *, 16> Class1;
  getAllValuesInCongruentClass(V0, Class0);
  getAllValuesInCongruentClass(V1, Class1);

  Value *Aliasee0 = getAliasee(V0);
  Value *Aliasee1 = getAliasee(V1);

  //
  // An aliasee that is in InsEltMap is not single valued and cannot be
  // excluded from interference checking.
  //
  // For example:
  //     x = bitcast y
  //     z = InsElt y, ...
  //       = x
  //       = y
  //
  //     {y, z} are coalesced via InsElt, so interfere(x, y) must be checked.
  //
  // However, if y (and x too) is not in InsEltMap, there is no need to check
  // interfere(x, y), as they hold the same value:
  //     x = bitcast y
  //       = x
  //       = y
  //
  const bool SkipAliaseePair = (InsEltMap.count(Aliasee0) == 0) && (InsEltMap.count(Aliasee1) == 0);

  for (Value *Lhs : Class0) {
    for (Value *Rhs : Class1) {
      // The (aliasee0, aliasee1) pair itself is exempt when both sides are
      // single valued.
      const bool IsAliaseePair = (Lhs == Aliasee0 && Rhs == Aliasee1);
      if (SkipAliaseePair && IsAliaseePair)
        continue;
      if (LV->hasInterference(Lhs, Rhs))
        return true;
    }
  }
  return false;
}
|
|
|
|
// The existing code does align interference checking. Just
|
|
// keep it for now. Likely to improve it later.
|
|
bool DeSSA::alignInterfere(e_alignment a1, e_alignment a2) {
|
|
if (a1 == EALIGN_GRF && !(a2 == EALIGN_GRF || a2 == EALIGN_AUTO)) {
|
|
return true;
|
|
}
|
|
if (a2 == EALIGN_GRF && !(a1 == EALIGN_GRF || a1 == EALIGN_AUTO)) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|