/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*========================== begin_copyright_notice ============================

This file is distributed under the University of Illinois Open Source License.
See LICENSE.TXT for details.

============================= end_copyright_notice ===========================*/

//===-------- DeSSA.cpp - divide phi variables into congruent class -------===//
//
//  Intel LLVM Extension
//===----------------------------------------------------------------------===//
//
// This pass originated from StrongPHIElimination on the machine-ir.
// We have adapted it to work on llvm-ir. Also note that we have changed it
// from a transformation to an analysis, meaning it only divides phi-vars
// into congruent classes, and does NOT insert the copies. A separate code-gen
// pass can use this analysis to emit non-ssa target code.
//
// Algorithm and References:
//
// This pass considers how to eliminate PHI instructions by aggressively
// coalescing the copies that would otherwise be inserted by a naive algorithm
// and only inserting the copies that are necessary. The coalescing technique
// initially assumes that all registers appearing in a PHI instruction do not
// interfere. It then eliminates proven interferences, using dominators to only
// perform a linear number of interference tests instead of the quadratic number
// of interference tests that this would naively require.
// This is a technique derived from:
//
//    Budimlic, et al. Fast copy coalescing and live-range identification.
//    In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
//    Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
//    PLDI '02. ACM, New York, NY, 25-32.
// // The original implementation constructs a data structure they call a dominance // forest for this purpose. The dominance forest was shown to be unnecessary, // as it is possible to emulate the creation and traversal of a dominance forest // by directly using the dominator tree, rather than actually constructing the // dominance forest. This technique is explained in: // // Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code // Quality and Efficiency, // In Proceedings of the 7th annual IEEE/ACM International Symposium on Code // Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). // CGO '09. IEEE, Washington, DC, 114-125. // // Careful implementation allows for all of the dominator forest interference // checks to be performed at once in a single depth-first traversal of the // dominator tree, which is what is implemented here. //===----------------------------------------------------------------------===// #include "Compiler/CISACodeGen/DeSSA.hpp" #include "Compiler/CISACodeGen/ShaderCodeGen.hpp" #include "Compiler/CISACodeGen/PatternMatchPass.hpp" #include "Compiler/MetaDataApi/MetaDataApi.h" #include "common/debug/Debug.hpp" #include "common/debug/Dump.hpp" #include "Compiler/IGCPassSupport.h" #include "common/LLVMWarningsPush.hpp" #include "llvmWrapper/IR/Instructions.h" #include #include #include #include "common/LLVMWarningsPop.hpp" #include #include "Probe/Assertion.h" using namespace llvm; using namespace IGC; using namespace IGC::Debug; using namespace IGC::IGCMD; #define PASS_FLAG "DeSSA" #define PASS_DESCRIPTION "coalesce moves coming from phi nodes" #define PASS_CFG_ONLY true #define PASS_ANALYSIS true IGC_INITIALIZE_PASS_BEGIN(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS) IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis) IGC_INITIALIZE_PASS_DEPENDENCY(LiveVarsAnalysis) IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenPatternMatch) IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 
IGC_INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
IGC_INITIALIZE_PASS_END(DeSSA, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)

// NOTE(review): in this copy of the file the template argument lists
// ("<...>") of DenseMap/SmallVector/std::map/dyn_cast/isa/cast/getAnalysis/
// df_iterator appear to have been stripped by text extraction. The code
// tokens below are left exactly as found; restore the arguments from the
// original file before building.

char DeSSA::ID = 0;

// Constructs the pass and registers it with the global PassRegistry.
DeSSA::DeSSA() : FunctionPass(ID) { initializeDeSSAPass(*PassRegistry::getPassRegistry()); }

// Writes a textual dump of the analysis state to OS, in this order:
//   1. AliasMap, grouped by aliasee;
//   2. InsEltMap, grouped by root value;
//   3. for each InsEltMap root, the grouped values that are also aliasees;
//   4. the congruent classes held in RegNodeMap, grouped by leader.
// When m_F is set, every argument and instruction of m_F is numbered (in
// that order) and each group is sorted by that number so that two dumps can
// be compared; when m_F is null nothing is sorted.
void DeSSA::print(raw_ostream &OS, const Module *) const {
  // Assign each inst/arg a unique integer so that the output
  // would be in order. It is useful when doing comparison.
  DenseMap Val2IntMap;
  int id = 0;
  if (m_F) {
    // All arguments
    for (auto AI = m_F->arg_begin(), AE = m_F->arg_end(); AI != AE; ++AI) {
      Value *aVal = &*AI;
      Val2IntMap[aVal] = (++id);
    }
    // All instructions
    for (auto II = inst_begin(m_F), IE = inst_end(m_F); II != IE; ++II) {
      Instruction *Inst = &*II;
      Val2IntMap[(Value *)Inst] = (++id);
    }
  }

  bool doSort = (!Val2IntMap.empty());
  // Orders two values by the number assigned above.
  // NOTE(review): operator[] inserts a default entry for any value that was
  // not numbered above, so this comparator can grow Val2IntMap while
  // std::sort is running; a non-inserting lookup looks like the intent.
  auto valCmp = [&](const Value *V0, const Value *V1) {
    int n0 = Val2IntMap[V0];
    int n1 = Val2IntMap[V1];
    return n0 < n1;
  };

  SmallVector ValKeyVec;
  DenseMap> output;
  {
    OS << "---- AliasMap ----\n\n";
    // Group aliasers under their aliasee; a self-mapping only creates the
    // (possibly empty) group.
    for (auto &I : AliasMap) {
      Value *aliaser = I.first;
      Value *aliasee = I.second;
      SmallVector &allAliasers = output[aliasee];
      if (aliaser != aliasee) {
        allAliasers.push_back(aliaser);
      }
    }

    for (auto &I : output) {
      Value *key = I.first;
      ValKeyVec.push_back(key);
    }
    if (doSort) {
      std::sort(ValKeyVec.begin(), ValKeyVec.end(), valCmp);
    }
    for (auto &I : ValKeyVec) {
      Value *aliasee = I;
      SmallVector &allAliasers = output[aliasee];
      if (doSort) {
        std::sort(allAliasers.begin(), allAliasers.end(), valCmp);
      }

      OS << " Aliasee: ";
      aliasee->print(OS);
      OS << "\n";
      for (int i = 0, sz = (int)allAliasers.size(); i < sz; ++i) {
        OS << " ";
        allAliasers[i]->print(OS);
        OS << "\n";
      }
    }
    OS << "\n\n";
  }

  OS << "---- InsEltMap ----\n\n";
  // Reuse the same containers for the InsEltMap grouping.
  output.clear();
  ValKeyVec.clear();
  for (auto &I : InsEltMap) {
    Value *val = I.first;
    Value *rootV = I.second;
    SmallVector &allVals = output[rootV];
    if (rootV != val) {
      allVals.push_back(val);
    }
  }

  for (auto &I : output) {
    Value *key = I.first;
    ValKeyVec.push_back(key);
  }
  if (doSort) {
    std::sort(ValKeyVec.begin(), ValKeyVec.end(), valCmp);
  }
  for (auto &I : ValKeyVec) {
    Value *rootV = I;
    SmallVector &allVals = output[rootV];
    if (doSort) {
      std::sort(allVals.begin(), allVals.end(), valCmp);
    }
    OS << " Root Value : ";
    rootV->print(OS);
    OS << "\n";
    for (int i = 0, sz = (int)allVals.size(); i < sz; ++i) {
      OS << " ";
      allVals[i]->print(OS);
      OS << "\n";
    }
  }
  OS << "\n\n";

  OS << "---- Alias for composite/vector values"
     << " (value in both AliasMap & InsEltMap)----\n";
  // All InsElt output has been sorted
  for (auto &I : ValKeyVec) {
    Value *rootV = I;
    SmallVector &allVals = output[rootV];
    OS << " Root Value: ";
    rootV->printAsOperand(OS);
    if (isAliasee(rootV)) {
      OS << " [aliasee]";
    }
    // Only values that are aliasees are listed; a line break is emitted
    // before every 8th listed value.
    int num = 0;
    for (int i = 0, sz = (int)allVals.size(); i < sz; ++i) {
      Value *val = allVals[i];
      if (!isAliasee(val))
        continue;
      if ((num % 8) == 0) {
        OS << "\n ";
      }
      allVals[i]->printAsOperand(OS);
      OS << " [aliasee] ";
      ++num;
    }
    OS << "\n";
  }
  OS << "\n\n";

  OS << "---- Phi-Var Isolations ----\n";
  SmallVector NodeKeyVec;
  std::map> nodeOutput;
  // std::map LeaderVisited;
  for (auto I = RegNodeMap.begin(), E = RegNodeMap.end(); I != E; ++I) {
    Node *N = I->second;
    // We don't want to change behavior of DeSSA by invoking
    // dumping/printing functions. Thus, don't use getLeader()
    // as it has side-effect (doing path halving).
    Node *Leader = N->parent;
    while (Leader != Leader->parent) {
      Leader = Leader->parent;
    }

    SmallVector &allNodes = nodeOutput[Leader];
    if (N != Leader) {
      allNodes.push_back(N);
    }
  }

  // Orders nodes by the number assigned to their values.
  auto nodeCmp = [&](const Node *N0, const Node *N1) {
    const Value *V0 = N0->value;
    const Value *V1 = N1->value;
    return valCmp(V0, V1);
  };

  for (auto &I : nodeOutput) {
    Node *key = I.first;
    NodeKeyVec.push_back(key);
  }
  if (doSort) {
    std::sort(NodeKeyVec.begin(), NodeKeyVec.end(), nodeCmp);
  }
  for (auto &I : NodeKeyVec) {
    Node *Leader = I;
    SmallVector &allNodes = nodeOutput[Leader];
    if (doSort) {
      std::sort(allNodes.begin(), allNodes.end(), nodeCmp);
    }

    Value *VL;
    if (isIsolated(Leader)) {
      IGC_ASSERT_MESSAGE(allNodes.size() == 0, "ICE: isolated node still have other in its CC!");
      VL = Leader->value;
      OS << "\nVar isolated : ";
      VL->print(OS);
      OS << "\n";
    } else {
      OS << "\nLeader : ";
      Leader->value->print(OS);
      OS << "\n";
      for (auto &II : allNodes) {
        Node *N = II;
        VL = N->value;
        OS << " ";
        VL->print(OS);
        OS << "\n";
        // NOTE(review): N is a loop-local copy, so this assignment has no
        // effect; it looks like a leftover from a linked-list walk.
        N = N->next;
      }
    }
  }
}

// Prints the analysis state to dbgs().
void DeSSA::dump() const { print(dbgs()); }

// Pass entry point: builds the alias maps and the phi congruent classes for
// MF, then splits classes whose members interfere. Always returns false.
//
// NOTE(review): the early return taken when MF has no FunctionsInfo entry
// leaves m_F pointing at MF, while the normal path resets it to nullptr at
// the end; confirm that print()/dump() cannot run in between.
bool DeSSA::runOnFunction(Function &MF) {
  if (IGC_IS_FLAG_DISABLED(EnableDeSSA)) {
    LV = nullptr;
    WIA = nullptr;
    // getRootValue(), isIsolated(), getLiveVars(), etc. still work.
    return false;
  }
  m_F = &MF;
  CurrColor = 0;
  MetaDataUtils *pMdUtils = nullptr;
  pMdUtils = getAnalysis().getMetaDataUtils();
  if (pMdUtils->findFunctionsInfoItem(&MF) == pMdUtils->end_FunctionsInfo()) {
    return false;
  }
  CTX = getAnalysis().getCodeGenContext();
  DT = &getAnalysis().getDomTree();
  WIA = &getAnalysis();
  LI = &getAnalysis().getLoopInfo();
  CG = &getAnalysis();
  DL = &MF.getParent()->getDataLayout();
  LV = &getAnalysis().getLiveVars();

  // make sure we do not run WIAnalysis between CodeGen and DeSSA,
  // therefore m_program's Uniform Helper is still valid, which is
  // used indirectly in DeSSA::GetPhiTemp().
  // If we cannot maintain this assertion, then we should do
  //   m_program->SetUniformHelper(WIA);
  //
  // The DeSSA/Coalescing procedure:
  //   1. Follow Dominance tree to set up alias map. While setting up alias map,
  //      update liveness for aliasee so that aliasee's liveness is the sum of
  //      all its aliasers.
  //      By aliaser/aliasee, it means the following:
  //            aliaser = bitcast aliasee
  //      (Most aliasing is from bitcast, some can be from other cast instructions
  //       such as inttoptr/ptrtoint. It could be also from insElt/extElt.)
  //
  //      By traversing dominance tree depth-first (DF), it is guaranteed that
  //      a def will be visited before its use except PHI node. Since PHI inst
  //      is not a candidate for aliasing, this means that the def of aliasee has
  //      been visited before the aliaser instruction. For example,
  //            x = bitcast y
  //         The def of y should be visited before visiting this bitcast inst.
  //      Let alias(v0, v1) denote that v0 is an alias to v1. This visiting order
  //      under DF dominance-tree traversal will not see aliasing in the following
  //      order:
  //            alias(v0, v1)
  //            alias(v1, v2)
  //      rather, it must be in the order
  //            alias(v1, v2)
  //            alias(v0, v1)
  //
  //   2. Set up InsEltMap, which coalesces vector values used in InsertElement
  //      instructions. It is treated as "alias", meaning the root value's
  //      liveness is the sum of all its non-root values. The difference b/w
  //      AliasMap and InsEltMap is that AliasMap is pure alias in that all
  //      aliasers have the same values as its aliasee (ones of primitive types);
  //      while InsElt has composite/vector values. This difference does not matter
  //      in dessa, but it would matter when handling sub-vector aliasing after
  //      dessa (VariableReuseAnalysis uses experimental sub-vector aliasing under
  //      the key whose default is off).
  //
  //      We could remove InsEltMap/AliasMap by adding each value into DeSSA node.
  //      To do so, the dessa algo needs to be improved to isolate them together
  //      if they need to be isolated. It also may involve several subtle changes
  //      in implementation. Also, adding all values instead of a single one into
  //      CC increases the size of CC, which could increase compiling time. With
  //      this, let us stay with insEltMap (as well as aliasMap).
  //   3. Make sure DeSSA node only use the 'node value', that is, given value V,
  //      its Node value:
  //            V_aliasee = AliasMap[V] if V is in map, or V otherwise
  //            node_value = InsEltMap[V_aliasee] if in InsEltMap; or V_aliasee
  //                         otherwise.
  //      Note that since the type of aliasees may be different from aliaser,
  //      the values in the same CC will have different types. Keep this in mind
  //      when creating CVariable in GetSymbol().
  //
  // Note that the algorithm forces coalescing of aliasing inst and InsertElement
  // inst before PHI-coalescing, which means it favors coalescing of those aliasing
  // inst and InsertElement instructions. Thus, values in AliasMap/InsEltMap are
  // guaranteed to be coalesced together at the end of DeSSA. PHI coalescing may
  // extend those maps by adding other values.
  //
  CoalesceAliasInst();
  CoalesceInsertElements();

  // checkPHILoopInput
  //   PreHeader:
  //        x = ...
  //   Header:
  //        phi0 = [x, PreHeader], [t0, End]
  //        phi1 = [x, PreHeader], [t1, End]
  //        phi2 = [x, PreHeader], [t2, End]
  //        ...
  //   End:
  //        ...
  //        goto Header
  //
  //   The algorithm below will start with a largest congruent class possible,
  //   which unions all phi's with the same source operands. This ends up with
  //   a single congruent class of all phi's with x as their source operand.
  //   Later, the algorithm isolates phi's as they interfere with each other,
  //   causing mov instructions to be generated within the loop at BB End.
  //
  //   However, since all phi instructions are live at the same time, we will
  //   not be able to coalesce them. In another word, there is no need to put
  //   all phi's into the same congruent class in the first place. To achieve
  //   this, we use a Value-to-int map to keep how many times a value is used
  //   in the phi's, and if the number of uses is over a threshold, we will
  //   isolate the source operand and do not union it with its phi. In doing
  //   so it is likely for the algorithm to coalesce the phi's dst and the
  //   other src that is used in the loop, and therefore remove mov instructions
  //   in the loop.
  //
  //   Note that isolating a value introduce additional copy, thus a threshold
  //   is used here as a heuristic to try to make sure that a benefit is more
  //   than the cost.
  enum { PHI_SRC_USE_THRESHOLD = 3 }; // arbitrary number
  DenseMap PHILoopPreHeaderSrcs;

  // build initial congruent class using union-find
  for (Function::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    // First, initialize PHILoopPreHeaderSrcs map
    BasicBlock *MBB = &*I;
    Loop *LP = LI ? LI->getLoopFor(MBB) : nullptr;
    BasicBlock *PreHeader = LP ? LP->getLoopPredecessor() : nullptr;
    // Only a loop header that has a loop predecessor is checked.
    bool checkPHILoopInput = LP && (LP->getHeader() == MBB) && PreHeader;
    PHILoopPreHeaderSrcs.clear();
    if (checkPHILoopInput) {
      // Count how many phis of this header take each value from PreHeader.
      for (BasicBlock::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE; ++BBI) {
        PHINode *PHI = dyn_cast(BBI);
        if (!PHI) {
          break;
        }

        int srcIx = PHI->getBasicBlockIndex(PreHeader);
        if (srcIx < 0) {
          continue;
        }
        Value *SrcVal = PHI->getOperand(srcIx);
        if (isa(SrcVal)) {
          continue;
        }
        if (PHILoopPreHeaderSrcs.count(SrcVal) == 0) {
          PHILoopPreHeaderSrcs[SrcVal] = 0; // initialize to zero
        }
        PHILoopPreHeaderSrcs[SrcVal] += 1;
      }
    }

    for (BasicBlock::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE; ++BBI) {
      PHINode *PHI = dyn_cast(BBI);
      if (!PHI) {
        break;
      }

      e_alignment DefAlign = GetPreferredAlignment(PHI, WIA, CTX);
      IGC_ASSERT(PHI == getNodeValue(PHI));

      addReg(PHI, DefAlign);
      PHISrcDefs[&(*I)].push_back(PHI);

      for (unsigned i = 0; i < PHI->getNumOperands(); ++i) {
        Value *OrigSrcVal = PHI->getOperand(i);
        // skip constant
        if (isa(OrigSrcVal))
          continue;

        // condition for preheader-src-isolation
        bool PreheaderSrcIsolation =
            (checkPHILoopInput && !isa(OrigSrcVal) && !isa(OrigSrcVal) && PHI->getIncomingBlock(i) == PreHeader &&
             PHILoopPreHeaderSrcs.count(OrigSrcVal) > 0 && PHILoopPreHeaderSrcs[OrigSrcVal] >= PHI_SRC_USE_THRESHOLD);
        // add src to the union
        Value *SrcVal;
        SrcVal = getNodeValue(OrigSrcVal);
        e_alignment SrcAlign = GetPreferredAlignment(OrigSrcVal, WIA, CTX);
        Instruction *DefMI = dyn_cast(SrcVal);
        if (DefMI) {
          if (CG->SIMDConstExpr(DefMI)) {
            continue; // special case, simdSize becomes a constant in vISA
          }
          addReg(SrcVal, SrcAlign);
          PHISrcDefs[DefMI->getParent()].push_back(DefMI);
          // Union only when both have the same WIA dependency.
          if (WIA->whichDepend(PHI) == WIA->whichDepend(SrcVal) && !PreheaderSrcIsolation) {
            unionRegs(PHI, SrcVal);
          }
        } else if (isa(SrcVal)) {
          addReg(SrcVal, SrcAlign);
          PHISrcArgs.insert(SrcVal);
          if (WIA->whichDepend(PHI) == WIA->whichDepend(SrcVal) && !PreheaderSrcIsolation) {
            unionRegs(PHI, SrcVal);
          }
        }
        // cases that we need to isolate source
        // NOTE(review): isolateReg() assumes SrcVal was registered by one of
        // the two branches above; confirm no other kind of value reaches here.
        if (CG->IsForceIsolated(SrcVal) || PreheaderSrcIsolation) {
          isolateReg(SrcVal);
        }
      } // end of source-operand loop
      // isolate complex type that IGC does not handle
      if (PHI->getType()->isStructTy() || PHI->getType()->isArrayTy()) {
        isolateReg(PHI);
      }
    }
  }

  // \todo, the original paper talks about some before-hand quick
  // isolation. The idea is to identify those essential splitting first
  // in order to avoid unnecessary splitting in the next loop.

  // Perform a depth-first traversal of the dominator tree, splitting
  // interferences amongst PHI-congruence classes.
  if (!RegNodeMap.empty()) {
    DenseMap CurrentDominatingParent;
    DenseMap ImmediateDominatingParent;
    // first, go through the function arguments
    SplitInterferencesForArgument(CurrentDominatingParent, ImmediateDominatingParent);
    // Then all the blocks
    for (df_iterator DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
      SplitInterferencesForBasicBlock(DI->getBlock(), CurrentDominatingParent, ImmediateDominatingParent);
    }
  }

  // Handle values that have specific alignment requirement.
  SplitInterferencesForAlignment();

  // Optional dump: to a per-function text file when DumpDeSSA is enabled,
  // otherwise to ods() when PrintToConsole is enabled.
  if (IGC_IS_FLAG_ENABLED(DumpDeSSA)) {
    const char *fname = MF.getName().data();
    using namespace IGC::Debug;
    auto name = DumpName(GetShaderOutputName())
                    .Hash(CTX->hash)
                    .Type(CTX->type)
                    .Pass("dessa")
                    .PostFix(fname)
                    .Retry(CTX->m_retryManager.GetRetryId())
                    .Extension("txt");

    Dump dessaDump(name, DumpType::DBG_MSG_TEXT);

    DumpLock();
    print(dessaDump.stream());
    DumpUnlock();
  } else if (IGC_IS_FLAG_ENABLED(PrintToConsole)) {
    print(ods());
  }
  m_F = nullptr;
  return false;
}

// Walks the dominator tree depth-first and fills AliasMap for the
// instructions that CG->NeedInstruction() accepts:
//   - chains of InsertElement whose first operand passes the isa<> check
//     below (see the in-code comment about 'undef');
//   - InsertValue (delegated to coalesceAliasInsertValue);
//   - cast instructions that are no-ops, or uniform vector casts to more
//     elements;
//   - GenISA_bitcastfromstruct / GenISA_bitcasttostruct intrinsics.
// For every alias recorded here the aliaser's uses are merged into the
// aliasee's liveness via LV->mergeUseFrom().
void DeSSA::CoalesceAliasInst() {
  for (df_iterator DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
    BasicBlock *Blk = DI->getBlock();
    for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end(); BBI != BBE; ++BBI) {
      Instruction *I = &(*BBI);

      // We are after patternmatch, would it make more sense to
      // iterate all patterns instead of instructions ?
      if (!CG->NeedInstruction(*I)) {
        continue;
      }

      if (InsertElementInst *IEI = dyn_cast(I)) {
        if (isa(I->getOperand(0))) {
          SmallVector AllIEIs;
          int nelts = checkInsertElementAlias(IEI, AllIEIs);
          if (nelts > 1) {
            //  Consider the following as an alias if all
            //  Vi, i=0, n-1 (except Vn) has a single use.
            //     V0 = InsElt undef, S0, 0
            //     V1 = InsElt V0, S1, 1
            //     ...
            //     Vn = InsElt Vn-1, Sn, n
            //
            //  AliasMap has the following:
            //     alias(V0, V0)
            //     alias(V1, V0)
            //     alias(V2, V0)
            //     ......
            //     alias(Vn, V0)  <-- V0 is the root!
            //
            //  Note that elements could be sparse like
            //     V0 = InsElt Undef, S1, 1
            //     V1 = InsElt V0,    s3, 2
            //
            Value *aliasee = AllIEIs[0];
            AddAlias(aliasee);
            for (int i = 1; i < nelts; ++i) {
              Value *V = AllIEIs[i];
              AliasMap[V] = aliasee;

              // union liveness info
              LV->mergeUseFrom(aliasee, V);
            }
          }
        }
      }
      if (InsertValueInst *IVI = dyn_cast(I)) {
        // Handle insertValue to struct.
        //
        // insertvalue exists when
        //   1. there are calls to functions with struct-type arguments
        //      and struct is simple (for example, no struct type as its
        //      field type) and its size is small (<= 16 bytes ?) so that
        //      it will be passed as struct. (Normally, FE will turn a
        //      larger/not-simple struct arg into a "byval" pointer.)
        //   2. igc generates it internally
        coalesceAliasInsertValue(IVI);
      } else if (CastInst *CastI = dyn_cast(I)) {
        Value *D = CastI;
        Value *S = CastI->getOperand(0);

        // For vector bitcast, if both D and S are uniform, they have
        // the same layout in GRF and can be aliased.
        //
        // For now, only do it if S's element type is larger; otherwise
        // visa variable's alignment might use the smaller element type
        // and it is not clear what is a clean way to fix that.
        // For example:
        //    b = bitcast i64 to <2xi32>
        // it is okay, but the following isn't
        //    b = bitcast <2xi32> to i64
        Type *dTy = D->getType();
        Type *sTy = S->getType();
        IGCLLVM::FixedVectorType *dVTy = dyn_cast(dTy);
        IGCLLVM::FixedVectorType *sVTy = dyn_cast(sTy);
        // A non-vector type counts as a single element.
        int d_nelts = dVTy ? (int)dVTy->getNumElements() : 1;
        int s_nelts = sVTy ? (int)sVTy->getNumElements() : 1;
        const bool canAliasVecCast = (!dVTy || sVTy) && d_nelts > s_nelts && // S's element type is larger
                                     !isa(dTy->getScalarType()) && !isa(sTy->getScalarType());

        if (isArgOrNeededInst(S) && WIA->whichDepend(D) == WIA->whichDepend(S) &&
            (isNoOpInst(CastI, CTX) || (WIA->isUniform(D) && canAliasVecCast))) {
          if (AliasMap.count(D) == 0) {
            AddAlias(S);
            Value *aliasee = AliasMap[S];
            AliasMap[D] = aliasee;

            // D will be deleted due to aliasing
            NoopAliasMap[D] = 1;

            // union liveness info
            LV->mergeUseFrom(aliasee, D);
          } else {
            // Only src operands of a phi can be visited before
            // operands' definition. For other instructions such
            // as castInst, this shall never happen
            IGC_ASSERT_MESSAGE(0, "ICE: Use visited before definition!");
          }
        }
      } else if (isa(I)) {
        if (GenIntrinsicInst *GII = dyn_cast(I)) {
          auto GIIid = GII->getIntrinsicID();
          if ((GIIid == GenISAIntrinsic::GenISA_bitcastfromstruct ||
               GIIid == GenISAIntrinsic::GenISA_bitcasttostruct) &&
              !isa(GII->getOperand(0))) {
            // special cast just for load/store.
            Value *D = GII;
            Value *S = GII->getOperand(0);
            if (GIIid == GenISAIntrinsic::GenISA_bitcastfromstruct) {
              // D must be int or int vector type; S must be struct type.
              IGC_ASSERT(D->getType()->getScalarType()->isIntegerTy());
              IGC_ASSERT(S->getType()->isStructTy());
            } else if (GIIid == GenISAIntrinsic::GenISA_bitcasttostruct) {
              // S must be int or int vector type; D must be struct type.
              IGC_ASSERT(S->getType()->getScalarType()->isIntegerTy());
              IGC_ASSERT(D->getType()->isStructTy());
            }

            AddAlias(S);
            Value *aliasee = AliasMap[S];
            AliasMap[D] = aliasee;

            // D will be deleted due to aliasing
            NoopAliasMap[D] = 1;

            // union liveness info
            LV->mergeUseFrom(aliasee, D);
          }
        }
      }
    }
  }
}

// Registers Val with a new Node unless it already has one. The node is
// constructed in storage obtained from Allocator and receives the next color
// (++CurrColor) and the given alignment.
void DeSSA::addReg(Value *Val, e_alignment Align) {
  if (RegNodeMap.count(Val))
    return;
  RegNodeMap[Val] = new (Allocator) Node(Val, ++CurrColor, Align);
}

// Using Path Halving in union-find
// Returns the root of this node's tree. While walking up, each visited
// node's parent is re-pointed at its grandparent, so this call mutates the
// tree.
DeSSA::Node *DeSSA::Node::getLeader() {
  Node *N = this;
  Node *Parent = parent;
  Node *Grandparent = Parent->parent;

  while (Parent != Grandparent) {
    N->parent = Grandparent;
    N = Grandparent;
    Parent = N->parent;
    Grandparent = Parent->parent;
  }

  return Parent;
}

// Returns the value of the leader of Val's congruent class, or nullptr when
// Val has no node or its node is isolated. When pAlign is non-null it
// receives the leader's alignment.
Value *DeSSA::getRegRoot(Value *Val, e_alignment *pAlign) const {
  auto RI = RegNodeMap.find(Val);
  if (RI == RegNodeMap.end())
    return nullptr;
  Node *TheNode = RI->second;
  if (isIsolated(TheNode))
    return nullptr;
  Node *TheLeader = TheNode->getLeader();
  if (pAlign)
    *pAlign = TheLeader->alignment;
  return TheLeader->value;
}

// Returns the color of the leader of V's congruent class, or 0 when V has no
// node or its node is isolated.
int DeSSA::getRootColor(Value *V) {
  auto RI = RegNodeMap.find(V);
  if (RI == RegNodeMap.end())
    return 0;
  Node *TheNode = RI->second;
  if (isIsolated(TheNode))
    return 0;
  Node *TheLeader = TheNode->getLeader();
  return TheLeader->color;
}

// Merges the congruent classes containing Nd1 and Nd2 (no-op when they
// already share a leader). The leader with the higher rank becomes the new
// leader; on a tie Nd1's leader wins and its rank is incremented. The two
// circular member lists are spliced into one.
void DeSSA::unionRegs(Node *Nd1, Node *Nd2) {
  Node *N1 = Nd1->getLeader();
  Node *N2 = Nd2->getLeader();
  Node *NewLeader = nullptr;
  Node *Leadee = nullptr;

  if (N1 == N2)
    return;

  if (N1->rank > N2->rank) {
    NewLeader = N1;
    Leadee = N2;
  } else if (N1->rank < N2->rank) {
    NewLeader = N2;
    Leadee = N1;
  } else {
    NewLeader = N1;
    Leadee = N2;
    NewLeader->rank++;
  }

  IGC_ASSERT_MESSAGE(nullptr != NewLeader, "ICE: both leader and leadee shall not be null!");
  IGC_ASSERT_MESSAGE(nullptr != Leadee, "ICE: both leader and leadee shall not be null!");
  Leadee->parent = NewLeader;

  // Link the circular list of Leadee right before NewLeader
  Node *Leadee_prev = Leadee->prev;
  Node *NewLeader_prev = NewLeader->prev;
  NewLeader_prev->next = Leadee;
  Leadee->prev = NewLeader_prev;
  Leadee_prev->next = NewLeader;
  NewLeader->prev = Leadee_prev;
}

// Removes Val's node from its congruent class (see splitNode).
// NOTE(review): RegNodeMap[Val] uses operator[]; if Val was never passed to
// addReg() this presumably inserts a null entry and splitNode() then
// dereferences a null pointer. All callers visible here register the value
// first, but a find()-based lookup would be safer; confirm.
void DeSSA::isolateReg(Value *Val) {
  Node *ND = RegNodeMap[Val];
  splitNode(ND);
}

// Returns true when V has no node in RegNodeMap; otherwise defers to the
// Node overload of isIsolated().
bool DeSSA::isIsolated(Value *V) const {
  auto RI = RegNodeMap.find(V);
  if (RI == RegNodeMap.end()) {
    return true;
  }
  Node *DestNode = RI->second;
  return isIsolated(DestNode);
}

// Split node ND from its existing congruent class, and the
// node ND itself becomes a new single-value congruent class.
void DeSSA::splitNode(Node *ND) {
  Node *N = ND->next;
  if (N == ND) {
    // ND is already in a single-value congruent class
    return;
  }

  // Capture the leader before the links are modified.
  Node *Leader = ND->getLeader();

  // Remove ND from the congruent class
  Node *P = ND->prev;
  N->prev = P;
  P->next = N;

  // ND : a new single-value congruent class
  ND->parent = ND;
  ND->next = ND;
  ND->prev = ND;
  ND->rank = 0;

  // If leader is removed, need to have a new leader.
  if (Leader == ND) {
    // P will be the new leader. Also swap ND's color with P's
    // so that the original congruent class still have the original
    // color (this is important as Dom traversal assumes that the
    // color of any congruent class remains unchanged).
    int t = P->color;
    P->color = ND->color;
    ND->color = t;

    // New leader
    Leader = P;
  }

  // If ND has children, those children need to set their parent.
  // Since we don't know if ND has children, we conservatively set
  // parent for all remaining nodes using "a path compression", so
  // that all nodes remains in the same rooted tree.
  N = Leader->next;
  Leader->parent = Leader;
  Leader->rank = (Leader == N) ? 0 : 1;
  while (N != Leader) {
    N->parent = Leader;
    N->rank = 0;
    N = N->next;
  }
}

/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
/// interferences found between registers in the same congruence class. It
/// takes two DenseMaps as arguments that it also updates:
///
/// 1) CurrentDominatingParent, which maps a color to the register in that
///    congruence class whose definition was most recently seen.
///
/// 2) ImmediateDominatingParent, which maps a register to the register in the
///    same congruence class that most immediately dominates it.
///
/// This function assumes that it is being called in a depth-first traversal
/// of the dominator tree.
///
/// The algorithm used here is a generalization of the dominance-based SSA test
/// for two variables. If there are variables a_1, ..., a_n such that
///
///   def(a_1) dom ... dom def(a_n),
///
/// then we can test for an interference between any two a_i by only using O(n)
/// interference tests between pairs of variables. If i < j and a_i and a_j
/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
/// Thus, in order to test for an interference involving a_i, we need only check
/// for a potential interference with a_i+1.
///
/// This method can be generalized to arbitrary sets of variables by performing
/// a depth-first traversal of the dominator tree. As we traverse down a branch
/// of the dominator tree, we keep track of the current dominating variable and
/// only perform an interference test with that variable. However, when we go to
/// another branch of the dominator tree, the definition of the current dominating
/// variable may no longer dominate the current block. In order to correct this,
/// we need to use a stack of past choices of the current dominating variable
/// and pop from this stack until we find a variable whose definition actually
/// dominates the current block.
///
/// There will be one push on this stack for each variable that has become the
/// current dominating variable, so instead of using an explicit stack we can
/// simply associate the previous choice for a current dominating variable with
/// the new choice. This works better in our implementation, where we test for
/// interference in multiple distinct sets at once.
void DeSSA::SplitInterferencesForBasicBlock(BasicBlock *MBB, DenseMap &CurrentDominatingParent,
                                            DenseMap &ImmediateDominatingParent) {
  // NOTE(review): template argument lists ("<...>") of DenseMap/std::vector/
  // std::pair/dyn_cast/isa/cast appear to have been stripped from this copy
  // of the file by text extraction; code tokens are left exactly as found.

  // Sort defs by their order in the original basic block, as the code below
  // assumes that it is processing definitions in dominance order.
  std::vector &DefInstrs = PHISrcDefs[MBB];
  std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LV));

  for (std::vector::const_iterator BBI = DefInstrs.begin(), BBE = DefInstrs.end(); BBI != BBE; ++BBI) {

    Instruction *DefMI = *BBI;

    // If the virtual register being defined is not used in any PHI or has
    // already been isolated, then there are no more interferences to check.
    int RootC = getRootColor(DefMI);
    if (!RootC)
      continue;

    // The input to this pass sometimes is not in SSA form in every basic
    // block, as some virtual registers have redefinitions. We could eliminate
    // this by fixing the passes that generate the non-SSA code, or we could
    // handle it here by tracking defining machine instructions rather than
    // virtual registers. For now, we just handle the situation conservatively
    // in a way that will possibly lead to false interferences.
    Value *NewParent = CurrentDominatingParent[RootC];
    if (NewParent == DefMI)
      continue;

    // Pop registers from the stack represented by ImmediateDominatingParent
    // until we find a parent that dominates the current instruction.
    while (NewParent) {
      // Parents that have been isolated since they were pushed (root color
      // 0) are skipped.
      if (getRootColor(NewParent)) {
        // we have added the another condition because the domination-test
        // does not work between two phi-node. See the following comments
        // from the DT::dominates:
        // " It is not possible to determine dominance between two PHI nodes
        //   based on their ordering
        //   if (isa(A) && isa(B))
        //     return false;"
        if (isa(NewParent)) {
          break;
        } else if (DT->dominates(cast(NewParent), DefMI)) {
          break;
        } else if (cast(NewParent)->getParent() == MBB && isa(DefMI) && isa(NewParent)) {
          break;
        }
      }
      NewParent = ImmediateDominatingParent[NewParent];
    }
    // If NewParent is nonzero, then its definition dominates the current
    // instruction, so it is only necessary to check for the liveness of
    // NewParent in order to check for an interference.
    if (NewParent && LV->isLiveAt(NewParent, DefMI)) {
      // If there is an interference, always isolate the new register. This
      // could be improved by using a heuristic that decides which of the two
      // registers to isolate.
      isolateReg(DefMI);
      CurrentDominatingParent[RootC] = NewParent;
    } else {
      // If there is no interference, update ImmediateDominatingParent and set
      // the CurrentDominatingParent for this color to the current register.
      ImmediateDominatingParent[DefMI] = NewParent;
      CurrentDominatingParent[RootC] = DefMI;
    }
  }

  // We now walk the PHIs in successor blocks and check for interferences. This
  // is necessary because the use of a PHI's operands are logically contained in
  // the predecessor block. The def of a PHI's destination register is processed
  // along with the other defs in a basic block.

  CurrentPHIForColor.clear();

  for (succ_iterator SI = succ_begin(MBB), E = succ_end(MBB); SI != E; ++SI) {
    for (BasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); BBI != BBE; ++BBI) {
      PHINode *PHI = dyn_cast(BBI);
      if (!PHI) {
        break;
      }

      int RootC = getRootColor(PHI);
      // check live-out interference
      if (IGC_IS_FLAG_ENABLED(EnableDeSSAWA) && !RootC) {
        // [todo] delete this code
        // For compute shaders only: when the PHI itself has no color, fall
        // back to the first operand whose node value has one.
        if (CTX->type == ShaderType::COMPUTE_SHADER) {
          for (unsigned i = 0; !RootC && i < PHI->getNumOperands(); i++) {
            Value *SrcVal = PHI->getOperand(i);
            if (!isa(SrcVal)) {
              SrcVal = getNodeValue(SrcVal);
              RootC = getRootColor(SrcVal);
            }
          }
        }
      }
      if (!RootC) {
        continue;
      }
      // Find the index of the PHI operand that corresponds to this basic block.
      unsigned PredIndex;
      for (PredIndex = 0; PredIndex < PHI->getNumOperands(); ++PredIndex) {
        if (PHI->getIncomingBlock(PredIndex) == MBB)
          break;
      }
      IGC_ASSERT(PredIndex < PHI->getNumOperands());
      Value *PredValue = PHI->getOperand(PredIndex);
      PredValue = getNodeValue(PredValue);
      std::pair &CurrentPHI = CurrentPHIForColor[RootC];
      // If two PHIs have the same operand from every shared predecessor, then
      // they don't actually interfere. Otherwise, isolate the current PHI. This
      // could possibly be improved, e.g. we could isolate the PHI with the
      // fewest operands.
      if (CurrentPHI.first && CurrentPHI.second != PredValue) {
        isolateReg(PHI);
        continue;
      } else {
        CurrentPHI = std::make_pair(PHI, PredValue);
      }

      // check live-out interference
      // Pop registers from the stack represented by ImmediateDominatingParent
      // until we find a parent that dominates the current instruction.
      Value *NewParent = CurrentDominatingParent[RootC];
      while (NewParent) {
        if (getRootColor(NewParent)) {
          if (isa(NewParent)) {
            break;
          } else if (DT->dominates(cast(NewParent)->getParent(), MBB)) {
            break;
          }
        }
        NewParent = ImmediateDominatingParent[NewParent];
      }
      CurrentDominatingParent[RootC] = NewParent;

      // If there is an interference with a register, always isolate the
      // register rather than the PHI. It is also possible to isolate the
      // PHI, but that introduces copies for all of the registers involved
      // in that PHI.
      if (NewParent && NewParent != PredValue && LV->isLiveOut(NewParent, *MBB)) {
        isolateReg(NewParent);
      }
    }
    // fix situation in which uniform-phi is inside a divergent-join.
    // The following is an example of 3-way join by nested-if
    //
    //    %22 = phi i32 [ 13, blk19], [%16, blk0], [ 13, blk17]
    //    %23 = phi i32 [%16, blk19], [ 0, blk0], [%16, blk17]
    //  blk0 is from the uniform-if
    //  blk17 is from the divergent-if
    //  blk19 is from the divergent-if
    //
    // if we coalesce %16 and %22 with the same register V5
    // then we still need to insert the following move in on divergent
    // edges
    //  blk-17:
    //    mov(W) v5, 13
    //  blk-19:
    //    mov(W) v5, 13
    //
    // however, %16 and %23 are not coalesced. so we add the other two moves
    //  blk-17:
    //    mov(W) v6, v5
    //    mov(W) v5, 13     NOTE: this move changes v5, subsequently wrong v6
    //  blk-19:
    //    mov(W) v6, v5
    //    mov(W) v5, 13
    // Due to divergent CF, both blk-17 and blk-19 are executed, causes problem

    // so we know MBB-target, i.e. the phi-block is a divergent-join
    if (WIA->insideDivergentCF(MBB->getTerminator())) {
      for (BasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); BBI != BBE; ++BBI) {
        PHINode *PHI = dyn_cast(BBI);
        if (!PHI)
          break;
        if (!WIA->isUniform(PHI))
          continue;
        auto RootC = getRootColor(PHI);
        if (!RootC)
          continue;
        // so this is uniform phi and not-isolated
        unsigned PredIndex;
        for (PredIndex = 0; PredIndex < PHI->getNumOperands(); ++PredIndex) {
          if (PHI->getIncomingBlock(PredIndex) == MBB)
            break;
        }
        IGC_ASSERT(PredIndex < PHI->getNumOperands());
        Value *PredValue = PHI->getOperand(PredIndex);
        // check if we will need to add a phi-copy from MBB
        if (isa(PredValue) || isIsolated(PredValue)) {
          // adding a phi-copy interferes with current live-out
          if (CurrentDominatingParent[RootC])
            isolateReg(PHI);
        }
      }
    }
  }
}

// Seeds CurrentDominatingParent with the function arguments collected in
// PHISrcArgs: for each color, the first non-isolated argument encountered
// becomes the current dominating parent, and every later argument with the
// same color is isolated. ImmediateDominatingParent is not touched here.
void DeSSA::SplitInterferencesForArgument(DenseMap &CurrentDominatingParent, DenseMap &ImmediateDominatingParent) {
  // No two arguments can be in the same congruent class
  for (auto BBI = PHISrcArgs.begin(), BBE = PHISrcArgs.end(); BBI != BBE; ++BBI) {
    Value *AV = *BBI;
    // If the virtual register being defined is not used in any PHI or has
    // already been isolated, then there are no more interferences to check.
    int RootC = getRootColor(AV);
    if (!RootC)
      continue;
    Value *NewParent = CurrentDominatingParent[RootC];
    if (NewParent) {
      isolateReg(AV);
    } else {
      CurrentDominatingParent[RootC] = AV;
    }
  }
}

// [todo] get rid of alignment-based isolation in dessa.
// Using alignment in isolation seems over-kill. The right approach
// would be one that avoids adding values with conflicting alignment
// requirement in the same congruent, not adding them in the same
// congruent class first and trying to isolate them later.
void DeSSA::SplitInterferencesForAlignment() { for (auto I = RegNodeMap.begin(), E = RegNodeMap.end(); I != E; ++I) { // Find a root Node Node *rootNode = I->second; if (rootNode->parent != rootNode) { continue; } e_alignment Align = EALIGN_AUTO; // Find the most restrictive alignment, i.e. GRF aligned ones. Node *N = rootNode; Node *Curr; do { Curr = N; N = Curr->next; if (Curr->alignment == EALIGN_GRF) { Align = EALIGN_GRF; break; } } while (N != rootNode); if (Align != EALIGN_GRF) continue; // Isolate any mis-aligned value. // Start with Curr node as it cannot be isolated // (rootNode could be isolated), therefore, it remains // in the linked list and can be used to test stop looping. Node *Head = Curr; N = Head; do { Curr = N; N = N->next; if (Curr->alignment != EALIGN_AUTO && Curr->alignment != EALIGN_GRF) { IGC_ASSERT(nullptr != Curr); IGC_ASSERT_MESSAGE((Curr != Head), "Head Node cannot be isolated, something wrong!"); isolateReg(Curr->value); } } while (N != Head); // Update root's alignment. 
Head->getLeader()->alignment = Align; } } Value *DeSSA::getInsEltRoot(Value *Val) const { auto RI = InsEltMap.find(Val); if (RI == InsEltMap.end()) return Val; return RI->second; } /// /// Identify if an instruction has partial write semantics /// /// /// the index of the source partial-write operand static int getPartialWriteSource(Value *Inst) { if (isa(Inst)) return 0; // source 0 is the original value if (auto CI = dyn_cast(Inst)) { // only handle inline-asm with simple destination if (CI->isInlineAsm() && !CI->getType()->isStructTy()) { InlineAsm *IA = cast(IGCLLVM::getCalledValue(CI)); StringRef constraintStr(IA->getConstraintString()); SmallVector constraints; constraintStr.split(constraints, ','); for (int i = 0; i < (int)constraints.size(); i++) { unsigned destID = 0; if (constraints[i].getAsInteger(10, destID) == 0) { // constraint-string indicates that source(i-1) and // destination should be the same vISA variable if (i > 0 && destID == 0) return (i - 1); } } } } return -1; } void DeSSA::CoalesceInsertElements() { for (df_iterator DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) { BasicBlock *Blk = DI->getBlock(); for (BasicBlock::iterator BBI = Blk->begin(), BBE = Blk->end(); BBI != BBE; ++BBI) { Instruction *Inst = &(*BBI); if (!CG->NeedInstruction(*Inst)) { continue; } // Only Aliasee needs to be handled. 
if (getAliasee(Inst) != Inst) { continue; } // For keeping the existing behavior of InsEltMap unchanged auto PWSrcIdx = getPartialWriteSource(Inst); if (PWSrcIdx >= 0) { Value *origSrcV = Inst->getOperand(PWSrcIdx); Value *SrcV = getAliasee(origSrcV); if (SrcV != Inst && isArgOrNeededInst(origSrcV)) { // union them e_alignment InstAlign = GetPreferredAlignment(Inst, WIA, CTX); e_alignment SrcVAlign = GetPreferredAlignment(SrcV, WIA, CTX); if (!LV->isLiveAt(SrcV, Inst) && !alignInterfere(InstAlign, SrcVAlign) && (WIA->whichDepend(SrcV) == WIA->whichDepend(Inst))) { InsEltMapAddValue(SrcV); InsEltMapAddValue(Inst); Value *SrcVRoot = getInsEltRoot(SrcV); Value *InstRoot = getInsEltRoot(Inst); // union them and their liveness info InsEltMapUnionValue(SrcV, Inst); LV->mergeUseFrom(SrcVRoot, InstRoot); } } } } } } Value *DeSSA::getRootValue(Value *Val, e_alignment *pAlign) const { Value *mapVal = nullptr; auto AI = AliasMap.find(Val); if (AI != AliasMap.end()) { mapVal = AI->second; } auto IEI = InsEltMap.find(mapVal ? mapVal : Val); if (IEI != InsEltMap.end()) { mapVal = IEI->second; } Value *PhiRootVal = getRegRoot(mapVal ? mapVal : Val, pAlign); return (PhiRootVal ? PhiRootVal : mapVal); } void DeSSA::getAllValuesInCongruentClass(Value *V, SmallVector &ValsInCC) { // Handle InsertElement specially. Note that only rootValue from // a sequence of insertElement is in congruent class. The RootValue // has its liveness modified to cover all InsertElements that are // grouped together. Value *RootV = getNodeValue(V); IGC_ASSERT_MESSAGE(nullptr != RootV, "ICE: Node value should not be nullptr!"); ValsInCC.push_back(RootV); auto RI = RegNodeMap.find(RootV); if (RI != RegNodeMap.end()) { Node *First = RI->second; for (Node *N = First->next; N != First; N = N->next) { ValsInCC.push_back(N->value); } } return; } // All values that are coalesced together, including values that are // handled specially, such as ones in aliasMap and insEltMap. 
void DeSSA::getAllCoalescedValues(Value *V, SmallVector &Vals) { getAllValuesInCongruentClass(V, Vals); IGC_ASSERT_MESSAGE(Vals.size() > 0, "ICE: Vals should not be empty!"); // First, add values from InsEltMap for (int i = 0, sz = (int)Vals.size(); i < sz; ++i) { Value *ccVal = Vals[i]; for (const auto &II : InsEltMap) { Value *R = II.second; if (R != ccVal) { continue; } Value *A = II.first; if (A != R) { Vals.push_back(A); } } } // second, add aliasers from AliasMap for (int i = 0, sz = (int)Vals.size(); i < sz; ++i) { Value *aliasee = Vals[i]; for (const auto &II : AliasMap) { Value *R = II.second; if (R != aliasee) { continue; } Value *aliaser = II.first; if (aliaser != aliasee) { Vals.push_back(aliaser); } } } return; } void DeSSA::coalesceAliasInsertValue(InsertValueInst *theIVI) { // Find a chain of insertvalue, and return the lead (last one). auto getInsValChain = [this](InsertValueInst *aIVI, SmallVector &IVIs) { IGC_ASSERT(aIVI); InsertValueInst *lead = aIVI; const bool isUniform = WIA->isUniform(lead); IVIs.push_back(lead); while (lead->hasOneUse()) { auto next = dyn_cast(lead->user_back()); if (!next || isUniform != WIA->isUniform(next)) { break; } lead = next; IVIs.push_back(lead); } return lead; }; auto setInsValAlias = [this](SmallVector &IVIs) { int nelts = (int)IVIs.size(); if (nelts > 1) { Value *aliasee = IVIs[0]; AddAlias(aliasee); for (int i = 1; i < nelts; ++i) { Value *V = IVIs[i]; AliasMap[V] = aliasee; // union liveness info LV->mergeUseFrom(aliasee, V); } } }; // Get all fields' indices if there are at most 2 level fields. // Using int = to represent two // level indices. 
auto getIndices = [](DenseSet &aFields, SmallVector &IVIs) { int nelts = (int)IVIs.size(); for (int i = 0; i < nelts; ++i) { InsertValueInst *tI = cast(IVIs[i]); switch (tI->getNumIndices()) { case 1: { uint16_t ix = (uint16_t)tI->getIndices().front(); aFields.insert((int)ix); break; } case 2: { ArrayRef ids = tI->getIndices(); uint16_t lvl0_ix = (uint16_t)ids[0]; uint16_t lvl1_ix = (uint16_t)ids[1]; uint32_t ix = ((lvl1_ix << 16) | lvl0_ix); aFields.insert((int)ix); break; } default: aFields.clear(); return; } } }; auto isDisjoin = [](DenseSet &RHS, DenseSet &LHS) { for (auto II : LHS) { int e = II; if (RHS.find(e) != RHS.end()) { return false; } } return true; }; // InsertValueInst chain: // V0 = InsVal undef/const, S0, 0 // V1 = InsVal V0, S1, 1 // ... // Vn = InsVal Vn-1, Sn, // where all Vi, i=0, n-1 has a single use. This sequence is called // InsertValueInst chain (IVI Chain). And they (all Vi) are set to alias // each other. // // Start finding IVI chains from an inst that cannot be an operand of // the other InsertValueInst. It's possible to have several IVI chains. // For example, // Chain0: V0 = insval undef // V1 = insVal V0 // Chain1: V2 = insVal V1 // V3 = insVal V2 // Chain2: V4 = insVal V1 // Chain3: V5 = insVal V4 // Chain4: V6 = insVal V4 // This can be viewed as a tree of IVI chains, rooted at V0, with edge // denoting def-use relation between two chains. The tree for the above // is as below: // // Chain0 // / \ // Chain1 Chain2 // / \ // Chain3 Chain4 // // The algorithm does the following: // 1. Find all chains. All insts (Vs) of each chain are aliased to // each other in the same chain. // 2. 
Do aggressive aliasing : alias several chains along one path from // root to a leaf node, as long as no struct field is defined by // more than one chains, which is a strong condition and implies that // the root inst should start like the followning: // V0 = insval undef, s, 10 // const Value *Oprd0 = theIVI->getOperand(0); if (theIVI->getType()->isStructTy() && (isa(Oprd0) || isa(Oprd0) || isa(Oprd0))) { SmallVector IVIChain; InsertValueInst *LeadInst = getInsValChain(theIVI, IVIChain); setInsValAlias(IVIChain); // For aggressive aliasing. lead inst is used to identify its // successor chain in the chain tree. bool aggressiveAliasing = false; DenseSet commonFields; const bool isChainUniform = WIA->isUniform(LeadInst); Value *theAliasee = getAliasee(LeadInst); if (isa(Oprd0)) { getIndices(commonFields, IVIChain); if (!commonFields.empty()) { // Fields inserted are known, can do aggressive coalescing. aggressiveAliasing = true; } } // Make sure every sub-chains rooted at theIVI are processed. 
std::list worklist; worklist.push_back(LeadInst); while (!worklist.empty()) { InsertValueInst *aI = worklist.front(); worklist.pop_front(); for (auto UI = aI->user_begin(), UE = aI->user_end(); UI != UE; ++UI) { Value *v = *UI; InsertValueInst *I = dyn_cast(v); if (!I) { continue; } IVIChain.clear(); InsertValueInst *nextLead = getInsValChain(I, IVIChain); setInsValAlias(IVIChain); worklist.push_back(nextLead); if (aggressiveAliasing && LeadInst == aI && isChainUniform == WIA->isUniform(nextLead)) { DenseSet fields; getIndices(fields, IVIChain); if (!fields.empty() && isDisjoin(commonFields, fields)) { LeadInst = nextLead; commonFields.insert(fields.begin(), fields.end()); // update alias AddAlias(theAliasee); for (auto II : IVIChain) { Value *V = II; AliasMap[V] = theAliasee; // union liveness info LV->mergeUseFrom(theAliasee, V); } } } } } } } int DeSSA::checkInsertElementAlias(InsertElementInst *IEI, SmallVector &AllIEIs) { IGC_ASSERT(nullptr != IEI); IGC_ASSERT_MESSAGE(isa(IEI->getOperand(0)), "ICE: need to pass first IEI as the argument"); // Find the the alias pattern: // V0 = IEI UndefValue, S0, 0 // V1 = IEI V0, S1, 1 // V2 = IEI V1, S2, 2 // ...... // Vn = IEI Vn_1, Sn_1, n // All Vi (i=0,n_1, except i=n) has a single-use. // // If found, return the actual vector size; // otherwise, return 0. IGCLLVM::FixedVectorType *VTy = cast(IEI->getType()); IGC_ASSERT(nullptr != VTy); int nelts = (int)VTy->getNumElements(); AllIEIs.resize(nelts, nullptr); InsertElementInst *Inst = IEI; IGC_ASSERT(nullptr != WIA); WIAnalysis::WIDependancy Dep = WIA->whichDepend(Inst); while (Inst) { // Check if Inst has constant index, stop if not. // (This is for catching a common case, a variable index // can be handled as well if needed.) 
ConstantInt *CI = dyn_cast(Inst->getOperand(2)); if (!CI) { return 0; } int ix = (int)CI->getZExtValue(); AllIEIs[ix] = Inst; if (!Inst->hasOneUse() || Dep != WIA->whichDepend(Inst)) { break; } Inst = dyn_cast(Inst->user_back()); } // Return the number of elements found int num = 0; for (int i = 0; i < nelts; ++i) { if (AllIEIs[i] == nullptr) continue; if (num < i) { // Pack them AllIEIs[num] = AllIEIs[i]; AllIEIs[i] = nullptr; } ++num; } return num; } Value *DeSSA::getAliasee(Value *V) const { auto AI = AliasMap.find(V); if (AI == AliasMap.end()) return V; return AI->second; } bool DeSSA::isAliaser(Value *V) const { auto AI = AliasMap.find(V); if (AI == AliasMap.end()) { return false; } return AI->first != AI->second; } bool DeSSA::isNoopAliaser(Value *V) const { return NoopAliasMap.count(V) > 0; } bool DeSSA::isAliasee(Value *V) const { auto AI = AliasMap.find(V); if (AI == AliasMap.end()) { return false; } return AI->first == AI->second; } // Single-valued: // If a variable takes one and only one value during its entire life time, // it is called single-valued. If a variable is of vector or struct, single // valued means that all its component (vector elements or struct members) // are all singled-valued. // // This concept is useful when setting up aliasing. For example, // a = bitcast b to i32 // If both a and b are singled valued, a will be set to alias b without // doing futher checking. However, if either isn't single valued, checking // if a can be aliased to b needs to interfere checking. For example // dessa CC : [b, c] // b and c are coalesced into a single variable. // // b = 1 // a = bitcast b to i32 // ... // c = 2 // ... // L: = a // = b // // In this case, if a is aliased to b, a would get 2 at L, but the correct // value should be 1. In order to find out if a can be aliased to b, it // requires to do interference checking with c. 
bool DeSSA::isSingleValued(llvm::Value *V) const { // InsEltMap and non-isolated : not single-valued Value *aliasee = getAliasee(V); Value *insEltRootV = getInsEltRoot(aliasee); if (InsEltMap.count(aliasee) || !isIsolated(insEltRootV)) { return false; } return true; } // The following paper explains an approach to check if two // congruent classes interfere using a linear approach. // // Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, // Code Quality and Efficiency, // In Proceedings of the 7th annual IEEE/ACM International Symposium // on Code Generation and Optimization (Seattle, Washington, // March 22 - 25, 2009). CGO '09. IEEE, Washington, DC, 114-125. // // Here, we simply use a naive pair-wise comparison. // // TODO: check if using linear approach described in the paper is // necessary; To do so, it needs to get PN (preorder number of BB) // and sort congruent classes before doing interference checking. bool DeSSA::interfere(llvm::Value *V0, llvm::Value *V1) { SmallVector allCC0; SmallVector allCC1; getAllValuesInCongruentClass(V0, allCC0); getAllValuesInCongruentClass(V1, allCC1); for (int i0 = 0, sz0 = (int)allCC0.size(); i0 < sz0; ++i0) { Value *val0 = allCC0[i0]; for (int i1 = 0, sz1 = (int)allCC1.size(); i1 < sz1; ++i1) { Value *val1 = allCC1[i1]; if (LV->hasInterference(val0, val1)) { return true; } } } return false; } // Alias interference checking. // The caller is trying to check if V0 can alias to V1. For example, // V0 = bitcast V1, or // V0 = extractElement V1, ... // As V0 and V1 hold the same value, the interference between these two // does not matter. Thus, this function is a variant of interfere() // with V0 and V1 interference ignored. 
bool DeSSA::aliasInterfere(llvm::Value *V0, llvm::Value *V1) { SmallVector allCC0; SmallVector allCC1; getAllValuesInCongruentClass(V0, allCC0); getAllValuesInCongruentClass(V1, allCC1); Value *V0_aliasee = getAliasee(V0); Value *V1_aliasee = getAliasee(V1); // // If aliasee is in InsEltMap, it is not single valued // and cannot be excluded from interfere checking. // // For example: // x = bitcast y // z = InsElt y, ... // = x // = y // // {y, z} are coalesced via InsElt, interfere(x, y) // must be checked. // However, if y (and x too) is not in InsEltMap, no need // to check interfere(x, y) as they have the same value // as the following: // x = bitcast y // = x // = y // bool V0_oneValue = (InsEltMap.count(V0_aliasee) == 0); bool V1_oneValue = (InsEltMap.count(V1_aliasee) == 0); bool both_singleValue = (V0_oneValue && V1_oneValue); for (int i0 = 0, sz0 = (int)allCC0.size(); i0 < sz0; ++i0) { Value *val0 = allCC0[i0]; for (int i1 = 0, sz1 = (int)allCC1.size(); i1 < sz1; ++i1) { Value *val1 = allCC1[i1]; if (both_singleValue && val0 == V0_aliasee && val1 == V1_aliasee) { continue; } if (LV->hasInterference(val0, val1)) { return true; } } } return false; } // The existing code does align interference checking. Just // keep it for now. Likely to improve it later. bool DeSSA::alignInterfere(e_alignment a1, e_alignment a2) { if (a1 == EALIGN_GRF && !(a2 == EALIGN_GRF || a2 == EALIGN_AUTO)) { return true; } if (a2 == EALIGN_GRF && !(a1 == EALIGN_GRF || a1 == EALIGN_AUTO)) { return true; } return false; }