mirror of
https://github.com/intel/intel-graphics-compiler.git
synced 2025-10-30 08:18:26 +08:00
1584 lines
51 KiB
C++
1584 lines
51 KiB
C++
/*========================== begin_copyright_notice ============================
|
|
|
|
Copyright (C) 2017-2021 Intel Corporation
|
|
|
|
SPDX-License-Identifier: MIT
|
|
|
|
============================= end_copyright_notice ===========================*/
|
|
|
|
#include "VariableReuseAnalysis.hpp"
|
|
#include "Compiler/IGCPassSupport.h"
|
|
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
|
|
#include "Compiler/CodeGenPublic.h"
|
|
#include "common/LLVMWarningsPush.hpp"
|
|
#include <llvm/Support/Debug.h>
|
|
#include "llvmWrapper/IR/DerivedTypes.h"
|
|
#include "common/LLVMWarningsPop.hpp"
|
|
#include <algorithm>
|
|
#include "Probe/Assertion.h"
|
|
|
|
using namespace llvm;
|
|
using namespace IGC;
|
|
using namespace IGC::IGCMD;
|
|
|
|
namespace {
|
|
// If V is scalar, return 1.
|
|
// if V is vector, return the number of elements.
|
|
inline int getNumElts(Value *V) {
|
|
IGCLLVM::FixedVectorType *VTy = dyn_cast<IGCLLVM::FixedVectorType>(V->getType());
|
|
return VTy ? (int)VTy->getNumElements() : 1;
|
|
}
|
|
|
|
inline int getTypeSizeInBits(Type *Ty) {
|
|
int scalarBits = Ty->getScalarSizeInBits();
|
|
IGCLLVM::FixedVectorType *VTy = dyn_cast<IGCLLVM::FixedVectorType>(Ty);
|
|
return scalarBits * (VTy ? (int)VTy->getNumElements() : 1);
|
|
}
|
|
|
|
e_alignment getMinAlignment(Value *V, WIAnalysis *WIA, CodeGenContext *pContext) {
|
|
auto grfAlignment = [pContext]() { return pContext->platform.getGRFSize() == 64 ? EALIGN_32WORD : EALIGN_HWORD; };
|
|
|
|
auto getSendPayloadAlignment = [pContext](const bool Is64BitTy) {
|
|
if (pContext->platform.getGRFSize() == 64 /*bytes*/)
|
|
return Is64BitTy ? EALIGN_64WORD : EALIGN_32WORD;
|
|
return Is64BitTy ? EALIGN_32WORD : EALIGN_HWORD;
|
|
};
|
|
|
|
// Type* eltTy = V->getType()->getScalarType();
|
|
bool is64BitTy = false; // (eltTy->getPrimitiveSizeInBits() > 32);
|
|
|
|
// GRF-aligned for send operands
|
|
// check if V is defined by send
|
|
if (GenIntrinsicInst *CI = dyn_cast<GenIntrinsicInst>(V)) {
|
|
switch (CI->getIntrinsicID()) {
|
|
case GenISAIntrinsic::GenISA_sub_group_dpas:
|
|
return grfAlignment();
|
|
case GenISAIntrinsic::GenISA_simdBlockRead:
|
|
case GenISAIntrinsic::GenISA_LSC2DBlockRead:
|
|
return getSendPayloadAlignment(is64BitTy);
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Check if V is used in send
|
|
bool isSend = false;
|
|
for (auto UI = V->user_begin(), UE = V->user_end(); UI != UE; ++UI) {
|
|
User *U = *UI;
|
|
if (isa<LoadInst>(U) || isa<StoreInst>(U))
|
|
isSend = true;
|
|
if (GenIntrinsicInst *CI = dyn_cast<GenIntrinsicInst>(V)) {
|
|
switch (CI->getIntrinsicID()) {
|
|
// GetPreferredAlilgnment() will handle dpas
|
|
// case GenISAIntrinsic::GenISA_sub_group_dpas:
|
|
case GenISAIntrinsic::GenISA_simdBlockWrite:
|
|
case GenISAIntrinsic::GenISA_LSC2DBlockWrite:
|
|
isSend = true;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if (isSend)
|
|
return getSendPayloadAlignment(is64BitTy);
|
|
return GetPreferredAlignment(V, WIA, pContext);
|
|
}
|
|
} // namespace
|
|
|
|
char VariableReuseAnalysis::ID = 0;
|
|
|
|
IGC_INITIALIZE_PASS_BEGIN(VariableReuseAnalysis, "VariableReuseAnalysis", "VariableReuseAnalysis", false, true)
|
|
// IGC_INITIALIZE_PASS_DEPENDENCY(RegisterEstimator)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(LiveVarsAnalysis)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenPatternMatch)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(DeSSA)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(CoalescingEngine)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(BlockCoalescing)
|
|
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenContextWrapper)
|
|
IGC_INITIALIZE_PASS_END(VariableReuseAnalysis, "VariableReuseAnalysis", "VariableReuseAnalysis", false, true)
|
|
|
|
llvm::FunctionPass *IGC::createVariableReuseAnalysisPass() { return new VariableReuseAnalysis; }
|
|
|
|
VariableReuseAnalysis::VariableReuseAnalysis()
|
|
: FunctionPass(ID), m_pCtx(nullptr), m_WIA(nullptr), m_LV(nullptr), m_DeSSA(nullptr), m_PatternMatch(nullptr),
|
|
m_coalescingEngine(nullptr), m_RPE(nullptr), m_SimdSize(0), m_IsFunctionPressureLow(Status::Undef),
|
|
m_IsBlockPressureLow(Status::Undef), m_BBSizeThreshold(IGC_GET_FLAG_VALUE(ScalarAliasBBSizeThreshold)) {
|
|
initializeVariableReuseAnalysisPass(*PassRegistry::getPassRegistry());
|
|
}
|
|
|
|
bool VariableReuseAnalysis::runOnFunction(Function &F) {
|
|
m_F = &F;
|
|
|
|
m_WIA = &(getAnalysis<WIAnalysis>());
|
|
if (IGC_IS_FLAG_ENABLED(EnableDeSSA)) {
|
|
m_DeSSA = &getAnalysis<DeSSA>();
|
|
}
|
|
m_LV = &(getAnalysis<LiveVarsAnalysis>().getLiveVars());
|
|
m_PatternMatch = &getAnalysis<CodeGenPatternMatch>();
|
|
m_pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
|
|
m_coalescingEngine = &getAnalysis<CoalescingEngine>();
|
|
m_DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
|
m_DL = &F.getParent()->getDataLayout();
|
|
|
|
// FIXME: enable RPE.
|
|
// m_RPE = &getAnalysis<RegisterEstimator>();
|
|
|
|
// Nothing but cleanup data from previous runs.
|
|
reset();
|
|
|
|
if (IGC_IS_FLAG_ENABLED(EnableVariableAlias) && m_DeSSA && !m_pCtx->getModuleMetaData()->compOpt.OptDisable &&
|
|
m_pCtx->platform.GetPlatformFamily() >= IGFX_GEN9_CORE) {
|
|
// Setup ArgDeSSARoot (for subroutine, it might be conservative,
|
|
// but it should work.).
|
|
m_ArgDeSSARoot.clear();
|
|
for (auto II = F.arg_begin(), IE = F.arg_end(); II != IE; ++II) {
|
|
Value *A = II;
|
|
if (Value *R = m_DeSSA->getRootValue(A)) {
|
|
m_ArgDeSSARoot.push_back(R);
|
|
}
|
|
}
|
|
|
|
// 0. Merge Variables
|
|
// Merge two different variables into a single one.
|
|
// The two vars that will be merged should have the same
|
|
// size/type and normally are defined with different values.
|
|
// Once merged, they are put in the same DeSSA congruent class
|
|
mergeVariables(&F);
|
|
|
|
// 1. SubVector aliasing
|
|
// Two variables alias each other if they have the same values.
|
|
// Although they have different names, the two variables share
|
|
// the same values over their live ranges. The cases such as
|
|
// extractElement/insertElement, etc. Once aliasing is identified,
|
|
// the liveness of the alias root is updated to be the sum of both.
|
|
// This is the same as DeSSA alias.
|
|
InsertElementAliasing(&F);
|
|
|
|
// 2. Handle extractElement, etc that handles a single instruction or
|
|
// a few instruction, not invovled in a complicated patterns like
|
|
// InsertElement.
|
|
visitLiveInstructions(&F);
|
|
|
|
postProcessing();
|
|
|
|
sortAliasResult();
|
|
|
|
if (IGC_IS_FLAG_ENABLED(DumpVariableAlias)) {
|
|
auto name = Debug::DumpName(Debug::GetShaderOutputName())
|
|
.Hash(m_pCtx->hash)
|
|
.Type(m_pCtx->type)
|
|
.Pass("VariableAlias")
|
|
.PostFix(F.getName().str())
|
|
.Extension("txt");
|
|
printAlias(Debug::Dump(name, Debug::DumpType::DBG_MSG_TEXT).stream(), m_F);
|
|
}
|
|
}
|
|
|
|
m_F = nullptr;
|
|
return false;
|
|
}
|
|
|
|
static unsigned getMaxReuseDistance(uint16_t size) { return (size == 8) ? 10 : 5; }
|
|
|
|
bool VariableReuseAnalysis::checkUseInst(Instruction *UseInst, LiveVars *LV) {
|
|
BasicBlock *CurBB = UseInst->getParent();
|
|
// If dessa is disabled (LV = null), skip.
|
|
if (UseInst->isUsedOutsideOfBlock(CurBB) || LV == nullptr)
|
|
return false;
|
|
|
|
// This situation can occur:
|
|
//
|
|
// ,------.
|
|
// | |
|
|
// | v
|
|
// | t2 = phi ... t1 ...
|
|
// | |
|
|
// | v
|
|
// | t1 = ...
|
|
// | ... = ... t1 ...
|
|
// | |
|
|
// `------'
|
|
//
|
|
// Disallow reuse if t1 has a phi use.
|
|
// Disallow reuse if t1 has a far away use when the pressure is not low.
|
|
unsigned DefLoc = LV->getDistance(UseInst);
|
|
unsigned FarUseLoc = 0;
|
|
for (auto UI : UseInst->users()) {
|
|
if (isa<PHINode>(UI))
|
|
return false;
|
|
|
|
auto Inst = dyn_cast<Instruction>(UI);
|
|
if (!Inst)
|
|
return false;
|
|
unsigned UseLoc = LV->getDistance(Inst);
|
|
FarUseLoc = std::max(FarUseLoc, UseLoc);
|
|
}
|
|
|
|
// When the whole function or block pressure is low, skip the distance check.
|
|
if (isCurFunctionPressureLow() || isCurBlockPressureLow())
|
|
return true;
|
|
|
|
// Use distance to limit reuse.
|
|
const unsigned FarUseDistance = getMaxReuseDistance(m_SimdSize);
|
|
return FarUseLoc <= DefLoc + FarUseDistance;
|
|
}
|
|
|
|
bool VariableReuseAnalysis::checkDefInst(Instruction *DefInst, Instruction *UseInst, LiveVars *LV) {
|
|
IGC_ASSERT(nullptr != DefInst);
|
|
IGC_ASSERT(nullptr != UseInst);
|
|
// If dessa is disabled (LV = null), skip
|
|
if (isa<PHINode>(DefInst) || LV == nullptr)
|
|
return false;
|
|
|
|
if (auto CI = dyn_cast<CallInst>(DefInst)) {
|
|
Function *F = CI->getCalledFunction();
|
|
// Do not reuse the return symbol of subroutine/stack calls.
|
|
if (!F || !F->isDeclaration())
|
|
return false;
|
|
|
|
if (isa<GenIntrinsicInst>(DefInst)) {
|
|
// Just skip all gen intrinsic calls. Some intrinsic calls may have
|
|
// special meaning.
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// This is a block level reuse.
|
|
BasicBlock *CurBB = UseInst->getParent();
|
|
if (DefInst->getParent() != CurBB || DefInst->isUsedOutsideOfBlock(CurBB))
|
|
return false;
|
|
|
|
// Check whether UseInst is the last use of DefInst. If not, this source
|
|
// variable cannot be reused.
|
|
Instruction *LastUse = LV->getLVInfo(DefInst).findKill(CurBB);
|
|
if (LastUse != UseInst)
|
|
return false;
|
|
|
|
// When the whole function or block pressure is low, skip the distance check.
|
|
if (isCurFunctionPressureLow() || isCurBlockPressureLow())
|
|
return true;
|
|
|
|
// Use distance to limit far reuses.
|
|
unsigned DefLoc = LV->getDistance(DefInst);
|
|
unsigned UseLoc = LV->getDistance(UseInst);
|
|
const unsigned FarDefDistance = getMaxReuseDistance(m_SimdSize);
|
|
return UseLoc <= DefLoc + FarDefDistance;
|
|
}
|
|
|
|
void VariableReuseAnalysis::mergeVariables(Function *F) {
|
|
for (auto II = inst_begin(F), IE = inst_end(F); II != IE; ++II) {
|
|
Instruction *I = &*II;
|
|
if (!m_PatternMatch->NeedInstruction(*I))
|
|
continue;
|
|
if (GenIntrinsicInst *CI = dyn_cast<GenIntrinsicInst>(I)) {
|
|
switch (CI->getIntrinsicID()) {
|
|
case GenISAIntrinsic::GenISA_sub_group_dpas:
|
|
case GenISAIntrinsic::GenISA_dpas: {
|
|
if (!m_DeSSA) {
|
|
// Skip if no DeSSA
|
|
break;
|
|
}
|
|
|
|
Value *out = CI;
|
|
Value *input = CI->getOperand(0);
|
|
|
|
if (!(isa<Instruction>(input) || isa<Argument>(input))) {
|
|
// input may be a constant for example
|
|
break;
|
|
}
|
|
Type *OTy = out->getType();
|
|
Type *ITy = input->getType();
|
|
if (getTypeSizeInBits(OTy) != getTypeSizeInBits(ITy)) {
|
|
// If out and input are different size, skip
|
|
break;
|
|
}
|
|
|
|
// For now, coalescing out and input if at least one of them
|
|
// is local, and input is the last use.
|
|
if ((m_WIA && m_WIA->whichDepend(out) == m_WIA->whichDepend(input)) && !hasBeenPayloadCoalesced(input) &&
|
|
!hasBeenPayloadCoalesced(out) && !m_DeSSA->interfere(out, input)) {
|
|
// For dpas, alignment for out/input are the same
|
|
e_alignment align = EALIGN_AUTO;
|
|
if (m_WIA) {
|
|
align = GetPreferredAlignment(out, m_WIA, m_pCtx);
|
|
}
|
|
// Make sure that nodes have been created before doing union
|
|
m_DeSSA->addReg(out, align);
|
|
m_DeSSA->addReg(input, align);
|
|
m_DeSSA->unionRegs(out, input);
|
|
}
|
|
break;
|
|
}
|
|
default:
|
|
break;
|
|
} // End of switch
|
|
}
|
|
}
|
|
}
|
|
|
|
void VariableReuseAnalysis::visitLiveInstructions(Function *F) {
|
|
// VectorAlias = 0x4
|
|
// = 0x8
|
|
const auto control = ((m_pCtx->getVectorCoalescingControl() >> 2) & 0x3);
|
|
if (control == 0) {
|
|
return;
|
|
}
|
|
|
|
for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
|
|
BasicBlock *BB = &*BI;
|
|
for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II) {
|
|
Instruction &I = *II;
|
|
if (!m_PatternMatch->NeedInstruction(I))
|
|
continue;
|
|
visit(I);
|
|
}
|
|
}
|
|
}
|
|
|
|
// This is for assisting visa for liveness analysis
|
|
//
|
|
// Given a root Value RootVal, all values that are coalesced with it are
|
|
// in AllVals. This function finds the place to insert the lifeTimeStart
|
|
// for RootVal, which is either at the end of a nearest dominating BB of
|
|
// all definitions if no definition dominates the rest, or right before
|
|
// the definition that dominates the rest definitions. If a value in AllVals
|
|
// is an argument, no lifeTimeStart is needed.
|
|
void VariableReuseAnalysis::setLifeTimeStartPos(Value *RootVal, ValueVectorTy &AllVals, BlockCoalescing *theBC) {
|
|
SmallSet<BasicBlock *, 8> defBBSet;
|
|
SmallSet<BasicBlock *, 8> phiSrcMovBBSet;
|
|
for (int i = 0, sz = (int)AllVals.size(); i < sz; ++i) {
|
|
Value *V = AllVals[i];
|
|
Instruction *I = dyn_cast<Instruction>(V);
|
|
if (!I) {
|
|
// For arg, global etc., its start is on entry.
|
|
// Thus, no need to insert lifetime start.
|
|
defBBSet.clear();
|
|
phiSrcMovBBSet.clear();
|
|
break;
|
|
}
|
|
|
|
if (PHINode *PHI = dyn_cast<PHINode>(I)) {
|
|
Value *PHI_root = m_DeSSA->getRootValue(PHI);
|
|
int sz1 = (int)PHI->getNumIncomingValues();
|
|
for (int i1 = 0; i1 < sz1; ++i1) {
|
|
Value *Src = PHI->getIncomingValue(i1);
|
|
Value *Src_root = m_DeSSA->getRootValue(Src);
|
|
if (!Src_root || PHI_root != Src_root) {
|
|
// Need Src-side phi mov
|
|
BasicBlock *BB = PHI->getIncomingBlock(i1);
|
|
phiSrcMovBBSet.insert(BB);
|
|
}
|
|
}
|
|
} else {
|
|
BasicBlock *BB = I->getParent();
|
|
defBBSet.insert(BB);
|
|
}
|
|
}
|
|
|
|
if (defBBSet.size() == 0 && phiSrcMovBBSet.size() == 0) {
|
|
return;
|
|
}
|
|
|
|
auto BSI = defBBSet.begin();
|
|
auto BSE = defBBSet.end();
|
|
BasicBlock *NearestDomBB = *BSI;
|
|
for (++BSI; BSI != BSE; ++BSI) {
|
|
BasicBlock *aB = *BSI;
|
|
NearestDomBB = m_DT->findNearestCommonDominator(NearestDomBB, aB);
|
|
}
|
|
|
|
// phiSrcMovBBSet
|
|
for (auto II = phiSrcMovBBSet.begin(), IE = phiSrcMovBBSet.end(); II != IE; ++II) {
|
|
BasicBlock *aB = *II;
|
|
NearestDomBB = m_DT->findNearestCommonDominator(NearestDomBB, aB);
|
|
}
|
|
|
|
// Skip emptry BBs that are going to be skipped in codegen emit.
|
|
while (theBC->IsEmptyBlock(NearestDomBB)) {
|
|
auto Node = m_DT->getNode(NearestDomBB);
|
|
NearestDomBB = Node->getIDom()->getBlock();
|
|
}
|
|
|
|
if (defBBSet.count(NearestDomBB)) {
|
|
// lifeTimeStart insert pos is in a BB where a def exists
|
|
m_LifetimeAt1stDefOfBB[RootVal] = NearestDomBB;
|
|
} else {
|
|
// No def in the bb, it must be at the end of BB
|
|
// (must be before phiSrcMov too).
|
|
m_LifetimeAtEndOfBB[NearestDomBB].push_back(RootVal);
|
|
}
|
|
}
|
|
|
|
void VariableReuseAnalysis::postProcessing() {
|
|
// BlockCoalescing : check if a BB is a to-be-skipped empty BB.
|
|
// It is used for selecting BB to add lifetime start
|
|
//
|
|
// VectorAlias = 0x10
|
|
// = 0x20
|
|
BlockCoalescing *theBC = &getAnalysis<BlockCoalescing>();
|
|
const auto control = ((m_pCtx->getVectorCoalescingControl() >> 4) & 0x3);
|
|
if (!m_DeSSA || control == 0)
|
|
return;
|
|
|
|
// VectorAlias = 0x10
|
|
DenseMap<Value *, int> dessaRootVisited;
|
|
auto IS = m_baseVecMap.begin();
|
|
auto IE = m_baseVecMap.end();
|
|
for (auto II = IS; II != IE; ++II) {
|
|
SBaseVecDesc *BV = II->second;
|
|
Value *aliasee = BV->BaseVector;
|
|
// An alias set of an aliasee:
|
|
// The aliasee and all its aliasers; and for each of them, all values
|
|
// in their dessa CC.
|
|
//
|
|
// For each Aliasee, record its lifetime start, which is the
|
|
// nearest dominator that dominates all value defs in an alias set.
|
|
// This BB is either one that has no defintion of values in the set;
|
|
// or one that has a defintion to a value in the set. For the former,
|
|
// m_LifetimeAtEndOfBB is used to keep track of it; for the latter,
|
|
// m_LifetimeAt1stDefOfBB is used.
|
|
ValueVectorTy AllVals;
|
|
SmallVector<Value *, 16> valInCC;
|
|
m_DeSSA->getAllValuesInCongruentClass(aliasee, valInCC);
|
|
AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
|
|
|
|
// update visited for aliasee
|
|
Value *aliaseeRoot = getRootValue(aliasee);
|
|
dessaRootVisited[aliaseeRoot] = 1;
|
|
for (int i = 0, sz = (int)BV->Aliasers.size(); i < sz; ++i) {
|
|
SSubVecDesc *aSV = BV->Aliasers[i];
|
|
Value *aliaser = aSV->Aliaser;
|
|
valInCC.clear();
|
|
m_DeSSA->getAllValuesInCongruentClass(aliaser, valInCC);
|
|
AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
|
|
|
|
// update visited for aliaser
|
|
Value *aRoot = getRootValue(aliaser);
|
|
dessaRootVisited[aRoot] = 1;
|
|
}
|
|
|
|
setLifeTimeStartPos(aliaseeRoot, AllVals, theBC);
|
|
}
|
|
|
|
// VectorAlias = 0x20, for other vector values.
|
|
if (control < 2)
|
|
return;
|
|
|
|
for (auto II = inst_begin(*m_F), IE = inst_end(*m_F); II != IE; ++II) {
|
|
Instruction *I = &*II;
|
|
if (!m_PatternMatch->NeedInstruction(*I))
|
|
continue;
|
|
if (!I->getType()->isVectorTy())
|
|
continue;
|
|
Value *I_nd = m_DeSSA->getNodeValue(I);
|
|
Value *rootV = getRootValue(I_nd);
|
|
if (dessaRootVisited.find(rootV) != dessaRootVisited.end()) {
|
|
// Already handled by sub-vector aliasing, skip
|
|
continue;
|
|
}
|
|
dessaRootVisited[rootV] = 1;
|
|
|
|
ValueVectorTy AllVals;
|
|
SmallVector<Value *, 16> valInCC;
|
|
m_DeSSA->getAllValuesInCongruentClass(rootV, valInCC);
|
|
AllVals.insert(AllVals.end(), valInCC.begin(), valInCC.end());
|
|
|
|
setLifeTimeStartPos(rootV, AllVals, theBC);
|
|
}
|
|
}
|
|
|
|
Value *VariableReuseAnalysis::getRootValue(Value *V) {
|
|
Value *dessaRV = nullptr;
|
|
if (m_DeSSA) {
|
|
dessaRV = m_DeSSA->getRootValue(V);
|
|
}
|
|
return dessaRV ? dessaRV : V;
|
|
}
|
|
|
|
Value *VariableReuseAnalysis::getAliasRootValue(Value *V) {
|
|
Value *V_nv = m_DeSSA ? m_DeSSA->getNodeValue(V) : V;
|
|
auto II0 = m_baseVecMap.find(V_nv);
|
|
if (II0 != m_baseVecMap.end())
|
|
return II0->second->BaseVector;
|
|
|
|
auto II1 = m_aliasMap.find(V_nv);
|
|
if (II1 == m_aliasMap.end()) {
|
|
return V_nv;
|
|
}
|
|
return II1->second->Aliasee->BaseVector;
|
|
}
|
|
|
|
void VariableReuseAnalysis::visitExtractElementInst(ExtractElementInst &I) {
|
|
const auto control = ((m_pCtx->getVectorCoalescingControl() >> 2) & 0x3);
|
|
// VectorAlias=0x4 : for isolated values
|
|
// =0x8 : for both isolated and non-isolated values
|
|
if (control == 0) {
|
|
return;
|
|
}
|
|
|
|
ExtractElementInst *EEI = &I;
|
|
Value *vecVal = EEI->getVectorOperand();
|
|
|
|
// Before doing extractMask explicitly, don't do aliasing
|
|
// for extractElement whose vector operand are the candidate
|
|
// of the existing extractMask optimization, as doing so will
|
|
// disable the existing extractMask optimization, which will
|
|
// cause perf regression.
|
|
if (Instruction *Inst = dyn_cast<Instruction>(vecVal)) {
|
|
if (IGC_IS_FLAG_DISABLED(EnableExtractMask) && (isSampleInstruction(Inst) || isLdInstruction(Inst))) {
|
|
// OCL can have sample (image read), not ld. For 3d/mac,
|
|
// need to check more
|
|
return;
|
|
}
|
|
}
|
|
|
|
// If inst is dead, EEI is an argument, or EEI & vecVal have
|
|
// different uniformness, skip it. (Current igc & visa interface
|
|
// requires any argument value to be a root value, not alias.)
|
|
if (m_HasBecomeNoopInsts.count(EEI) || m_DeSSA->isNoopAliaser(EEI) || isOrCoalescedWithArg(EEI) ||
|
|
(m_WIA && m_WIA->whichDepend(EEI) != m_WIA->whichDepend(vecVal))) {
|
|
return;
|
|
}
|
|
|
|
Value *EEI_nv = m_DeSSA->getNodeValue(EEI);
|
|
Value *vec_nv = m_DeSSA->getNodeValue(vecVal);
|
|
// If EEI has been payload-coalesced or has been an aliaser, skip
|
|
if (hasBeenPayloadCoalesced(EEI) || isAliaser(EEI_nv)) {
|
|
return;
|
|
}
|
|
|
|
if (!m_DeSSA->isSingleValued(EEI_nv) || !m_DeSSA->isSingleValued(vec_nv)) {
|
|
if (control < 2) {
|
|
return;
|
|
}
|
|
|
|
if (hasAnyDCCAsAliaser(EEI_nv) || hasAnotherDCCAsAliasee(vec_nv)) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Can only do alias if idx is a known constant.
|
|
Value *IdxVal = EEI->getIndexOperand();
|
|
ConstantInt *Idx = dyn_cast<ConstantInt>(IdxVal);
|
|
if (!Idx) {
|
|
return;
|
|
}
|
|
|
|
int iIdx = (int)Idx->getZExtValue();
|
|
if (!m_DeSSA->isSingleValued(EEI_nv) || !m_DeSSA->isSingleValued(vec_nv)) {
|
|
if (control < 2)
|
|
return;
|
|
|
|
// case 3: DeSSA CC not empty
|
|
if (aliasInterfere(EEI_nv, vec_nv, iIdx))
|
|
return;
|
|
}
|
|
|
|
// Valid vec alias and add it into alias map
|
|
addVecAlias(EEI_nv, vec_nv, vecVal, iIdx);
|
|
|
|
// Mark this inst as noop inst
|
|
m_HasBecomeNoopInsts[EEI] = 1;
|
|
}
|
|
|
|
void VariableReuseAnalysis::printAlias(raw_ostream &OS, const Function *F) const {
|
|
auto toString = [](e_alignment A) -> const char * {
|
|
switch (A) {
|
|
case EALIGN_BYTE:
|
|
return "Byte";
|
|
case EALIGN_WORD:
|
|
return "word";
|
|
case EALIGN_DWORD:
|
|
return "dword";
|
|
case EALIGN_QWORD:
|
|
return "qword";
|
|
case EALIGN_OWORD:
|
|
return "oword";
|
|
case EALIGN_HWORD:
|
|
return "hword";
|
|
case EALIGN_32WORD:
|
|
return "32word";
|
|
case EALIGN_64WORD:
|
|
return "64word";
|
|
default:
|
|
break;
|
|
}
|
|
return "auto";
|
|
};
|
|
|
|
// Assign each inst/arg a unique integer so that the output
|
|
// would be in order. It is useful when doing comparison.
|
|
DenseMap<const Value *, int> Val2IntMap;
|
|
int id = 0;
|
|
if (F) {
|
|
// All arguments
|
|
for (auto AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) {
|
|
const Value *aVal = AI;
|
|
Val2IntMap[aVal] = (++id);
|
|
}
|
|
// All instructions
|
|
for (auto II = inst_begin(F), IE = inst_end(F); II != IE; ++II) {
|
|
const Instruction *Inst = &*II;
|
|
Val2IntMap[(Value *)Inst] = (++id);
|
|
}
|
|
}
|
|
|
|
auto SubVecCmp = [&](const SSubVecDesc *SV0, const SSubVecDesc *SV1) {
|
|
int n0 = Val2IntMap[SV0->Aliaser];
|
|
int n1 = Val2IntMap[SV1->Aliaser];
|
|
return n0 < n1;
|
|
};
|
|
|
|
auto BaseVecCmp = [&](const SBaseVecDesc *BV0, const SBaseVecDesc *BV1) {
|
|
int n0 = Val2IntMap[BV0->BaseVector];
|
|
int n1 = Val2IntMap[BV1->BaseVector];
|
|
return n0 < n1;
|
|
};
|
|
|
|
OS << "\nSummary of Variable Alias Info: " << (F ? F->getName().str() : "Function") << "\n";
|
|
|
|
SmallVector<SBaseVecDesc *, 64> sortedAlias;
|
|
for (auto &MI : m_baseVecMap) {
|
|
SBaseVecDesc *BV = MI.second;
|
|
sortedAlias.push_back(BV);
|
|
}
|
|
std::sort(sortedAlias.begin(), sortedAlias.end(), BaseVecCmp);
|
|
|
|
for (int i = 0, sz = (int)sortedAlias.size(); i < sz; ++i) {
|
|
SBaseVecDesc *BV = sortedAlias[i];
|
|
Value *aliasee = BV->BaseVector;
|
|
|
|
OS << "Aliasee : " << *aliasee << " align: " << toString(BV->Align) << "\n";
|
|
std::sort(BV->Aliasers.begin(), BV->Aliasers.end(), SubVecCmp);
|
|
for (auto VI : BV->Aliasers) {
|
|
SSubVecDesc *aSV = VI;
|
|
Value *aliaser = aSV->Aliaser;
|
|
bool isSinglVal = m_DeSSA ? m_DeSSA->isSingleValued(aliaser) : true;
|
|
const char *inCC = !isSinglVal ? ".inDessaCC" : "";
|
|
OS << " " << *aliaser << " [" << aSV->StartElementOffset << "]" << inCC << "\n";
|
|
}
|
|
OS << "\n";
|
|
}
|
|
OS << "\n";
|
|
}
|
|
|
|
// Sort the final aliase info (baseVecmap) so that its order is deterministic.
|
|
// CreateAliasVars() relies on this to generate cvariables in order.
|
|
// (todo: use vector instead of map in the algorithm to avoid sorting.)
|
|
void VariableReuseAnalysis::sortAliasResult() {
|
|
if (m_baseVecMap.empty()) {
|
|
return;
|
|
}
|
|
|
|
Function *F = m_F;
|
|
// Assign each inst/arg a unique integer so that the output
|
|
// would be in order. It is useful when doing comparison.
|
|
DenseMap<const Value *, int> Val2IntMap;
|
|
int id = 0;
|
|
if (F) {
|
|
// All arguments
|
|
for (auto AI = F->arg_begin(), AE = F->arg_end(); AI != AE; ++AI) {
|
|
const Value *aVal = AI;
|
|
Val2IntMap[aVal] = (++id);
|
|
}
|
|
// All instructions
|
|
for (auto II = inst_begin(F), IE = inst_end(F); II != IE; ++II) {
|
|
const Instruction *Inst = &*II;
|
|
Val2IntMap[(Value *)Inst] = (++id);
|
|
}
|
|
}
|
|
|
|
auto SubVecCmp = [&](const SSubVecDesc *SV0, const SSubVecDesc *SV1) {
|
|
int n0 = Val2IntMap[SV0->Aliaser];
|
|
int n1 = Val2IntMap[SV1->Aliaser];
|
|
return n0 < n1;
|
|
};
|
|
|
|
auto BaseVecCmp = [&](const SBaseVecDesc *BV0, const SBaseVecDesc *BV1) {
|
|
int n0 = Val2IntMap[BV0->BaseVector];
|
|
int n1 = Val2IntMap[BV1->BaseVector];
|
|
return n0 < n1;
|
|
};
|
|
|
|
m_sortedBaseVec.clear();
|
|
for (auto &MI : m_baseVecMap) {
|
|
SBaseVecDesc *BV = MI.second;
|
|
std::sort(BV->Aliasers.begin(), BV->Aliasers.end(), SubVecCmp);
|
|
m_sortedBaseVec.push_back(BV);
|
|
}
|
|
std::sort(m_sortedBaseVec.begin(), m_sortedBaseVec.end(), BaseVecCmp);
|
|
}
|
|
|
|
void VariableReuseAnalysis::dumpAlias() const { printAlias(dbgs(), m_F); }
|
|
|
|
// Add alias Aliaser -> Aliasee[Idx]
|
|
void VariableReuseAnalysis::addVecAlias(Value *Aliaser, Value *Aliasee, Value *OrigBaseVec, int Idx,
|
|
e_alignment AliaseeAlign) {
|
|
auto getLargerAlign = [](e_alignment A0, e_alignment A1) -> e_alignment {
|
|
if (A0 == EALIGN_AUTO)
|
|
return A1;
|
|
if (A1 == EALIGN_AUTO)
|
|
return A0;
|
|
return A0 > A1 ? A0 : A1;
|
|
};
|
|
|
|
int StartIx = Idx;
|
|
// if Aliasee is an aliaser now, its aliasee will be the new aliasee
|
|
SBaseVecDesc *aliaseeBV;
|
|
auto SMI = m_aliasMap.find(Aliasee);
|
|
if (SMI != m_aliasMap.end()) {
|
|
SSubVecDesc *SV = SMI->second;
|
|
aliaseeBV = SV->Aliasee;
|
|
StartIx += SV->StartElementOffset;
|
|
} else {
|
|
aliaseeBV = getOrCreateBaseVecDesc(Aliasee, OrigBaseVec, AliaseeAlign);
|
|
}
|
|
// update align
|
|
aliaseeBV->Align = getLargerAlign(aliaseeBV->Align, AliaseeAlign);
|
|
|
|
SSubVecDesc *aliaserSV = getOrCreateSubVecDesc(Aliaser);
|
|
aliaserSV->Aliasee = aliaseeBV;
|
|
aliaserSV->StartElementOffset = StartIx;
|
|
|
|
// If Aliaser exists as aliasee, must re-alias its aliasers.
|
|
auto BMI = m_baseVecMap.find(Aliaser);
|
|
if (BMI != m_baseVecMap.end()) {
|
|
SBaseVecDesc *BVD = BMI->second;
|
|
for (int i = 0, sz = (int)BVD->Aliasers.size(); i < sz; ++i) {
|
|
SSubVecDesc *SV = BVD->Aliasers[i];
|
|
SV->Aliasee = aliaseeBV;
|
|
SV->StartElementOffset += StartIx;
|
|
aliaseeBV->Aliasers.push_back(SV);
|
|
}
|
|
|
|
aliaseeBV->Align = getLargerAlign(aliaseeBV->Align, BVD->Align);
|
|
|
|
// Delete BMI as it is no longer a base vector.
|
|
m_baseVecMap.erase(BMI);
|
|
}
|
|
|
|
// Finally, add aliaserSV into Aliasee's Aliaser vector
|
|
aliaseeBV->Aliasers.push_back(aliaserSV);
|
|
|
|
const auto control1 = (m_pCtx->getVectorCoalescingControl() & 0x3);
|
|
const auto control2 = ((m_pCtx->getVectorCoalescingControl() >> 2) & 0x3);
|
|
if (control1 > 1 || control2 > 1) {
|
|
// If aliaser isn't single-valued, add it to its root map.
|
|
if (!m_DeSSA->isSingleValued(Aliaser)) {
|
|
Value *rv0 = m_DeSSA->getRootValue(Aliaser);
|
|
m_root2AliasMap[rv0] = Aliaser;
|
|
}
|
|
if (!m_DeSSA->isSingleValued(Aliasee)) {
|
|
// If it isn't isolated, add it to its root map
|
|
Value *rv1 = m_DeSSA->getRootValue(Aliasee);
|
|
m_root2AliasMap[rv1] = Aliasee;
|
|
}
|
|
}
|
|
}
|
|
|
|
SSubVecDesc *VariableReuseAnalysis::getOrCreateSubVecDesc(Value *V) {
|
|
if (m_aliasMap.count(V) == 0) {
|
|
SSubVecDesc *SV = new (Allocator) SSubVecDesc(V);
|
|
m_aliasMap.insert(std::make_pair(V, SV));
|
|
}
|
|
return m_aliasMap[V];
|
|
}
|
|
|
|
SBaseVecDesc *VariableReuseAnalysis::getOrCreateBaseVecDesc(Value *V, Value *OV, e_alignment A) {
|
|
if (m_baseVecMap.count(V) == 0) {
|
|
SBaseVecDesc *BV = new (Allocator) SBaseVecDesc(V, OV, A);
|
|
m_baseVecMap.insert(std::make_pair(V, BV));
|
|
}
|
|
return m_baseVecMap[V];
|
|
}
|
|
|
|
// Return true if V itself is sub-vector aliased.
|
|
// Note that other values in V's DeSSA CC are not checked.
|
|
bool VariableReuseAnalysis::isAliased(Value *V) const {
|
|
Value *V_nv = m_DeSSA ? m_DeSSA->getNodeValue(V) : V;
|
|
return (m_aliasMap.count(V_nv) > 0 || m_baseVecMap.count(V_nv) > 0);
|
|
}
|
|
|
|
// Return true if V is aliased to a vector as an aliaser
|
|
bool VariableReuseAnalysis::isAliaser(Value *V) const {
|
|
Value *V_nv = m_DeSSA ? m_DeSSA->getNodeValue(V) : V;
|
|
return m_aliasMap.count(V_nv) > 0;
|
|
}
|
|
|
|
// DCC: DeSSA Congruent Class
|
|
// If V has been coalesced by DeSSA and any value in V's DCC has been aliased
|
|
// as an aliaser, return true.
|
|
bool VariableReuseAnalysis::hasAnyDCCAsAliaser(Value *V) const {
|
|
// If V is not in the map, check others in its DCC
|
|
Value *rv = m_DeSSA ? m_DeSSA->getRootValue(V) : nullptr;
|
|
if (rv) {
|
|
auto II = m_root2AliasMap.find(rv);
|
|
if (II != m_root2AliasMap.end()) {
|
|
Value *aV = II->second;
|
|
auto MI = m_aliasMap.find(aV);
|
|
if (MI != m_aliasMap.end())
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// DCC: DeSSA Congruent Class
|
|
// If there is another value (different from V) in V's DCC that is aliasee,
|
|
// return true.
|
|
bool VariableReuseAnalysis::hasAnotherDCCAsAliasee(Value *V) const {
|
|
// Check if any value of its dessa CC has been an aliasee.
|
|
Value *rv = m_DeSSA ? m_DeSSA->getRootValue(V) : nullptr;
|
|
if (rv) {
|
|
auto II = m_root2AliasMap.find(rv);
|
|
if (II != m_root2AliasMap.end()) {
|
|
Value *aV = II->second;
|
|
auto MI = m_baseVecMap.find(aV);
|
|
if (MI != m_baseVecMap.end() && aV != V)
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// A chain of IEIs is used to define a vector. If all elements of this vector
|
|
// are inserted via this chain IEIs with constant indices, populate AllIEIs.
|
|
// input: FirstIEI (first IEI, usually with index = 0)
|
|
// output: AllIEIs (collect all values used to initialize the vector)
|
|
// Return value:
|
|
// true : if all elements are inserted with IEI of constant index
|
|
// false: otherwise.
|
|
bool VariableReuseAnalysis::getAllInsEltsIfAvailable(InsertElementInst *FirstIEI, VecInsEltInfoTy &AllIEIs,
|
|
bool OnlySameBB) {
|
|
int nelts = getNumElts(FirstIEI);
|
|
|
|
// Sanity
|
|
if (nelts < 2)
|
|
return false;
|
|
|
|
AllIEIs.resize(nelts);
|
|
|
|
InsertElementInst *LastIEI = FirstIEI;
|
|
InsertElementInst *I = FirstIEI;
|
|
Value *dessaRoot = m_DeSSA->getRootValue(FirstIEI);
|
|
while (I) {
|
|
LastIEI = I;
|
|
|
|
if (OnlySameBB && LastIEI->getParent() != FirstIEI->getParent())
|
|
return false;
|
|
|
|
// For insertElement, it should be in the same dessa CC
|
|
// already, as dessa special-handles it. Make sure they
|
|
// are indeed in the same CC, otherwise, skip.
|
|
if (hasBeenPayloadCoalesced(I) || m_DeSSA->getRootValue(I) != dessaRoot)
|
|
return false;
|
|
|
|
Value *V = nullptr;
|
|
Value *E = nullptr;
|
|
int IEI_ix = 0, V_ix = 0;
|
|
if (!getElementValue(I, IEI_ix, E, V, V_ix)) {
|
|
return false;
|
|
}
|
|
|
|
IGC_ASSERT_MESSAGE(IEI_ix < nelts, "ICE: IEI's index out of bound!");
|
|
SVecInsEltInfo &InsEltInfo = AllIEIs[IEI_ix];
|
|
if (InsEltInfo.IEI) {
|
|
// One element is inserted more than once, skip.
|
|
return false;
|
|
}
|
|
InsEltInfo.IEI = I;
|
|
InsEltInfo.Elt = E;
|
|
InsEltInfo.FromVec = V;
|
|
InsEltInfo.FromVec_eltIx = V_ix;
|
|
if (E) {
|
|
InsEltInfo.EEI = dyn_cast<ExtractElementInst>(E);
|
|
}
|
|
|
|
if (!I->hasOneUse()) {
|
|
break;
|
|
}
|
|
|
|
I = dyn_cast<InsertElementInst>(I->user_back());
|
|
}
|
|
|
|
// Special cases.
|
|
if (AllIEIs.empty() || LastIEI->use_empty()) {
|
|
return false;
|
|
}
|
|
|
|
// Make sure all elements are present and have the same uniformity.
|
|
Value *V = AllIEIs[0].IEI;
|
|
if (V == nullptr) {
|
|
return false;
|
|
}
|
|
Value *V_nv = m_DeSSA->getNodeValue(V);
|
|
Value *V_root = getRootValue(V_nv);
|
|
auto V_dep = m_WIA->whichDepend(V);
|
|
for (int i = 0; i < nelts; ++i) {
|
|
Value *tV = AllIEIs[i].IEI;
|
|
if (tV == nullptr)
|
|
return false;
|
|
|
|
// Expect node values for all IEIs are identical. In general, if they
|
|
// are in the same DeSSA CC, that would be fine.
|
|
Value *tV_nv = m_DeSSA->getNodeValue(tV);
|
|
if (V_root != getRootValue(tV_nv))
|
|
return false;
|
|
|
|
Value *E = AllIEIs[i].Elt;
|
|
Value *FromVec = AllIEIs[i].FromVec;
|
|
Value *FromVec_nv = m_DeSSA->getNodeValue(FromVec);
|
|
// check if FromVec has been coalesced with IEI already by DeSSA.
|
|
// (Wouldn't happen under current DeSSA, but might happen in future)
|
|
if (V_root == getRootValue(FromVec_nv))
|
|
return false;
|
|
|
|
// Make sure FromVec or E have the same uniformness as V.
|
|
if ((E && V_dep != m_WIA->whichDepend(E)) || (FromVec && V_dep != m_WIA->whichDepend(FromVec)))
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
Value *VariableReuseAnalysis::traceAliasValue(Value *V) {
|
|
if (CastInst *CastI = dyn_cast_or_null<CastInst>(V)) {
|
|
// Only handle Noop cast inst. For example,
|
|
// dst = bitcast <3 x i32> src to <3 x float>,
|
|
// it is okay, but the following isn't.
|
|
// dst = bitcast <3 x i64> src to <6 x i32>
|
|
if (!isNoOpInst(CastI, m_pCtx)) {
|
|
return V;
|
|
}
|
|
|
|
Value *Src = CastI->getOperand(0);
|
|
if (isa<Constant>(Src))
|
|
return CastI;
|
|
|
|
Value *NV0 = m_DeSSA->getNodeValue(CastI);
|
|
Value *NV1 = m_DeSSA->getNodeValue(Src);
|
|
if (NV0 == NV1) {
|
|
// Meaning they are aliased already by dessa
|
|
return traceAliasValue(Src);
|
|
}
|
|
}
|
|
return V;
|
|
}
|
|
|
|
//
|
|
// Returns true if the following is true
|
|
// IEI = insertElement <vectorType> Vec, S, <constant IEI_ix>
|
|
// Return false, otherwise.
|
|
//
|
|
// When the above condition is true, V and V_ix are used for the
|
|
// following cases:
|
|
// 1. S is from another vector V.
|
|
// S = extractElement <vectorType> V, <constant V_ix>
|
|
// S is the element denoted by (V, V_ix)
|
|
// 2. otherwise, V=nullptr, V_ix=0.
|
|
// S is a candidate inserted and could be alias to the vector.
|
|
//
|
|
// Input: IEI
|
|
// Output: IEI_ix, S, V, V_ix
|
|
bool VariableReuseAnalysis::getElementValue(InsertElementInst *IEI, int &IEI_ix, Value *&S, Value *&V, int &V_ix) {
|
|
// Return value: S or (V, V_ix)
|
|
S = nullptr;
|
|
V = nullptr;
|
|
V_ix = 0;
|
|
IEI_ix = 0;
|
|
|
|
// Check if I has constant index, skip if not.
|
|
ConstantInt *CI = dyn_cast<ConstantInt>(IEI->getOperand(2));
|
|
if (!CI) {
|
|
return false;
|
|
}
|
|
|
|
IEI_ix = (int)CI->getZExtValue();
|
|
|
|
Value *elem0 = IEI->getOperand(1);
|
|
if (hasBeenPayloadCoalesced(elem0) || isa<Constant>(elem0) || isOrCoalescedWithArg(elem0)) {
|
|
// If elem0 has been payload-coalesced, is constant,
|
|
// or it has been aliased to an argument, skip it.
|
|
return false;
|
|
}
|
|
|
|
Value *elem = traceAliasValue(elem0);
|
|
ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(elem);
|
|
S = elem;
|
|
if (!EEI) {
|
|
// case 2.
|
|
return true;
|
|
}
|
|
ConstantInt *CI1 = dyn_cast<ConstantInt>(EEI->getIndexOperand());
|
|
if (!CI1 || !m_DeSSA->isSingleValued(elem)) {
|
|
// case 2
|
|
return true;
|
|
}
|
|
|
|
V = EEI->getVectorOperand();
|
|
if (isa<Constant>(V) || hasBeenPayloadCoalesced(V)) {
|
|
// case 2 again
|
|
V = nullptr;
|
|
return true;
|
|
}
|
|
|
|
// case 1.
|
|
V_ix = (int)CI1->getZExtValue();
|
|
return true;
|
|
}
|
|
|
|
void VariableReuseAnalysis::InsertElementAliasing(Function *F) {
|
|
// There are dead blocks that are still not removed, don't count them
|
|
// Should use F->size() once dead BBs are removed
|
|
auto getNumBBs = [](Function *aF) {
|
|
int32_t i = 1; // count entry
|
|
for (BasicBlock &aBB : *aF) {
|
|
if (aBB.hasNPredecessors(0)) {
|
|
continue;
|
|
}
|
|
++i;
|
|
}
|
|
return i;
|
|
};
|
|
|
|
// IGC Key VectorAlias controls vectorAlias optimiation.
|
|
//
|
|
// Do it if VectorAlias != 0.
|
|
// VectorAlias=0x1: subvec aliasing for isolated values
|
|
// (getRootValue()=null)
|
|
// =0x2: subvec aliasing for both isolated and non-isolated
|
|
// value)
|
|
const auto control = (m_pCtx->getVectorCoalescingControl() & 0x3);
|
|
// To avoid increasing GRF pressure, skip if F is too large or not an entry
|
|
const int32_t NumBBThreshold = IGC_GET_FLAG_VALUE(VectorAliasBBThreshold);
|
|
bool OnlySameBB = getNumBBs(F) > NumBBThreshold;
|
|
MetaDataUtils *pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
|
|
if (control == 0 || !isEntryFunc(pMdUtils, F)) {
|
|
return;
|
|
}
|
|
for (auto BI = F->begin(), BE = F->end(); BI != BE; ++BI) {
|
|
BasicBlock *BB = &*BI;
|
|
for (auto II = BB->begin(), IE = BB->end(); II != IE; ++II) {
|
|
Instruction *I = &*II;
|
|
if (!m_PatternMatch->NeedInstruction(*I))
|
|
continue;
|
|
|
|
InsertElementInst *IEI = dyn_cast<InsertElementInst>(I);
|
|
if (!IEI)
|
|
continue;
|
|
|
|
// Two cases for sub-vector aliasing:
|
|
// 1. extractFrom: sub-vector is created from a base vector.
|
|
// For example:
|
|
// given base: int8 b; a sub-vector s (int4) can be:
|
|
// s = (int4)(b.s4, b.s5, b.s6, b.s7)
|
|
// In this case, 's' becomes a part of 'b'. In LLVM IR,
|
|
// there are a chain of extElt and insElt instructions for
|
|
// doing so.
|
|
// 2. insertTo: sub-vector is used to create a base vector.
|
|
// For example:
|
|
// given sub-vector int4 s0, s1; int8 vector b is created like:
|
|
// b = (int8) (s0, s1)
|
|
// In this case, both s0 and s1 become part of b.
|
|
|
|
// Check insertElement pattern from the first InsertElement (one
|
|
// with UndefValue. Note that this's also the dessa insElt root.
|
|
if (!isa<UndefValue>(IEI->getOperand(0)))
|
|
continue;
|
|
|
|
// First, collect all insertElementInst and extractElementInst.
|
|
VecInsEltInfoTy AllIEIs;
|
|
if (!getAllInsEltsIfAvailable(IEI, AllIEIs, OnlySameBB)) {
|
|
continue;
|
|
}
|
|
|
|
// Check if this is an extractFrom pattern, if so, add alias.
|
|
if (processExtractFrom(AllIEIs)) {
|
|
continue;
|
|
}
|
|
|
|
// Check if this is an insertTo pattern, if so add alias.
|
|
if (processInsertTo(BB, AllIEIs)) {
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Return true if vector formed by IEI chain is a sub-vector of another one.
|
|
bool VariableReuseAnalysis::processExtractFrom(VecInsEltInfoTy &AllIEIs) {
|
|
const int nelts = (int)AllIEIs.size();
|
|
Value *BaseVec = AllIEIs[0].FromVec;
|
|
int BaseStartIx = AllIEIs[0].FromVec_eltIx;
|
|
if (!BaseVec) {
|
|
return false;
|
|
}
|
|
int base_nelts = getNumElts(BaseVec);
|
|
|
|
if (base_nelts < nelts) {
|
|
return false;
|
|
}
|
|
|
|
for (int i = 1; i < nelts; ++i) {
|
|
if (AllIEIs[i].FromVec != BaseVec || AllIEIs[i].FromVec_eltIx != (BaseStartIx + i))
|
|
return false;
|
|
}
|
|
|
|
// DPAS unlikely uses smaller vector, favor extractMask
|
|
if (base_nelts <= 4 && isExtractMaskCandidate(BaseVec)) {
|
|
return false;
|
|
}
|
|
|
|
Value *lastIEI = AllIEIs[nelts - 1].IEI;
|
|
auto S_use = getCandidateStateUse(lastIEI);
|
|
auto B_def = getCandidateStateDef(BaseVec);
|
|
auto B_use = getCandidateStateUse(BaseVec);
|
|
if (!aliasOkay(S_use, B_def, B_use))
|
|
return false;
|
|
|
|
Value *Sub = AllIEIs[0].IEI;
|
|
// If Sub is coalesced with an arg of function, skip.
|
|
if (isOrCoalescedWithArg(Sub)) {
|
|
return false;
|
|
}
|
|
|
|
Value *Sub_nv = m_DeSSA->getNodeValue(Sub);
|
|
Value *Base_nv = m_DeSSA->getNodeValue(BaseVec);
|
|
|
|
// If Sub_nv has been aliased to another vector already, skip
|
|
if (isAliaser(Sub_nv)) {
|
|
return false;
|
|
}
|
|
|
|
e_alignment BaseAlign;
|
|
if (!checkSubAlign(BaseAlign, Sub, BaseVec, BaseStartIx)) {
|
|
return false;
|
|
}
|
|
|
|
// Skip if they are not single-valued
|
|
bool isSub_singleVal = m_DeSSA->isSingleValued(Sub_nv);
|
|
bool isBase_singleVal = m_DeSSA->isSingleValued(Base_nv);
|
|
if (!isSub_singleVal || !isBase_singleVal) {
|
|
if ((m_pCtx->getVectorCoalescingControl() & 0x3) < 2)
|
|
return false;
|
|
|
|
// Skip if they are already coalesced by DeSSA
|
|
Value *rootBase_nv = m_DeSSA->getRootValue(Base_nv);
|
|
if (rootBase_nv && rootBase_nv == m_DeSSA->getRootValue(Sub_nv))
|
|
return false;
|
|
|
|
// Skip
|
|
// 1) if Sub_nv has been vector-aliased to another one already; or
|
|
// 2) if a different one from Base_nv's CC (not Base_nv) has been
|
|
// aliased by anotehr one already
|
|
// Both cases need a complicated interference checking.
|
|
if (hasAnyDCCAsAliaser(Sub_nv) || hasAnotherDCCAsAliasee(Base_nv)) {
|
|
return false;
|
|
}
|
|
|
|
if (aliasInterfere(Sub_nv, Base_nv, BaseStartIx)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// add alias
|
|
addVecAlias(Sub_nv, Base_nv, BaseVec, BaseStartIx, BaseAlign);
|
|
|
|
// Make sure noop insts are in the map so they won't be emitted later.
|
|
for (int i = 0, sz = nelts; i < sz; ++i) {
|
|
// IEI chain is coalesced by DeSSA, so it's safe to mark it as noop
|
|
InsertElementInst *IEI = AllIEIs[i].IEI;
|
|
if (!m_DeSSA->isNoopAliaser(IEI)) {
|
|
m_HasBecomeNoopInsts[IEI] = 1;
|
|
}
|
|
|
|
ExtractElementInst *EEI = AllIEIs[i].EEI;
|
|
IGC_ASSERT(EEI);
|
|
if (!m_DeSSA->isNoopAliaser(EEI)) {
|
|
// Set EEI as an aliser, thus it become noop.
|
|
Value *EEI_nv = m_DeSSA->getNodeValue(EEI);
|
|
addVecAlias(EEI_nv, Base_nv, BaseVec, AllIEIs[i].FromVec_eltIx, EALIGN_AUTO);
|
|
m_HasBecomeNoopInsts[EEI] = 1;
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Check if IEI is a base vector created by other sub-vectors
|
|
// or scalars. If it is, create alias and return true.
|
|
bool VariableReuseAnalysis::processInsertTo(BasicBlock *BB, VecInsEltInfoTy &AllIEIs) {
|
|
const auto control = (m_pCtx->getVectorCoalescingControl() & 0x3);
|
|
SmallVector<std::pair<Value *, int>, 8> SubVecs;
|
|
auto IsInSubVecs = [&SubVecs](Value *Val) {
|
|
for (int j = 0, sz = (int)SubVecs.size(); j < sz; ++j) {
|
|
if (SubVecs[j].first == Val)
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
InsertElementInst *FirstIEI = AllIEIs[0].IEI;
|
|
Value *Base_nv = m_DeSSA->getNodeValue(FirstIEI);
|
|
if (!m_DeSSA->isSingleValued(Base_nv)) {
|
|
if (control < 2)
|
|
return false;
|
|
|
|
if (hasAnotherDCCAsAliasee(Base_nv)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Find all subvec that are part of BaseVec. AllIEIs may be formed from
|
|
// multiple subvec and scalar.
|
|
bool isSubCandidate = true;
|
|
int nelts = (int)AllIEIs.size();
|
|
Value *Sub = AllIEIs[0].FromVec;
|
|
int SubStartIx = 0;
|
|
for (int i = 0; i < nelts; ++i) {
|
|
// On entry to the iteration, AllIEIs[i].FromVec must be the same as
|
|
// Sub. If the next Sub is different from the current one, the current
|
|
// element (AllIEIs[i]) is the last one element for the Sub.
|
|
//
|
|
// Note
|
|
// case 1: if Elt == nullptr, no aliasing
|
|
// case 2: if Elt != nullptr && Fromvec == nullptr, scalar aliasing
|
|
// case 3: if Elt != nullptr && FromVec != nullptr,
|
|
// (FromVec, FromVec_eltIx) sub-vector aliasing
|
|
//
|
|
Value *Elt = AllIEIs[i].Elt;
|
|
if (!Elt || (Sub && (i - SubStartIx) != AllIEIs[i].FromVec_eltIx)) {
|
|
isSubCandidate = false;
|
|
}
|
|
|
|
if (Elt && Sub == nullptr && skipScalarAliaser(BB, Elt)) {
|
|
// Skip scalar coalescing
|
|
isSubCandidate = false;
|
|
}
|
|
|
|
// If Sub == nullptr or NextSub != Sub, this is the last element
|
|
// of the current Sub (it is a scalar in case of sub == nullpr).
|
|
Value *NextSub = (i < (nelts - 1)) ? AllIEIs[i + 1].FromVec : nullptr;
|
|
if (!Sub || Sub != NextSub) {
|
|
// End of the current Sub
|
|
if (isSubCandidate) {
|
|
Value *aliaser = Sub ? Sub : Elt;
|
|
int sub_nelts = getNumElts(aliaser);
|
|
// If Sub's size is not smaller than IEI's, or not all sub's
|
|
// elements are used, skip.
|
|
if (sub_nelts < nelts && (i - SubStartIx) == (sub_nelts - 1)) {
|
|
SubVecs.push_back(std::make_pair(aliaser, SubStartIx));
|
|
}
|
|
}
|
|
|
|
// NextSub should be the new sub-vector.
|
|
// Make sure it is not used yet.
|
|
// Note this works for special case in which NextSub = nullptr.
|
|
isSubCandidate = true;
|
|
Value *NextElt = (i < (nelts - 1)) ? AllIEIs[i + 1].Elt : nullptr;
|
|
if (!NextElt || (NextSub && IsInSubVecs(NextSub)) || (!NextSub && IsInSubVecs(NextElt))) {
|
|
isSubCandidate = false;
|
|
}
|
|
Sub = NextSub;
|
|
SubStartIx = i + 1;
|
|
}
|
|
}
|
|
|
|
InsertElementInst *LastIEI = AllIEIs.back().IEI;
|
|
auto B_use = getCandidateStateUse(LastIEI);
|
|
bool hasAlias = false;
|
|
for (int i = 0, sz = (int)SubVecs.size(); i < sz; ++i) {
|
|
std::pair<Value *, int> &aPair = SubVecs[i];
|
|
Value *V = aPair.first;
|
|
int V_ix = aPair.second;
|
|
|
|
// If V is an arg, skip it
|
|
if (isOrCoalescedWithArg(V)) {
|
|
continue;
|
|
}
|
|
|
|
// If V has been an aliaser, skip.
|
|
if (isAliaser(V)) {
|
|
continue;
|
|
}
|
|
|
|
auto S_use = getCandidateStateUse(V);
|
|
auto S_def = getCandidateStateDef(V);
|
|
if (!aliasOkay(S_use, S_def, B_use))
|
|
continue;
|
|
|
|
e_alignment BaseAlign;
|
|
if (!checkSubAlign(BaseAlign, V, LastIEI, V_ix))
|
|
continue;
|
|
|
|
Value *V_nv = m_DeSSA->getNodeValue(V);
|
|
if (!m_DeSSA->isSingleValued(V_nv)) {
|
|
if (control < 2)
|
|
continue;
|
|
|
|
if (hasAnyDCCAsAliaser(V_nv))
|
|
continue;
|
|
|
|
if (aliasInterfere(V_nv, Base_nv, V_ix)) {
|
|
continue;
|
|
}
|
|
}
|
|
addVecAlias(V_nv, Base_nv, FirstIEI, V_ix, BaseAlign);
|
|
|
|
int V_sz = getNumElts(V);
|
|
if (V_sz > 1) {
|
|
// set up Noop inst map to skip emitting them later.
|
|
for (int j = V_ix, sz = V_ix + V_sz; j < sz; ++j) {
|
|
// Safe to mark IEI as noop as IEI chain's coalesced by DeSSA
|
|
InsertElementInst *IEI = AllIEIs[j].IEI;
|
|
if (!m_DeSSA->isNoopAliaser(IEI)) {
|
|
m_HasBecomeNoopInsts[IEI] = 1;
|
|
}
|
|
|
|
ExtractElementInst *EEI = AllIEIs[j].EEI;
|
|
IGC_ASSERT(EEI);
|
|
// Sub-vector
|
|
if (!m_DeSSA->isNoopAliaser(EEI)) {
|
|
// EEI should be in alias map so it can be marked as noop
|
|
Value *EEI_nv = m_DeSSA->getNodeValue(EEI);
|
|
addVecAlias(EEI_nv, Base_nv, FirstIEI, j);
|
|
m_HasBecomeNoopInsts[EEI] = 1;
|
|
}
|
|
}
|
|
} else {
|
|
// scalar
|
|
// Safe to mark IEI as noop as IEI chain's coalesced by DeSSA
|
|
InsertElementInst *IEI = AllIEIs[V_ix].IEI;
|
|
if (m_DeSSA->isNoopAliaser(IEI))
|
|
continue;
|
|
m_HasBecomeNoopInsts[IEI] = 1;
|
|
}
|
|
hasAlias = true;
|
|
}
|
|
return hasAlias;
|
|
}
|
|
|
|
// Return all aliased values of VecAliasee, given the alias:
|
|
// Aliaser->(VecAliasee, Idx)
|
|
void VariableReuseAnalysis::getAllAliasVals(ValueVectorTy &AliasVals, Value *Aliaser, Value *VecAliasee, int Idx) {
|
|
AliasVals.clear();
|
|
auto II = m_aliasMap.find(VecAliasee);
|
|
AliasVals.push_back(VecAliasee);
|
|
if (II != m_aliasMap.end()) {
|
|
SSubVecDesc *aliaseeSV = II->second;
|
|
SBaseVecDesc *baseV = aliaseeSV->Aliasee;
|
|
int nelts = getNumElts(Aliaser);
|
|
int Idx_end = Idx + nelts - 1;
|
|
for (int i = 0, sz = (int)(baseV->Aliasers.size()); i < sz; ++i) {
|
|
SSubVecDesc *SV = baseV->Aliasers[i];
|
|
int start = SV->StartElementOffset;
|
|
int end = start + SV->NumElts - 1;
|
|
if ((start > Idx_end) || (end < Idx))
|
|
continue;
|
|
AliasVals.push_back(SV->Aliaser);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Given two values : Sub and (Base, BaseIdx), check if theses two value
|
|
// interfere each other. Assume these two values are dessa node values.
|
|
bool VariableReuseAnalysis::aliasInterfere(Value *Sub, Value *Base, int BaseIdx) {
|
|
// Vec0 : set of values aliased to Sub (as aliasee). Sub cannot be aliaser
|
|
// as algo does not make an aliaser twice.
|
|
// Vec1 : set of values aliased to Sub if Sub aliases to (Base, BaseIdx).
|
|
ValueVectorTy Vec0, Vec1;
|
|
Vec0.push_back(Sub);
|
|
getAllAliasVals(Vec1, Sub, Base, BaseIdx);
|
|
auto II0 = m_baseVecMap.find(Sub);
|
|
if (II0 != m_baseVecMap.end()) {
|
|
SBaseVecDesc *BV0 = II0->second;
|
|
for (int i = 0, sz = (int)BV0->Aliasers.size(); i < sz; ++i) {
|
|
SSubVecDesc *tSV = BV0->Aliasers[i];
|
|
Vec0.push_back(tSV->Aliaser);
|
|
}
|
|
}
|
|
|
|
for (int i0 = 0, sz0 = (int)Vec0.size(); i0 < sz0; ++i0) {
|
|
Value *V0 = Vec0[i0];
|
|
for (int i1 = 0, sz1 = (int)Vec1.size(); i1 < sz1; ++i1) {
|
|
Value *V1 = Vec1[i1];
|
|
if (m_DeSSA->aliasInterfere(V0, V1))
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Check if a value is used in instructions that we handle.
|
|
VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateUse(Value *V) const {
|
|
// If any of its use is used as func arg, skip
|
|
AState retSt = AState::OK;
|
|
for (User *U : V->users()) {
|
|
Value *Val = U;
|
|
CastInst *CI = dyn_cast<CastInst>(Val);
|
|
if (CI && isNoOpInst(CI, m_pCtx)) {
|
|
Val = CI->getOperand(0);
|
|
}
|
|
if (GenIntrinsicInst *GII = dyn_cast<GenIntrinsicInst>(Val)) {
|
|
switch (GII->getIntrinsicID()) {
|
|
case GenISAIntrinsic::GenISA_sub_group_dpas:
|
|
case GenISAIntrinsic::GenISA_LSC2DBlockWrite:
|
|
case GenISAIntrinsic::GenISA_simdBlockWrite:
|
|
retSt = AState::TARGET;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
} else if (StoreInst *SI = dyn_cast<StoreInst>(Val)) {
|
|
retSt = AState::TARGET;
|
|
} else if (isa<CallInst>(Val)) {
|
|
return AState::SKIP;
|
|
}
|
|
}
|
|
return retSt;
|
|
}
|
|
|
|
// Check if a value is defined by instructions that we handle.
|
|
VariableReuseAnalysis::AState VariableReuseAnalysis::getCandidateStateDef(Value *V) const {
|
|
Value *Val = V;
|
|
CastInst *CI = dyn_cast<CastInst>(Val);
|
|
if (CI && isNoOpInst(CI, m_pCtx)) {
|
|
Val = CI->getOperand(0);
|
|
}
|
|
|
|
// skip if V is defined by a function.
|
|
if (GenIntrinsicInst *GII = dyn_cast<GenIntrinsicInst>(Val)) {
|
|
switch (GII->getIntrinsicID()) {
|
|
case GenISAIntrinsic::GenISA_sub_group_dpas:
|
|
case GenISAIntrinsic::GenISA_LSC2DBlockRead:
|
|
case GenISAIntrinsic::GenISA_simdBlockRead:
|
|
return AState::TARGET;
|
|
default:
|
|
break;
|
|
}
|
|
} else if (LoadInst *SI = dyn_cast<LoadInst>(Val)) {
|
|
return AState::TARGET;
|
|
} else if (isa<CallInst>(Val)) {
|
|
return AState::SKIP;
|
|
}
|
|
return AState::OK;
|
|
}
|
|
|
|
// Vector alias disables extractMask optimization. This function
|
|
// checks if extractMask optim can be applied. And the caller
|
|
// will decide whether to favor extractMask optimization.
|
|
bool VariableReuseAnalysis::isExtractMaskCandidate(Value *V) const {
|
|
auto BIT = [](int n) { return (uint32_t)(1 << n); };
|
|
|
|
if (!isa<VectorType>(V->getType()))
|
|
return false;
|
|
|
|
uint32_t nelts = getNumElts(V);
|
|
// Using 31 to be consistent with extractMash.
|
|
if (nelts > 31) {
|
|
return false;
|
|
}
|
|
uint32_t mask = 0;
|
|
for (auto II = V->user_begin(), IE = V->user_end(); II != IE; ++II) {
|
|
Value *V = *II;
|
|
if (ExtractElementInst *EEI = dyn_cast<llvm::ExtractElementInst>(V)) {
|
|
if (ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getIndexOperand())) {
|
|
uint32_t indexBit = BIT(static_cast<uint>(CI->getZExtValue()));
|
|
mask |= indexBit;
|
|
continue;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
uint32_t fullMask = maskTrailingOnes<uint32_t>(nelts);
|
|
return fullMask > mask;
|
|
}
|
|
|
|
// Check if SubVec is aligned if it becomes a sub-vector at Base_ix of
|
|
// BaseVec. If so, return true with SubVec alignment in BaseAlign.
|
|
bool VariableReuseAnalysis::checkSubAlign(e_alignment &BaseAlign, Value *SubVec, Value *BaseVec, int Base_ix) {
|
|
auto maxAlign = [](e_alignment A, e_alignment B) {
|
|
if (A == EALIGN_AUTO)
|
|
return B;
|
|
if (B == EALIGN_AUTO)
|
|
return A;
|
|
return A > B ? A : B;
|
|
};
|
|
|
|
auto toBytes = [](e_alignment A) {
|
|
switch (A) {
|
|
case EALIGN_BYTE:
|
|
return 1;
|
|
case EALIGN_WORD:
|
|
return 2;
|
|
case EALIGN_DWORD:
|
|
return 4;
|
|
case EALIGN_QWORD:
|
|
return 8;
|
|
case EALIGN_OWORD:
|
|
return 16;
|
|
case EALIGN_HWORD:
|
|
return 32;
|
|
case EALIGN_32WORD:
|
|
return 64;
|
|
case EALIGN_64WORD:
|
|
return 128;
|
|
default:
|
|
break;
|
|
}
|
|
return 0;
|
|
};
|
|
|
|
BaseAlign = EALIGN_AUTO;
|
|
|
|
// Get element bytes from original base vector
|
|
Type *eltTy = BaseVec->getType()->getScalarType();
|
|
uint32_t eltBytes = (uint32_t)m_DL->getTypeStoreSize(eltTy);
|
|
|
|
// get all coalesced values for subvec and find the max alignment
|
|
SmallVector<Value *, 16> allVals;
|
|
m_DeSSA->getAllCoalescedValues(SubVec, allVals);
|
|
|
|
e_alignment sub_align = EALIGN_AUTO;
|
|
for (auto II : allVals) {
|
|
Value *V = II;
|
|
e_alignment thisAlign = getMinAlignment(V, m_WIA, m_pCtx);
|
|
sub_align = maxAlign(sub_align, thisAlign);
|
|
}
|
|
int sub_alignBytes = toBytes(sub_align);
|
|
if (sub_alignBytes == 0) {
|
|
// AUTO align is fine.
|
|
return true;
|
|
}
|
|
|
|
// m_SimdSize is unavailable, using smallest simdsize for now.
|
|
int simdsize = numLanes(m_pCtx->platform.getMinDispatchMode());
|
|
int uLanes = (m_WIA->isUniform(BaseVec) ? 1 : simdsize);
|
|
// If base is an aliaser at this time, must check its aliasee
|
|
|
|
Value *BaseVec_nd = m_DeSSA->getNodeValue(BaseVec);
|
|
int ix1 = 0;
|
|
auto MII = m_aliasMap.find(BaseVec_nd);
|
|
if (MII != m_aliasMap.end()) {
|
|
SSubVecDesc *SV = MII->second;
|
|
ix1 = SV->StartElementOffset;
|
|
}
|
|
int ix = Base_ix + ix1;
|
|
int startOffset = eltBytes * uLanes * ix;
|
|
if ((startOffset % sub_alignBytes) != 0) {
|
|
// cannot be correctly aligned, skip
|
|
return false;
|
|
}
|
|
BaseAlign = sub_align;
|
|
return true;
|
|
}
|
|
|
|
bool VariableReuseAnalysis::skipScalarAliaser(BasicBlock *BB, Value *ScalarVal) const {
|
|
Instruction *I = dyn_cast<Instruction>(ScalarVal);
|
|
// Don't count dbg instructions in BB
|
|
unsigned InstCountInBB = BB->sizeWithoutDebug();
|
|
return ((InstCountInBB > m_BBSizeThreshold) || !I || I->getParent() != BB);
|
|
}
|