/*========================== begin_copyright_notice ============================ Copyright (C) 2017-2023 Intel Corporation SPDX-License-Identifier: MIT ============================= end_copyright_notice ===========================*/ #pragma once #include "Compiler/CISACodeGen/WIAnalysis.hpp" #include "Compiler/CISACodeGen/PatternMatchPass.hpp" #include "Compiler/CISACodeGen/DeSSA.hpp" #include "Compiler/CISACodeGen/CoalescingEngine.hpp" #include "Compiler/CISACodeGen/BlockCoalescing.hpp" #include "common/LLVMWarningsPush.hpp" #include "Compiler/MetaDataUtilsWrapper.h" #include "llvm/ADT/DenseMap.h" #include "llvm/IR/Dominators.h" #include "llvm/ADT/TinyPtrVector.h" #include #include #include #include #include "llvm/Pass.h" #include "llvmWrapper/IR/DerivedTypes.h" #include "llvm/Support/raw_ostream.h" #include "common/LLVMWarningsPop.hpp" #include "Compiler/CISACodeGen/RegisterEstimator.hpp" #include #include #include #include "Probe/Assertion.h" namespace IGC { // SBaseVecDesc and SSubVecDesc together describe subvec to baseVec aliasing // BaseVec is aka aliasee; subVec is aka aliaser. struct SSubVecDesc; struct SBaseVecDesc { // Minimum aligment required for BaseVector. // For example, // int2 a = ld p // int4 b = {a, x, y} // as 'a' is grf-aligned (ld's return payload), 'b' should be aligned // at grf too in order to make 'a' the part of 'b'. e_alignment Align; llvm::Value *BaseVector; // Keep the original vector type as BaseVector is a dessa node value, // which could be a different value with a differnt type. For vector // aliasing, both sub and base must have the same element size. llvm::VectorType *OrigType; // All BaseVector's aliasers (subVec) llvm::SmallVector Aliasers; SBaseVecDesc(llvm::Value *V, llvm::Value *OV, e_alignment A) : Align(A), BaseVector(V), OrigType(llvm::dyn_cast(OV->getType())) { IGC_ASSERT(OrigType != nullptr); } }; struct SSubVecDesc { // Denote a subvector of BaseVector starting at StartElementOffset. // StartElementOffset is in the unit of BaseVector's element type. // // Current implementation assumes that subvector and basevector have // the same element size (could be differnt types, such as int32_t // and float, etc). Here is the example showing the // relationship among them: // Given the aliasing relation: // Aliaser[0:n] --> BaseVector[0:m] // where (StartElementOffset + n) <= m. Then, // Aliaser = BaseVector[StartElementOffset, StartElementOffset+n] // Aliaser and Aliasee // They are dessa node values. llvm::Value *Aliaser; // Keep all aliasers of BaseVecotr. Valid for the root entry only, // that is, Aliaser == BaseVector SBaseVecDesc *Aliasee; short StartElementOffset; // in the unit of BaseVector's element type short NumElts; // the number of elements of Aliaser SSubVecDesc(llvm::Value *V) : Aliaser(V), Aliasee(nullptr), StartElementOffset(0) { IGCLLVM::FixedVectorType *VTy = llvm::dyn_cast(V->getType()); NumElts = VTy ? (short)VTy->getNumElements() : 1; } }; // A struct for capturing InsElt aliasing b/w sub-vector/vector // Two cases are considered: // // case 1: Insert to (x0 and x1 are inserted to y) // case 1.1 // int4 x0, x1; // int8 y = (x0, x1); // // case 1.2 // int4 y = (s0, s1, s2, s3); // // case 2: Extract from (y is extracted from x) // int8 x; // int4 y0 = x.s0123 (first half of x) // int4 y1 = x.4567 (second half of x) // // Corresponding LLVM IRs are some extElt instructions followed by insElt. // This struct captures this relation by describing what extElt is used // to create the element value used in insElt. // // For example, y0 in case 2 would be: // s0 = extElt BVec, 0 // s1 = extElt BVec, 1 // s2 = extElt BVec, 2 // s3 = extElt BVec, 3 // // v0 = insElt undef, s0, 0 // v1 = insElt v0, s1, 1 // v2 = insElt V1, s2, 2 // v3 = insElt V2, s3, 3 // // (Note that sometimes, type cast instrutions might be present for // s0, s1, s2, and s3 before doing insElt.) // // v0 (insElt undef, s0, 0) at 0 <--> s0 (extElt BVec, 0) // v1 (insElt undef, s1, 1) at 1 <--> s1 (extElt BVec, 1) // v2 (insElt undef, s2, 2) at 2 <--> s2 (extElt BVec, 2) // v3 (insElt undef, s3, 3) at 3 <--> s3 (extElt BVec, 3) // struct SVecInsEltInfo { llvm::InsertElementInst *IEI; llvm::Value *Elt; // If Elt is null, EEI must not be null. EEI is used as scalar operand // in IEI and is the same as (FromVec, FromVec_eltIx). llvm::ExtractElementInst *EEI; llvm::Value *FromVec; int FromVec_eltIx; SVecInsEltInfo() : IEI(nullptr), Elt(nullptr), EEI(nullptr), FromVec(nullptr), FromVec_eltIx(0) {} }; /// RPE based analysis for querying variable reuse status. /// /// Let two instructions DInst and UInst be defined in the same basic block, /// /// DInst = ... /// UInst = DInst op Other /// /// and assume it is legal to use the same CVariable for DInst and UInst. This /// analysis determines if this reuse will be applied or not. When overall /// register pressure is low, this decision could be most aggressive. When DInst /// and UInst are acrossing a high pressure region (defined below), then the /// reuse will only be applied less aggressively. /// /// Denote by RPE(x) the estimated register pressure at point x. Let Threshold /// be a predefined threshold constant. We say pair (DInst, UInst) is crossing a /// high register pressure region if /// /// (1) RPE(x) >= Threshold for any x between DInst and UInst (inclusive), or /// (2) RPE(x) >= Threshold for any use x of UInst. /// class VariableReuseAnalysis : public llvm::FunctionPass, public llvm::InstVisitor { public: static char ID; VariableReuseAnalysis(); ~VariableReuseAnalysis() {} typedef llvm::SmallVector VecInsEltInfoTy; typedef std::unordered_map AliasMapTy; typedef std::unordered_map BaseVecMapTy; typedef llvm::SmallVector ValueVectorTy; typedef llvm::DenseMap Val2ValMapTy; virtual bool runOnFunction(llvm::Function &F) override; // Need to perform this after WI/LiveVars/DeSSA/CoalescingEnging. // (todo: check if coalescing can be merged into dessa completely) virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { // AU.addRequired(); AU.setPreservesAll(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); AU.addRequired(); } llvm::StringRef getPassName() const override { return "VariableReuseAnalysis"; } /// Initialize per-function states. In particular, check if the entire /// function has a low pressure. void BeginFunction(llvm::Function *F, unsigned SimdSize) { m_SimdSize = (uint16_t)SimdSize; if (m_RPE) { if (m_RPE->isGRFPressureLow(m_SimdSize)) m_IsFunctionPressureLow = Status::True; else m_IsFunctionPressureLow = Status::False; } } bool isCurFunctionPressureLow() const { return m_IsFunctionPressureLow == Status::True; } bool isCurBlockPressureLow() const { return m_IsBlockPressureLow == Status::True; } /// RAII class to initialize and cleanup basic block level cache. class EnterBlockRAII { public: explicit EnterBlockRAII(VariableReuseAnalysis *VRA, llvm::BasicBlock *BB) : VRA(VRA) { VRA->BeginBlock(BB); } ~EnterBlockRAII() { VRA->EndBlock(); } EnterBlockRAII(const EnterBlockRAII &) = delete; EnterBlockRAII &operator=(const EnterBlockRAII &) = delete; VariableReuseAnalysis *VRA; }; friend class EnterBlockRAII; // Check use instruction's legality and its pressure impact. bool checkUseInst(llvm::Instruction *UInst, LiveVars *LV); // Check def instruction's legality and its pressure impact. bool checkDefInst(llvm::Instruction *DInst, llvm::Instruction *UInst, LiveVars *LV); // Visitor void visitExtractElementInst(llvm::ExtractElementInst &I); bool isAliasedValue(llvm::Value *V) { if (m_pCtx->getVectorCoalescingControl() > 0) { return isAliased(V); } return false; } // getRootValue(): // return dessa root value; if dessa root value // is null, return itself. llvm::Value *getRootValue(llvm::Value *V); // getAliasRootValue() // return alias root value if it exists, itself otherwise. llvm::Value *getAliasRootValue(llvm::Value *V); /// printAlias - print value aliasing info in human readable form void printAlias(llvm::raw_ostream &OS, const llvm::Function *F = nullptr) const; /// dumpAalias - dump alias info to dbgs(). void dumpAlias() const; // // m_aliasMap: // For mapping aliaser to aliasee: aliaser -> aliasee // where aliasee is a vector and aliaser could be a scalar or a vector. // // Properties of the map: // 1. alias root value. (root is aka baseVector) // A root value is denoted by a special map entry from a value to // itself (self-mapping entry) // Vec0 -> Vec0 // Root value is always an aliasee, meaning the map has other // entry like: // Vec1 -> Vec0 // During constructing m_aliasMap, previous root value may become // a non-root value. // // 2. Any non-root value (no self-mapping entry) in this map is an // aliaser. A value is either an aliaser or aliasee, but not // both. For example, // cannot have this: // Vec0 -> Vec1 // v0 -> Vec0 // Instead, they are represented as follows: // Vec0 -> Vec1 // v0 -> Vec1 // 3. Liveness of aliaser and aliasee are not combined // Unlike dessa alias, in which aliser's liveness is merged // into aliasee's. Here, aliaser's liveness is nerver merged // into aliasee's. // // Notation: // aliasCC(v) : all values that have the same alias root as v, // including alias root. // subAlias(v, startIx, nelts) : // all x in aliasCC(v) that overlap v's elements in // range baseVector[startIx : startIx+nelts-1]. // dessaCC(v) : all values in the same dessa congruent class as v. // For example, V is of int4, s0, s1, s2, s3 are scalars that are // aliased to V's element at 0, 1, 2, and 2, respectively. // s0 --> v[0] // s1 --> v[1] // s2 --> v[2] // s3 --> v[3] // aliasCC(s0) = aliasCC(s1) = aliasCC(s2) = aliasCC(s3) = aliasCC(v) // = {v, s0, s1, s2, s3} // subAlias(v, 2, 2) = {s2, s3, v} // only s2&s3 overlaps V[2:3] // dessaCC(s0) = { values in the same dessa CC } // AliasMapTy m_aliasMap; BaseVecMapTy m_baseVecMap; // sorted m_baseVecMap for creating cvar in derministic order llvm::SmallVector m_sortedBaseVec; // Function argument cannot be made a sub-part of another bigger // value as it has been assigned a fixed physical GRF. The following // map is used for checking if a value is an arg or coalesced with // an argument by dessa. std::list m_ArgDeSSARoot; bool isOrCoalescedWithArg(llvm::Value *V) { if (llvm::isa(V)) return true; if (m_DeSSA) { if (llvm::Value *R = m_DeSSA->getRootValue(V)) { auto IE = m_ArgDeSSARoot.end(); auto it = std::find(m_ArgDeSSARoot.begin(), IE, R); return it != IE; } } return false; } void addVecAlias(llvm::Value *Aliaser, llvm::Value *Aliasee, llvm::Value *OrigBaseVec, int Idx, e_alignment AliaseeAlign = EALIGN_AUTO); SSubVecDesc *getOrCreateSubVecDesc(llvm::Value *V); SBaseVecDesc *getOrCreateBaseVecDesc(llvm::Value *V, llvm::Value *OV, e_alignment A); void getAllAliasVals(ValueVectorTy &AliasVals, llvm::Value *Aliaser, llvm::Value *VecAliasee, int Idx); // No need to emit code for instructions in this map due to aliasing llvm::DenseMap m_HasBecomeNoopInsts; // For emitting livetime start to visa to assist liveness analysis // 1. m_LifetimeAt1stDefInBB : aliasee -> BB // Once a first def is encounted, add lifetime start and clear // this map entry afterwards. // 2. m_LifetimeAtEndOfBB : BB -> set of values // Add lifetime start for all values in the set at the end of BB. llvm::DenseMap m_LifetimeAt1stDefOfBB; llvm::DenseMap> m_LifetimeAtEndOfBB; private: void reset() { m_SimdSize = 0; m_IsFunctionPressureLow = Status::Undef; m_IsBlockPressureLow = Status::Undef; m_aliasMap.clear(); m_baseVecMap.clear(); m_sortedBaseVec.clear(); m_root2AliasMap.clear(); m_HasBecomeNoopInsts.clear(); m_LifetimeAt1stDefOfBB.clear(); m_LifetimeAtEndOfBB.clear(); } // Initialize per-block states. In particular, check if the entire block has a // low pressure. void BeginBlock(llvm::BasicBlock *BB) { IGC_ASSERT(m_SimdSize != 0); if (m_RPE) { CodeGenContext *context = nullptr; context = getAnalysis().getCodeGenContext(); uint32_t BBPresure = m_RPE->getMaxLiveGRFAtBB(BB, m_SimdSize); if (BBPresure <= context->getNumGRFPerThread()) m_IsBlockPressureLow = Status::True; else m_IsBlockPressureLow = Status::False; } } // Cleanup per-block states. void EndBlock() { m_IsBlockPressureLow = Status::Undef; } void visitLiveInstructions(llvm::Function *F); void setLifeTimeStartPos(llvm::Value *RootVal, ValueVectorTy &AllVals, BlockCoalescing *theBC); void postProcessing(); void sortAliasResult(); // Return true if this instruction can be converted to an alias bool canBeAlias(llvm::CastInst *I); // If V has been payload-coalesced, return true. bool hasBeenPayloadCoalesced(llvm::Value *V) { return (m_coalescingEngine->GetValueCCTupleMapping(V) != nullptr); } void mergeVariables(llvm::Function *F); void InsertElementAliasing(llvm::Function *F); llvm::Value *traceAliasValue(llvm::Value *V); bool getElementValue(llvm::InsertElementInst *IEI, int &IEI_ix, llvm::Value *&S, llvm::Value *&V, int &V_ix); bool getAllInsEltsIfAvailable(llvm::InsertElementInst *FirstIEI, VecInsEltInfoTy &AllIEIs, bool OnlySameBB = false); bool processExtractFrom(VecInsEltInfoTy &AllIEIs); bool processInsertTo(llvm::BasicBlock *BB, VecInsEltInfoTy &AllIEIs); // Check if sub can be aliased to Base[Base_ix:size(sub)-1] bool aliasInterfere(llvm::Value *Sub, llvm::Value *Base, int Base_ix); // DCC: DeSSA congruent class // If any value of V's DCC is an aliaser, return true. bool hasAnyDCCAsAliaser(llvm::Value *V) const; // If another value of V's DCC(not V itself) is an aliasee, return true. bool hasAnotherDCCAsAliasee(llvm::Value *V) const; bool isAliased(llvm::Value *V) const; bool isAliaser(llvm::Value *V) const; // For alias(S,B), each of S and B can be one of three states. enum class AState { SKIP, // skip aliasing OK, // aliasing okay if the other is target TARGET // aliasing okay if no one is SKIP }; bool isExtractMaskCandidate(llvm::Value *V) const; AState getCandidateStateUse(llvm::Value *V) const; AState getCandidateStateDef(llvm::Value *V) const; bool aliasOkay(AState A, AState B, AState C) const { if ((A == AState::TARGET || B == AState::TARGET || C == AState::TARGET) && A != AState::SKIP && B != AState::SKIP && C != AState::SKIP) { return true; } return false; } bool checkSubAlign(e_alignment &BaseAlign, llvm::Value *Subvec, llvm::Value *Basevec, int Base_ix); CodeGenContext *m_pCtx; WIAnalysis *m_WIA; LiveVars *m_LV; DeSSA *m_DeSSA; CodeGenPatternMatch *m_PatternMatch; CoalescingEngine *m_coalescingEngine; llvm::DominatorTree *m_DT = nullptr; const llvm::DataLayout *m_DL = nullptr; llvm::BumpPtrAllocator Allocator; /// Current Function; set on entry to runOnFunction /// and unset on exit to runOnFunction llvm::Function *m_F = nullptr; // The register pressure estimator (optional). RegisterEstimator *m_RPE; // Results may be cached at kernel level or basic block level. Use the // following enum to indicate cached flag status. enum class Status : int8_t { Undef = -1, False = 0, True = 1 }; // Per SIMD-compilation constant. Each compilation needs to initialize the // SIMD mode. uint16_t m_SimdSize; // When this function has low register pressure, reuse can be applied // aggressively without checking each individual def-use pair. Status m_IsFunctionPressureLow; // When this block has low register pressure, reuse can be applied // aggressively without checking each individual def-use pair. Status m_IsBlockPressureLow; // For vector alising on non-isolated values (under VectorAlias >= 2). // If a value V is in a dessa CC (not isolated) and V is aliased, add // into the map. This is a quick check to see if any // value in a dessa CC has been aliased (either aliaser or aliasee) Val2ValMapTy m_root2AliasMap; // Max size of BB for which scalar aliasing will apply. // Scalar aliasing will skip for BBs beyond this threshold const size_t m_BBSizeThreshold; // For vector aliasing heuristic to prevent possible high-reg pressure bool skipScalarAliaser(llvm::BasicBlock *BB, llvm::Value *V) const; }; llvm::FunctionPass *createVariableReuseAnalysisPass(); } // namespace IGC