/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//===-------- CoalescingEngine.cpp - Pass and analysis for coalescing-------===//
//
//  Intel Extension to LLVM core
//===----------------------------------------------------------------------===//
//
// This pass is an implementation of the coalescing idea that originated in USC,
// but re-designed to work on (and take advantage of) SSA form.
//
//===----------------------------------------------------------------------===//

#include "Compiler/CISACodeGen/CoalescingEngine.hpp"
#include "Compiler/CISACodeGen/DeSSA.hpp"
#include "Compiler/CISACodeGen/ShaderCodeGen.hpp"
#include "Compiler/CISACodeGen/PatternMatchPass.hpp"
#include "Compiler/MetaDataApi/MetaDataApi.h"
#include "Compiler/CISACodeGen/helper.h"
#include "PayloadMapping.hpp"
#include "common/debug/Debug.hpp"
#include "common/debug/Dump.hpp"
#include "common/igc_regkeys.hpp"
#include "common/LLVMWarningsPush.hpp"
#include <llvm/IR/InstVisitor.h>
#include <llvm/ADT/SmallVector.h>
#include "common/LLVMWarningsPop.hpp"
#include "Compiler/IGCPassSupport.h"
#include "Probe/Assertion.h"

using namespace llvm;
using namespace IGC;
using namespace IGC::Debug;
using namespace IGC::IGCMD;

char CoalescingEngine::ID = 0;
#define PASS_FLAG "CoalescingEngine"
#define PASS_DESCRIPTION "coalesce moves coming from payloads, insert and extract element"
#define PASS_CFG_ONLY true
#define PASS_ANALYSIS true
IGC_INITIALIZE_PASS_BEGIN(CoalescingEngine, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
IGC_INITIALIZE_PASS_DEPENDENCY(WIAnalysis)
IGC_INITIALIZE_PASS_DEPENDENCY(LiveVarsAnalysis)
IGC_INITIALIZE_PASS_DEPENDENCY(CodeGenPatternMatch)
IGC_INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
IGC_INITIALIZE_PASS_DEPENDENCY(DeSSA)
IGC_INITIALIZE_PASS_DEPENDENCY(MetaDataUtilsWrapper)
IGC_INITIALIZE_PASS_DEPENDENCY(ResourceLoopAnalysis)
IGC_INITIALIZE_PASS_END(CoalescingEngine, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)

namespace IGC {

CoalescingEngine::CoalescingEngine() : FunctionPass(ID) {
  initializeCoalescingEnginePass(*PassRegistry::getPassRegistry());
}

void CoalescingEngine::getAnalysisUsage(llvm::AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<llvm::DominatorTreeWrapperPass>();
  AU.addRequired<WIAnalysis>();
  AU.addRequired<LiveVarsAnalysis>();
  AU.addRequired<CodeGenPatternMatch>();
  AU.addRequired<DeSSA>();
  AU.addRequired<MetaDataUtilsWrapper>();
  AU.addRequired<CodeGenContextWrapper>();
  AU.addRequired<ResourceLoopAnalysis>();
}

void CoalescingEngine::CCTuple::print(raw_ostream &OS, const Module *) const {
  OS << "CC Tuple ";

  auto IT = OffsetToCCMap.begin();
  while (IT != OffsetToCCMap.end()) {
    OS << IT->first << " : ";
    IT++;
  }
}

void CoalescingEngine::CCTuple::dump() const { print(ods()); }

void CoalescingEngine::print(raw_ostream &OS, const Module *) const {
  OS << "CC Tuple list: \n";

  for (auto itr = m_CCTupleList.begin(), iend = m_CCTupleList.end(); itr != iend; ++itr) {
    CCTuple *ccTuple = *itr;
    ccTuple->print(OS);
  }

  OS << "CC Tuple list end. \n";
}

void CoalescingEngine::dump() const { print(ods()); }

/// \brief Entry point.
bool CoalescingEngine::runOnFunction(Function &MF) {
  MetaDataUtils *pMdUtils = getAnalysis<MetaDataUtilsWrapper>().getMetaDataUtils();
  m_ModuleMetadata = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
  m_CodeGenContext = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
  m_Platform = m_CodeGenContext->platform;
  m_PayloadMapping = PayloadMapping(m_CodeGenContext);
  if (IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing) || m_ModuleMetadata->compOpt.WaDisablePayloadCoalescing) {
    return false;
  }
  if (!isEntryFunc(pMdUtils, &MF)) {
    return false;
  }
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  WIA = &getAnalysis<WIAnalysis>();
  CG = &getAnalysis<CodeGenPatternMatch>();
  LV = &getAnalysis<LiveVarsAnalysis>().getLiveVars();
  RLA = &getAnalysis<ResourceLoopAnalysis>();
  if (IGC_IS_FLAG_ENABLED(EnableDeSSA)) {
    m_DeSSA = &getAnalysis<DeSSA>();
  } else {
    m_DeSSA = nullptr;
  }

  for (Function::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    ProcessBlock(&(*I));
  }

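  // Note: blocks are visited in dominator-tree pre-order (a DFS over the DomTree),
  // presumably so that tuples in dominating blocks are coalesced before tuples in
  // dominated blocks, keeping the dominating-parent bookkeeping consistent.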
  for (df_iterator<DomTreeNode *> DI = df_begin(DT->getRootNode()), DE = df_end(DT->getRootNode()); DI != DE; ++DI) {

    IncrementalCoalesce(DI->getBlock());
  }
  if (IGC_IS_FLAG_ENABLED(PrintToConsole))
    dump();
  return false;
}

/// Coalesce instructions within a single BB.
void CoalescingEngine::IncrementalCoalesce(BasicBlock *MBB) {
  std::vector<Instruction *> &DefInstrs = BBProcessingDefs[MBB];
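  // Sort the candidate defs by their LiveVars instruction index, so they are
  // visited in the order in which they appear inside the block.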
  std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LV));

  for (std::vector<Instruction *>::const_iterator BBI = DefInstrs.begin(), BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
    Instruction *DefMI = *BBI;

    auto RI = ValueNodeMap.find((Value *)DefMI);
    if (RI == ValueNodeMap.end()) {
      // Intrinsics themselves are not mapped to a node, they are
      // tuple generators.

      // NOTE: Please leave this commented code snippet! It is useful when debugging coalescing regressions:
      //       (used to isolate coalescing to particular shader to check failing/passing using binary search)
      // DWORD hash = (DWORD) static_cast<IGC::CodeGenContext&>(MBB->getParent()->getContext()).hash.getAsmHash();
      ////if (hash >= 0xA27Affff && hash <= 0xA3CFFF00)
      // if (hash >= 0xA3C00000 && hash <= 0xA3CFFFFF)
      //{
      //     continue;
      // }

      if (GenIntrinsicInst *intrinsic = llvm::dyn_cast<llvm::GenIntrinsicInst>(DefMI)) {
        if ((isURBWriteIntrinsic(intrinsic) && !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_URB)) ||
            (llvm::isa<llvm::RTWriteIntrinsic>(intrinsic) && !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_RT)) ||
            (llvm::isa<llvm::AtomicTypedIntrinsic>(intrinsic) &&
             !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_AtomicTyped)) ||
            (llvm::isa<llvm::RTDualBlendSourceIntrinsic>(intrinsic) &&
             !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_RT) && m_Platform.hasDualKSPPS())) {
          ProcessTuple(DefMI);
        }
      }

      if (isSampleInstruction(DefMI) && !IGC_IS_FLAG_ENABLED(DisablePayloadCoalescing_Sample)) {
        ProcessTuple(DefMI);
      } else if (isLdInstruction(DefMI)) {
        ProcessTuple(DefMI);
      }
    }
  }
}

static bool IsBindlessSampler(Instruction *inst) {
  if (SampleIntrinsic *sample = dyn_cast<SampleIntrinsic>(inst)) {
    Value *sampler = sample->getSamplerValue();
    uint bufferIndex = 0;
    bool directIndexing = false;
    if (sampler->getType()->isPointerTy()) {
      BufferType bufType =
          DecodeAS4GFXResource(sampler->getType()->getPointerAddressSpace(), directIndexing, bufferIndex);
      if (bufType == BINDLESS_SAMPLER) {
        return true;
      }
    }
  }
  return false;
}

/// Core algorithm.
void CoalescingEngine::ProcessTuple(Instruction *tupleGeneratingInstruction) {
  const uint numOperands = m_PayloadMapping.GetNumPayloadElements(tupleGeneratingInstruction);
  // a bindless sampler always has a header
  bool needsHeader = IsBindlessSampler(tupleGeneratingInstruction);

  // There should be an early exit in the case we have split-send available
  // and this instruction is sure to generate a two-element payload.
  if (m_Platform.supportSplitSend() && !needsHeader) {
    if (numOperands < 3) {
      return;
    }
  } else {
    if (numOperands < 2) {
      return;
    }
  }
#if 0
        // turn off coalescing when inst is inside a fused resource-loop
        // feels like this approach may not be strong enough.
        // So opt to the solution of disabling coalescing in whole BB
        auto loopMarker =
            RLA->GetResourceLoopMarker(tupleGeneratingInstruction);
        if ((loopMarker & ResourceLoopAnalysis::MarkResourceLoopInside) ||
            ((loopMarker & ResourceLoopAnalysis::MarkResourceLoopStart) &&
             !(loopMarker & ResourceLoopAnalysis::MarkResourceLoopEnd)))
        {
            return;
        }
#endif
  IGC_ASSERT(numOperands >= 2);

  // No result, but has the side effect of updating the split mapping.
  DecideSplit(tupleGeneratingInstruction);

  uint numSplits = GetNumSplitParts(tupleGeneratingInstruction);
  for (uint numPart = 0; numPart < numSplits; numPart++) {
    SetCurrentPart(tupleGeneratingInstruction, numPart);
    const uint numOperands = GetNumPayloadElements(tupleGeneratingInstruction);
    bool isAnyNodeAnchored = false, isAnyNodeCoalescable = false;
    SmallPtrSet<CCTuple *, MaxTouchedTuples> touchedTuplesSet;
    SmallVector<CCTuple *, MaxTouchedTuples> touchedTuples;

    // Step: Prepare.
    PrepareTuple(numOperands, tupleGeneratingInstruction,
                 // out:
                 touchedTuplesSet, touchedTuples, isAnyNodeAnchored, isAnyNodeCoalescable);

    if (!isAnyNodeCoalescable) {
      continue;
    }

    // So at this point, there is at least one node that is a candidate for coalescing.
    // It might already be part of a CCtuple, or it might be as yet unconstrained.

    // A Tuple Generating Instruction enforces sequential order of registers allocated to values.
    // Each value might belong to (up to) one CCtuple.
    // If it belongs to a CCTuple, it has a unique offset in that CCTuple.
    // Three cases are considered:
    // 1) No value has a CCtuple assigned -> create a new CCTuple and assign offsets.
    //     a) if the platform supports split-send, then some (arbitrary) split point should be determined
    // 2) At least one value belongs to a CCtuple (we say it is 'anchored' (constrained) to that tuple)
    //    and all values belong to the same CCtuple
    // 3) More than one value belongs to a tuple, and they belong to different (>1) tuples: NOT implemented

    // Case 1) : create new CCTuple
    if (!isAnyNodeAnchored) {
      CreateTuple(numOperands, tupleGeneratingInstruction);
    } else {
      // Case 2) : at least one value is 'anchored'(constrained) to a CCtuple
      // Sub-case: All arguments belong to the same tuple.
      if (touchedTuples.size() == 1) {
        // Step I: find an anchoring element.
        // If arguments of this tuple-instruction are already constrained by a CCtuple, it means
        // that they also constrain the tuple that would be generated by this payload.
        // They 'anchor' the new tuple, so pick the element that anchors the tuple and
        // determine its offset.

        // E.g. :
        //  ..     v1 (0) v2 (1) CCtuple
        //  v0 (0) v1 (1) v2 (2) (this payload)
        //  v1 will have: thisTupleStartOffset = 1, rootTupleStartOffset = 0
        //  CCtuple will be 'grown' with element v0, that is at relative offset =  -1
        //  v0(-1) v1 (0) v2 (1) CCtuple

        CCTuple *ccTuple = NULL;
        int rootTupleStartOffset = 0;
        int thisTupleStartOffset = 0;

        DetermineAnchor(numOperands, tupleGeneratingInstruction,
                        // out:
                        ccTuple, rootTupleStartOffset, thisTupleStartOffset);

        IGC_ASSERT(ccTuple);

        /* Step II: Interference checking */
        int offsetDiff = rootTupleStartOffset - thisTupleStartOffset;

        bool interferes = false;

        // Heuristic: we do not want to grow the ccTuple size too much
        //  [c0 c1 c2 c3  c4  c5  c6  c7]
        //(offsetDiff=4)[e0' e1' e2' e3' e4' e5' e6' e7']
        //  here, tuple numElements = 8, and offsetDiff = 4, so it would result in a
        //  12-element CC tuple if joined together. We want to prevent growing
        //  beyond MaxTupleSize elements.
        if (abs(offsetDiff) > 0 && ccTuple->GetNumElements() >= MaxTupleSize) {
          interferes = true;
        }

        ProcessInterferencesElementFunctor interferencesFunctor(false /*forceEviction */, tupleGeneratingInstruction,
                                                                offsetDiff, ccTuple, this);

        if (!interferes) {
          interferes = InterferenceCheck(numOperands, tupleGeneratingInstruction, offsetDiff, ccTuple,
                                         // out:
                                         &interferencesFunctor);

          if (!interferes && (ccTuple->HasNonHomogeneousElements() ||
                              m_PayloadMapping.HasNonHomogeneousPayloadElements(tupleGeneratingInstruction))) {
            interferes =
                CheckIntersectionForNonHomogeneous(numOperands, tupleGeneratingInstruction, offsetDiff, ccTuple);
          }
        }

        // If 'interferes' is false, it means that the whole transaction can be committed,
        // provided that elements in dominatorsForDisplacement are displaced, and other nodes are attached.
        // If 'interferes' is true, then no element will be attached to the ccTuple.
        if (!interferes) {
          SmallPtrSet<Value *, MaxTouchedTuples> touchedValuesSet;
          for (uint i = 0; i < numOperands; i++) {
            Value *val = GetPayloadElementToValueMapping(tupleGeneratingInstruction, i);

            ccTuple->CheckIndexForNonInitializedSlot(i + offsetDiff);

            if (touchedValuesSet.count(val)) {
              continue;
            } else {
              touchedValuesSet.insert(val);
            }

            if (IsValConstOrIsolated(val)) {
              // do nothing
            } else {
              Value *RootV = getRegRoot(val);
              if (!RootV) // bail out if isolated
                continue;

              // We do not expect to get an Argument here, it must have been isolated.
              IGC_ASSERT(dyn_cast<llvm::Instruction>(val));
              IGC_ASSERT(ValueNodeMap.count(val));
              // auto RI = ValueNodeMap.find(val);
              // ElementNode *Node = RI->second;
              ElementNode *Node = ValueNodeMap[val];

              if (!interferencesFunctor.GetComputedValuesForIsolation().count(val)) {
                ccTuple->AttachNodeAtIndex(i + offsetDiff, Node, *this);
                IGC_ASSERT(getRegRoot(val));
              }
            }
          } // for
        } else // (interferes) is true
        {
          CreateTuple(numOperands, tupleGeneratingInstruction);
        }
      } else {
        // Case 3)
        // More than one value belongs to a tuple, and they belong to different (>1) tuples.
        // FIXME: not implemented yet
        // Need to implement code for joining two non-overlapping tuples.
        // This probably does not bring much benefit, but is nevertheless complex to implement.
        // Having two congruence class tuples, we need to find whether piecewise congruent classes
        // of the tuples interfere. Here a 'two-set interference check' due to Boissinot could be
        // used (first sorting elements).
      }
    }
  }
}

/// Checks whether a non-homogeneous payload layout prevents joining this
/// tuple instruction with the given ccTuple.
bool CoalescingEngine::CheckIntersectionForNonHomogeneous(const uint numOperands,
                                                          Instruction *tupleGeneratingInstruction, const int offsetDiff,
                                                          CCTuple *ccTuple) {
  // Picturesque description:
  //  Hi - homogeneous slots
  //  <left_part>[H_0 H_1 .. H_n]<right_part>

  // ccTuple:
  //  <left_part> [H_0  H_1  ..  H_n]<right_part>
  //
  // 1) this tuple instruction contains non-homogeneous part:
  //  <left_part'>[H_0' H_1' .. H_n']<right_part'>
  // 2) or this tuple instruction does not contain non-homogeneous part
  //              [H_0' H_1' .. H_n']

  bool interferes = false;
  if (ccTuple->HasNonHomogeneousElements()) {
    // Finding a supremum instruction for a homogeneous part is implemented
    // only for render target write instructions (RTWriteIntrinsic).
    // The only other possible combination for non-homogeneous instructions comprises
    // RTWriteIntrinsic and RTDualBlendSourceIntrinsic.
    // In some cases there is an opportunity to coalesce their payloads, but there exists
    // a danger that they are compiled in different SIMD modes, so the safe assumption is
    // that they cannot be coalesced.
    // A comparison of the payload of RTWriteIntrinsic and RTDualBlendSourceIntrinsic:
    // +----------------------------+----------------------------+
    // | RTWriteIntrinsic           | RTDualBlendSourceIntrinsic |
    // +----------------------------+----------------------------+
    // | src0 Alpha (optional)      | src0 Alpha (unavailable)*  |
    // +----------------------------+----------------------------+
    // | output mask (optional)     | output mask (optional)     |
    // +----------------------------+----------------------------+
    // | -                          | R0 channel                 |
    // +----------------------------+----------------------------+
    // | -                          | G0 channel                 |
    // +----------------------------+----------------------------+
    // | -                          | B0 channel                 |
    // +----------------------------+----------------------------+
    // | -                          | A0 channel                 |
    // +----------------------------+----------------------------+
    // | R0 channel                 | R1 channel                 |
    // +----------------------------+----------------------------+
    // | G0 channel                 | G1 channel                 |
    // +----------------------------+----------------------------+
    // | B0 channel                 | B1 channel                 |
    // +----------------------------+----------------------------+
    // | A0 channel                 | A1 channel                 |
    // +----------------------------+----------------------------+
    // | Depth (optional)           | Depth (optional)           |
    // +----------------------------+----------------------------+
    // | Stencil (optional)         | Stencil (optional)         |
    // +----------------------------+----------------------------+
    // All optional fields except for src0 Alpha are consistent for both instructions when called in a fragment shader.
    // * RTDualBlendSourceIntrinsic doesn't have such an argument but it is defined in its payload.
    if (offsetDiff == 0 && ccTuple->GetNumElements() == numOperands &&
        llvm::isa<llvm::RTWriteIntrinsic>(ccTuple->GetRoot()) &&
        llvm::isa<llvm::RTWriteIntrinsic>(tupleGeneratingInstruction)) {
      if (m_PayloadMapping.HasNonHomogeneousPayloadElements(tupleGeneratingInstruction)) {
        // TODO: test whether a 'supremum' of TGI and the ccTuple root element can be obtained
        const Instruction *supremum =
            m_PayloadMapping.GetSupremumOfNonHomogeneousPart(tupleGeneratingInstruction, ccTuple->GetRoot());
        if (supremum == nullptr) {
          interferes = true;
        }
      }
    } else {
      // Since ccTuple contains non-homogeneous elements, and this homogeneous
      // part is not perfectly aligned to ccTuple, we cannot reason about
      // interferences safely, thus assume it interferes.
      interferes = true;
    }
  } else {
    // ccTuple:
    //              [H_0  H_1  ..  H_n]
    //
    // 1) this tuple instruction contains non-homogeneous part:
    //  <left_part'>[H_0' H_1' .. H_n']<right_part'>

    if (m_PayloadMapping.HasNonHomogeneousPayloadElements(tupleGeneratingInstruction)) {
      if (offsetDiff == 0 && ccTuple->GetNumElements() == numOperands) {
        ccTuple->SetHasNonHomogeneousElements(tupleGeneratingInstruction);
      } else {
        interferes = true;
      }
    }
  }

  return interferes;
}

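// Note: DecideSplit records a split point (in splitPoint_) when the first or the
// last payload element is a constant/isolated value; the intent, as read from the
// logic below, is that split-send capable platforms can then emit that element as a
// separate payload piece instead of forcing an extra copy into the coalesced tuple.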
void CoalescingEngine::DecideSplit(Instruction *tupleGeneratingInstruction) {
  if (m_PayloadMapping.DoPeelFirstElement(tupleGeneratingInstruction)) {
    splitPoint_[tupleGeneratingInstruction] = 1;
    return;
  }

  uint splitPoint = 0;
  const uint numOperands = m_PayloadMapping.GetNumPayloadElements(tupleGeneratingInstruction);
  // No sense entering here if we have fewer than 2 operands.
  IGC_ASSERT(numOperands >= 2);
  Value *val = m_PayloadMapping.GetPayloadElementToValueMapping(tupleGeneratingInstruction, 0);
  if (IsValConstOrIsolated(val) || !getRegRoot(val)) {
    splitPoint = 1;
  }

  val = m_PayloadMapping.GetPayloadElementToValueMapping(tupleGeneratingInstruction, numOperands - 1);
  if (IsValConstOrIsolated(val) || !getRegRoot(val)) {
    splitPoint = numOperands - 1;
  }

  if (splitPoint > 0 && m_PayloadMapping.DoesAllowSplit(tupleGeneratingInstruction)) {
    splitPoint_[tupleGeneratingInstruction] = splitPoint;
  }
}

/// Determines if any value that is an argument of a (possible) tuple generating instruction
/// can participate in payload coalescing. If so, it also determines if there are any
/// 'anchored' (constrained) values and the number of constraining tuples.
void CoalescingEngine::PrepareTuple(const uint numOperands, Instruction *tupleGeneratingInstruction,
                                    SmallPtrSet<CCTuple *, MaxTouchedTuples> &touchedTuplesSet,
                                    SmallVector<CCTuple *, MaxTouchedTuples> &touchedTuples, bool &isAnyNodeAnchored,
                                    bool &isAnyNodeCoalescable) {
  uint numConstants = 0;
  // Step: Prepare.
  for (uint i = 0; i < numOperands; i++) {
    Value *val = GetPayloadElementToValueMapping(tupleGeneratingInstruction, i);

    if (IsValConstOrIsolated(val) || !getRegRoot(val)) {
      numConstants++;
      continue;
    }

    Value *RootV = getRegRoot(val);
    IGC_ASSERT(RootV);
    {
      isAnyNodeCoalescable = true;

      IGC_ASSERT(ValueNodeMap.count(RootV));
      auto RI = ValueNodeMap.find(RootV);
      ElementNode *Node = RI->second;

      auto CCI = NodeCCTupleMap.find(Node);
      if (CCI != NodeCCTupleMap.end()) {
        CCTuple *ccTuple = NodeCCTupleMap[Node];
        if (!touchedTuplesSet.count(ccTuple)) {
          touchedTuplesSet.insert(ccTuple);
          touchedTuples.push_back(ccTuple);
        }

        isAnyNodeAnchored = true;
      }

      // Do not coalesce values having an excessive number of uses.
      // This often introduces many anti-dependencies.
      const unsigned MAX_USE_COUNT = 20;
      if (val->hasNUsesOrMore(MAX_USE_COUNT) && !llvm::isa<llvm::AtomicTypedIntrinsic>(tupleGeneratingInstruction)) {
        isAnyNodeAnchored = true;
      }
    }
  }

  // Heuristic:
  // we do not want to create 'wide' root variables where most of the slots
  // would not be coalesced anyway (due to immediate constants/isolated variables).
  // vISA would not benefit, while this increases register pressure and makes coloring
  // with the round-robin heuristic harder.
  if (numOperands > 2 && numConstants > (numOperands / 2)) {
    if (!m_PayloadMapping.HasNonHomogeneousPayloadElements(tupleGeneratingInstruction)) {
      // force non-coalescing:
      isAnyNodeCoalescable = false;
    }
  }
}

/// Creates a fresh tuple.
/// A fresh tuple consists of nodes which do not have any tuple assigned yet.
/// The precondition is that at least one value is coalescable (otherwise this does not make sense).
void CoalescingEngine::CreateTuple(const uint numOperands, Instruction *tupleGeneratingInstruction) {
  CCTuple *ccTuple = new CCTuple();
  m_CCTupleList.push_back(ccTuple);

  for (uint i = 0; i < numOperands; i++) {
    Value *val = GetPayloadElementToValueMapping(tupleGeneratingInstruction, i);

    if (IsValConstOrIsolated(val)) {
      // It is important to initialize the CC with empty slots, in order to get the proper
      // size of the vISA variable that will back the CCtuple that corresponds to the payload.
      ccTuple->ResizeBounds(i);
      continue;
    }

    Value *RootV = getRegRoot(val);
    if (!RootV) { // isolated
      ccTuple->ResizeBounds(i);
      continue;
    }

    IGC_ASSERT(ValueNodeMap.count(RootV));
    ElementNode *RootNode = ValueNodeMap[RootV];

    auto CCTI = NodeCCTupleMap.find(RootNode);
    if (CCTI == NodeCCTupleMap.end()) {
      NodeCCTupleMap[RootNode] = ccTuple;
      NodeOffsetMap[RootNode] = i;
      ccTuple->InitializeIndexWithCCRoot(i, RootNode);
      CurrentDominatingParent[RootV] = RootV;
      ImmediateDominatingParent[RootV] = NULL;
    } else {
      // This must have been a value that is copied to the payload multiple times.
      // OR: The new CCtuple has been isolated, and this is the element that
      // belongs to another tuple.
      // Since this value belongs to another tuple, we should not increase the
      // current tuple's size, no?
      // ccTuple->ResizeBounds(i);
    }
  } // loop over arguments

  if (m_PayloadMapping.HasNonHomogeneousPayloadElements(tupleGeneratingInstruction)) {
    ccTuple->SetHasNonHomogeneousElements(tupleGeneratingInstruction);
  }
}

/// Given a tuple generating instruction, picks out an 'anchored' (constrained)
/// value and determines the relative offset.
/// output:
/// ccTuple              - a tuple that is constraining a picked value
/// thisTupleStartOffset - the offset of the value in the current tuple generating
///                        instruction
/// rootTupleStartOffset - the offset of the value in the constraining tuple
/// Assumption: it has already been checked that all values belong to exactly one tuple!
/// (otherwise the result might pick an arbitrary tuple)
void CoalescingEngine::DetermineAnchor(const uint numOperands, const Instruction *tupleGeneratingInstruction,
                                       CCTuple *&ccTuple, int &rootTupleStartOffset, int &thisTupleStartOffset) {
  for (uint i = 0; i < numOperands; i++) {
    Value *val = GetPayloadElementToValueMapping(tupleGeneratingInstruction, i);

    if (IsValConstOrIsolated(val)) {
      continue;
    }
    Value *RootV = getRegRoot(val);
    if (!RootV)
      continue;

    IGC_ASSERT(ValueNodeMap.count(RootV));
    ElementNode *RootNode = ValueNodeMap[RootV];

    auto CCTI = NodeCCTupleMap.find(RootNode);
    if (CCTI != NodeCCTupleMap.end()) {
      // Element is constrained by ccTuple.
      IGC_ASSERT(NodeOffsetMap.count(RootNode));
      rootTupleStartOffset = NodeOffsetMap[RootNode];
      thisTupleStartOffset = i;
      ccTuple = NodeCCTupleMap[RootNode];
      // Just a sanity check:
      IGC_ASSERT(RootNode == ccTuple->GetCCNodeWithIndex(NodeOffsetMap[RootNode]));
      break;
    }

  } // for
}

/// Prepares the slot for moves (e.g. induced by const, isolated, uniform to vector)
/// into the payload by evicting any value that is currently occupying this slot.
/// The assumption is that the heuristic has determined it is profitable to do so.
/// input: bool evictFullCongruenceClass - tells whether the full congruence class
/// should be evicted, or only the element that occupies the tuple instruction
/// for the lifetime of a 'mov' (this is sufficient for issuing a mov to the payload slot).
void CoalescingEngine::PrepareInsertionSlot(CCTuple *ccTuple, const int index, Instruction *inst,
                                            const bool evictFullCongruenceClass) {
  ElementNode *RootNode = ccTuple->OffsetToCCMap[index];
  IGC_ASSERT(RootNode);

  if (evictFullCongruenceClass) {
    Value *dominatingParent = GetActualDominatingParent(RootNode->value, inst);
    // It might turn out that the root node does not dominate the
    // 'inst' since it is in another branch of the DT. In such a case,
    // isolate all nodes in the CC.
    llvm::Value *CCVal = dominatingParent ? dominatingParent : CurrentDominatingParent[RootNode->value];
    while (CCVal) {
      if (getRegRoot(CCVal)) {
        isolateReg(CCVal);
      }
      CCVal = ImmediateDominatingParent[CCVal];
    }
  } else {
    // Evict the dominating parent from the CC.
    Value *NewParent = GetActualDominatingParent(RootNode->value, inst);
    if (NewParent)
      isolateReg(NewParent);
  }
}

bool CoalescingEngine::IsInsertionSlotAvailable(CCTuple *ccTuple, const int index,
                                                Instruction *tupleGeneratingInstruction, const bool canExtend) {
  bool available = true;

  // ccTuple->OffsetToCCMap.count(index) == 0
  if (!canExtend) {
    const int tupleNumElements = ccTuple->GetNumElements();

    if (!(0 <= index && index < tupleNumElements)) {
      return false;
    }
  }

  ElementNode *nodeToCheck = ccTuple->GetCCNodeWithIndex(index);

  if (nodeToCheck == NULL) {
    // tuple   :  X       CC1   CC2
    // payload :  <slot>   v1        v2
    // Means: the tuple has no live interval (X) that is live at the point the payload is
    // defined. It means that the given 'register slot' can be used to issue a 'mov'
    // with an immediate (or isolated) value.
  } else {
    // tuple   :  CC0       CC1   CC2
    //  ....       | <- lifetime
    // payload :  <slot>    v1       v2
    //  .....      | <- lifetime
    // Here we need to check whether the value in the CC is live
    // and intersects with the desired <slot>.

    // Sanity check: the tuple contains the root element.

    // IGC_ASSERT(getRegRoot(nodeToCheck->value) == nodeToCheck->value);
    Value *RootV = nodeToCheck->value;
    Value *NewParent = GetActualDominatingParent(RootV, tupleGeneratingInstruction);
    // FIXME: what if we do not have pure ssa form? Look at the de-ssa comment.
    if (NewParent && LV->isLiveAt(NewParent, tupleGeneratingInstruction)) {
      available = false;
      // interferes = true;
      // break;
    }
  }

  return available;
}

/// Processes the tuple elements in sequential order.
/// A function object is passed as an argument, which responds to
/// specific events (whether a given payload element is const/isolated/copied, etc.).
/// It is meant to be used as a template method in various contexts where different
/// functors could be passed, but the walking mechanism is the same.
void CoalescingEngine::ProcessElements(const uint numOperands, Instruction *tupleInst, const int offsetDiff,
                                       CCTuple *ccTuple, ElementFunctor *functor) {
  SmallPtrSet<Value *, MaxTouchedTuples> touchedValuesSet;

  for (uint i = 0; i < numOperands; i++) {
    functor->SetIndex(i);
    Value *val = GetPayloadElementToValueMapping(tupleInst, i);
    if (touchedValuesSet.count(val)) {
      if (functor->visitCopy())
        continue;
      else
        break;
    } else {
      touchedValuesSet.insert(val);
    }
    // Case 1: Constant
    if (llvm::isa<llvm::Constant>(val)) {
      if (functor->visitConstant())
        continue;
      else
        break;
    } else // Case 2: Variable
    {
      if (IsValIsolated(val) || !getRegRoot(val)) {
        // If the value is isolated, a copy to <slot> will be necessary.
        if (functor->visitIsolated())
          continue;
        else
          break;
      }

      IGC_ASSERT(ValueNodeMap.count(val));
      ElementNode *Node = ValueNodeMap[val];
      ElementNode *ccRootNode = ccTuple->GetCCNodeWithIndex(i + offsetDiff);

      // Interferes if and only if: live at this definition point AND has a different value
      if (ccRootNode == Node) {
        // No interference, since it is the same value. We have got a reuse of the same value in
        // a different tuple.
        if (functor->visitAnchored())
          continue;
        else
          break;
      } else {
        IGC_ASSERT(llvm::isa<llvm::Instruction>(val));
        // Here comes the meat, and the most interesting case:
        if (ccRootNode != NULL) {
          Value *CCRootV = ccRootNode->value;
          Value *dominating = nullptr;
          Value *dominated = nullptr;

          SymmetricInterferenceTest(CCRootV, dyn_cast<llvm::Instruction>(val), dominating, dominated);

          // Case 1):
          //    (dominating=null)
          //
          //!   (value)
          //
          //    CC dominance tree:
          //!   v67 <- (dominated)
          //    ^
          //    |
          //!   v190
          //
          //!   (tupleInst)
          // Value has no dominator inside the CC class, but dominates elements in the CC class,
          // and its lifetime spans until (but not including) tupleInst.
          if (dominating == nullptr && dominated != nullptr) {
            // IGC_ASSERT(LV->isLiveAt(val, tupleInst));
            // Interference check: value is live at dominated
            if (functor->visitInterfering(val, false))
              continue;
            else
              break;
          } else if (dominating != nullptr && dominated != nullptr) {
            // Case 2):
            //  CC dominance tree
            //!  v67
            //    ^
            //    |
            //!  v121 (dominating)
            //    ^
            //!   |  <-----(value)
            //    |
            //!  v190 <- (dominated)
            // TODO: it would be possible to 'fit' v181 between v121 and v190 in some
            //       cases, but this would require redesign of the 'dominance forest' logic
            //       (regarding current dominating and immediate dominator handling)
            //       For now, just assume there is an interference.
            if (functor->visitInterfering(val, false))
              continue;
            else
              break;
          } else if (dominating != nullptr && dominated == nullptr) {
            // Case 3):
            //  CC dominance tree
            //!  v121
            //    ^
            //    |
            //!  v190 <- (dominating)
            //    |
            //!   |    ----- (value)
            //    |
            //    | (dominated == null)
            if (LV->isLiveAt(dominating, dyn_cast<llvm::Instruction>(val))) {
              if (functor->visitInterfering(val, false))
                continue;
              else
                break;
            }
          } else {
            // It is possible that the ccRootNode is not empty, but all
            // elements that belong to it have been isolated in previously
            // dominating tuples.
            // IGC_ASSERT(0);
          }
        } else {
          // ccRootNode == NULL -> congruence class not occupied yet
          if (functor->visitPackedNonInterfering(val))
            continue;
          else
            break;
        }
      } // if( ccRootNode == Node )
    } // if( llvm::isa<llvm::Constant>( val ) )
  } // for loop
}

/// Here we have two major differences from the implementation in de-ssa:
/// 1) Transactional character: even if one 'element' of the CC-tuple does not interfere,
/// some other element in the CC-tuple might be interfering, rendering the whole group
/// non-'coalescable'.
/// 2) Values in payload coalescing are by default isolated, so if there is an interference,
/// do nothing; there is no need for explicit isolation.
bool CoalescingEngine::InterferenceCheck(const uint numOperands, Instruction *tupleInst, const int offsetDiff,
                                         CCTuple *ccTuple, ProcessInterferencesElementFunctor *interferencesFunctor) {
  SmallPtrSet<Value *, MaxTouchedTuples> touchedValuesSet;
  GatherWeightElementFunctor gatherFunctor;
  ProcessElements(numOperands, tupleInst, offsetDiff, ccTuple, &gatherFunctor);
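  // Heuristic (as read from the gathered weights): force eviction of the currently
  // occupying values when the number of elements already anchored at matching offsets
  // is at least as large as the number of insertion slots and displacements that
  // committing this tuple would require.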
  bool forceEviction = (gatherFunctor.GetNumInsertionSlotsRequired() + gatherFunctor.GetNumNeedsDisplacement() <=
                        gatherFunctor.GetNumAlignedAnchors());

  interferencesFunctor->SetForceEviction(forceEviction);

  ProcessElements(numOperands, tupleInst, offsetDiff, ccTuple, interferencesFunctor);

  return interferencesFunctor->IsInterfering();
}

/// Given an instruction and numOperands (corresponding to the number of coalesce-partaking
/// payload elements), return whether any 'value' that is an argument of the intrinsic
/// takes part in a coalescing CC tuple. If yes, the (non-null) return value is
/// the representative tuple for this coalescing.
/// output: zeroBasedPayloadElementOffset - the offset of this instruction's payload
/// relative to the containing ccTuple - this is necessary to correctly slice the
/// relevant part of the payload from the ccTuple (e.g. for preparing a payload for URB writes)
///     offset [payload]
///     [ccTuple ...... ]
/// note: obviously, if the return value is null, then the meaning of zeroBasedPayloadElementOffset
/// is undefined
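/// Illustrative example (hypothetical numbers): if the matched value sits at CCTuple
/// offset 1, the tuple's left bound is -2, and the value is payload element index 2,
/// then zeroBasedPayloadElementOffset = (1 - (-2)) - 2 = 1.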
CoalescingEngine::CCTuple *CoalescingEngine::IsAnyValueCoalescedInCCTuple(llvm::Instruction *inst,
                                                                          const uint numOperands,
                                                                          int &zeroBasedPayloadElementOffset,
                                                                          Value *&outVal) {
  outVal = nullptr;
  zeroBasedPayloadElementOffset = 0; // provide some meaningful default
  CoalescingEngine::CCTuple *ccTuple = nullptr;
  uint index = 0;
  while (index < numOperands) {
    Value *val = GetPayloadElementToValueMapping(inst, index);
    CoalescingEngine::CCTuple *ccTupleCheck = GetValueCCTupleMapping(val);
    if (!isa<Constant>(val) && ccTupleCheck) {
      ccTuple = ccTupleCheck;

      Value *valRoot = getRegRoot(val);
      IGC_ASSERT(valRoot);
      ElementNode *Node = ValueNodeMap[valRoot];
      int offset = NodeOffsetMap[Node];
      zeroBasedPayloadElementOffset = (offset - ccTuple->GetLeftBound()) - index;
      outVal = val;
      return ccTuple;
    }
    index++;
  }
  return ccTuple;
}

/// Return true if the payload tuple might be completed (either by direct
/// aliasing or by inserting an appropriate copy instruction).
bool CoalescingEngine::IsPayloadCovered(llvm::Instruction *inst, CCTuple *ccTuple, const uint numOperands,
                                        const int payloadToCCTupleRelativeOffset) {
  uint relativeOffset = 0;
  bool payloadCovered = false;

  if (ccTuple) {
    SmallPtrSet<Value *, MaxTouchedTuples> touchedValuesSet;

    // index = 0;
    payloadCovered = true;
    for (uint index = 0; index < numOperands; index++) {
      Value *val = GetPayloadElementToValueMapping(inst, index);
      const int ccIndex = index + payloadToCCTupleRelativeOffset;

      if (touchedValuesSet.count(val)) {
        if (!IsInsertionSlotAvailable(ccTuple, ccIndex, inst, false)) {
          payloadCovered = false;
          break;
        }
        continue;
      } else {
        touchedValuesSet.insert(val);
      }

      if (IsValConstOrIsolated(val)) {
        if (!IsInsertionSlotAvailable(ccTuple, ccIndex, inst, false)) {
          payloadCovered = false;
          break;
        }
      } else {
        if (CoalescingEngine::CCTuple *thisCCTuple = GetValueCCTupleMapping(val)) {
          if (thisCCTuple == ccTuple) {
            relativeOffset = GetValueOffsetInCCTuple(val);
            if (relativeOffset != (ccIndex)) {
              payloadCovered = false;
              break;
            }
          } else {
            // different cc tuple
            payloadCovered = false;
            break;
          }

        } else {
          if (!IsInsertionSlotAvailable(ccTuple, ccIndex, inst, false)) {
            payloadCovered = false;
            break;
          }
        }
      } // if constant
    } // for
  } // if cctuple

  return payloadCovered;
}

/// Uses the tuple information provided by the coalescing engine in order
/// to generate an optimized sequence of 'movs' for preparing a payload.
CVariable *CoalescingEngine::PrepareExplicitPayload(CShader *outProgram, CEncoder *encoder, SIMDMode simdMode,
                                                    const DataLayout *pDL, llvm::Instruction *inst,
                                                    int &payloadOffsetInBytes) {
  SetCurrentPart(inst, 0);
  const uint numOperands = m_PayloadMapping.GetNumPayloadElements(inst);
  const uint grfSize = outProgram->GetContext()->platform.getGRFSize();
  const uint numSubVarsPerOperand = numLanes(simdMode) / (grfSize / 4);
  IGC_ASSERT(numSubVarsPerOperand == 1 || numSubVarsPerOperand == 2);
  CoalescingEngine::CCTuple *ccTuple = nullptr;
  int payloadToCCTupleRelativeOffset = 0;
  Value *dummyValPtr = nullptr;
  ccTuple = IsAnyValueCoalescedInCCTuple(inst, numOperands, payloadToCCTupleRelativeOffset, dummyValPtr);
  bool payloadCovered = IsPayloadCovered(inst, ccTuple, numOperands, payloadToCCTupleRelativeOffset);
  bool payloadOffsetComputed = false;
  payloadOffsetInBytes = 0;
  CVariable *payload = nullptr;

  if (payloadToCCTupleRelativeOffset < 0) {
    payloadCovered = false;
  }

  // Check for EOT payload.
  if (payloadCovered) {
    if (IGCLLVM::TerminatorInst *terminator = inst->getParent()->getTerminator()) {
      if (terminator != &(*terminator->getParent()->begin())) {
        IGC_ASSERT(dyn_cast<GenIntrinsicInst>(inst));
        IGC_ASSERT(terminator->getPrevNode());
        if (terminator->getPrevNode() == inst) {
          // heuristic:
          // this inst is an EOT node. Be very careful about payload size:
          // coalescing: hard constraint
          if (ccTuple->GetNumElements() > MaxTupleSize) {
            payloadCovered = false;
          }
        }
      }
    }
  }

  if (payloadCovered) {
    SmallPtrSet<Value *, MaxTouchedTuples> touchedValuesSet;

    for (uint index = 0; index < numOperands; index++) {
      Value *val = m_PayloadMapping.GetPayloadElementToValueMapping(inst, index);

      CVariable *dataElement = outProgram->GetSymbol(val);
      // FIXME: need to do additional checks for size
      uint subVar = (payloadToCCTupleRelativeOffset + index) * numSubVarsPerOperand;
      encoder->SetDstSubVar(subVar);
      payload = outProgram->GetCCTupleToVariableMapping(ccTuple);

      if (touchedValuesSet.count(val)) {
        encoder->Copy(payload, dataElement);
        encoder->Push();
        continue;
      } else {
        touchedValuesSet.insert(val);
      }

      if (IsValConstOrIsolated(val)) {
        encoder->Copy(payload, dataElement);
        encoder->Push();
      } else {
        if (GetValueCCTupleMapping(val)) {
          if (!payloadOffsetComputed) {
            payloadOffsetComputed = true;

            payloadOffsetInBytes = payloadToCCTupleRelativeOffset * GetSingleElementWidth(simdMode, pDL, val);
          }
        } else {
          // this one actually encompasses the case for !getRegRoot(val)
          encoder->Copy(payload, dataElement);
          encoder->Push();
        }
      } // if constant
    } // for

  } else {
    payload = outProgram->GetNewVariable(
        numOperands * numLanes(simdMode), ISA_TYPE_F,
        outProgram->GetContext()->platform.getGRFSize() == 64 ? EALIGN_32WORD : EALIGN_HWORD, "CEExplicitPayload");

    // insert an explicit lifetime start in case some of the operands are undefs;
    // otherwise, VISA will see the variable as not fully initialized and will extend the lifetime all the way to the
    // beginning of the kernel
    encoder->Lifetime(VISAVarLifetime::LIFETIME_START, payload);

    for (uint i = 0; i < numOperands; i++) {
      Value *val = m_PayloadMapping.GetPayloadElementToValueMapping(inst, i);
      CVariable *data = outProgram->GetSymbol(val);
      encoder->SetDstSubVar(i * numSubVarsPerOperand);
      encoder->Copy(payload, data);
      encoder->Push();
    }
  }

  return payload;
}

// Prepares the payload for the uniform URB Write messages that are used in
// mesh and task shader stages.
// On the DG2 platform a uniform URB write is a simd1 message that writes data
// from the first DWORD of each GRF of the payload, e.g.:
//   (W)     mov (1|M0)               r16.0<1>:ud   0x0:ud
//   (W)     mov (1|M0)               r17.0<1>:f   -1.0:f
//   (W)     mov (1|M0)               r18.0<1>:f    0.0:f
//   (W)     mov (1|M0)               r19.0<1>:f    1.0:f
//   (W)     send.urb (1|M0)          null     r2   r16:4  0x0  0x020827F7
// On Xe2+ URB write messages are transposed LSC store messages that write
// consecutive DWORDs of the payload, e.g.:
//   (W)     mov (1|M0)               r16.0<1>:ud   0x0:ud
//   (W)     mov (1|M0)               r16.1<1>:f   -1.0:f
//   (W)     mov (1|M0)               r16.2<1>:f    0.0:f
//   (W)     mov (1|M0)               r16.3<1>:f    1.0:f
//   (W)     store.urb.d32x4t.a32 (1|M0)  [r2:1+0x27F0] r16:1
CVariable *CoalescingEngine::PrepareUniformUrbWritePayload(CShader *shader, CEncoder *encoder,
                                                           llvm::GenIntrinsicInst *inst) {
  IGC_ASSERT(inst->getIntrinsicID() == GenISAIntrinsic::GenISA_URBWrite);
  CVariable *payload = nullptr;
  const uint numOperands = m_PayloadMapping.GetNumPayloadElements(inst);
  const uint grfSize = shader->GetContext()->platform.getGRFSize();
  if (shader->GetContext()->platform.hasLSCUrbMessage()) {
    // Payload for a transposed LSC URB Write - all data chunks are stored in consecutive DWORDs
    payload = shader->GetNewVariable(numOperands, ISA_TYPE_F, (grfSize == 64 ? EALIGN_32WORD : EALIGN_HWORD),
                                     true /*uniform*/, "CEExplicitPayload_Uniform");
  } else {
    // Payload spans multiple GRFs, only the first DWORD of each GRF
    // will be populated with (uniform) data.
    const uint elemsPerGrf = grfSize / CVariable::GetCISADataTypeSize(ISA_TYPE_F);
    payload = shader->GetNewVariable(numOperands * elemsPerGrf, ISA_TYPE_F,
                                     (grfSize == 64 ? EALIGN_32WORD : EALIGN_HWORD), "CEExplicitPayload");
  }
  for (uint i = 0; i < numOperands; i++) {
    Value *val = m_PayloadMapping.GetPayloadElementToValueMapping(inst, i);
    CVariable *data = shader->GetSymbol(val);
    IGC_ASSERT(isUniform(val) && data->IsUniform());
    encoder->SetNoMask();
    encoder->SetSimdSize(SIMDMode::SIMD1);
    if (shader->GetContext()->platform.hasLSCUrbMessage()) {
      encoder->SetDstSubReg(i);
    } else {
      encoder->SetDstSubVar(i);
    }
    encoder->Copy(payload, data);
  }
  return payload;
}

// Returns the URB write payload for a split simd8 message in a simd16 mesh/task
// shader.
// TODO: This method does not use the CoalescingEngine currently and merely
// copies the payload to a newly created variable.
CVariable *CoalescingEngine::PrepareSplitUrbWritePayload(
    CShader *outProgram, CEncoder *encoder,
    SIMDMode simdMode,    ///< must be SIMD16
    uint32_t splitPartNo, ///< can be either 0 - lower quarter, or 1 - upper quarter
    llvm::Instruction *inst) {
  SetCurrentPart(inst, 0);
  IGC_ASSERT(outProgram->m_SIMDSize == SIMDMode::SIMD16);
  IGC_ASSERT(splitPartNo < 2);
  const GenIntrinsicInst *const intrinsicInst = dyn_cast<GenIntrinsicInst>(inst);
  IGC_ASSERT(nullptr != intrinsicInst);
  IGC_ASSERT(intrinsicInst->getIntrinsicID() == GenISAIntrinsic::GenISA_URBWrite);

  const uint numOperands = m_PayloadMapping.GetNumPayloadElements(inst);
  CVariable *payload = outProgram->GetNewVariable(
      numOperands * numLanes(SIMDMode::SIMD8), ISA_TYPE_F,
      outProgram->GetContext()->platform.getGRFSize() == 64 ? EALIGN_32WORD : EALIGN_HWORD, CName::NONE);

  for (uint i = 0; i < numOperands; i++) {
    Value *val = m_PayloadMapping.GetPayloadElementToValueMapping(inst, i);
    CVariable *data = outProgram->GetSymbol(val);
    encoder->SetSimdSize(SIMDMode::SIMD8);
    encoder->SetMask((splitPartNo == 0) ? EMASK_Q1 : EMASK_Q2);
    encoder->SetSrcSubVar(0, data->IsUniform() ? 0 : splitPartNo);
    encoder->SetDstSubVar(i);
    encoder->Copy(payload, data);
    encoder->Push();
  }
  return payload;
}

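// ElementNode implements a union-find (disjoint-set) forest over coalesced values:
// getLeader() returns the representative of the congruence class, compressing the
// path as it walks (each visited node is re-pointed two levels up).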
CoalescingEngine::ElementNode *CoalescingEngine::ElementNode::getLeader() {
  ElementNode *N = this;
  ElementNode *Parent = parent.getPointer();
  ElementNode *Grandparent = Parent->parent.getPointer();

  while (Parent != Grandparent) {
    N->parent.setPointer(Grandparent);
    N = Grandparent;
    Parent = Parent->parent.getPointer();
    Grandparent = Parent->parent.getPointer();
  }

  return Parent;
}

Value *CoalescingEngine::getRegRoot(Value *Val) const {
  auto RI = ValueNodeMap.find(Val);
  if (RI == ValueNodeMap.end())
    return nullptr;
  ElementNode *Node = RI->second;
  if (Node->parent.getInt() & ElementNode::kRegisterIsolatedFlag)
    return 0x0;
  return Node->getLeader()->value;
}

Value *CoalescingEngine::getPHIRoot(Instruction *PHI) const {
  IGC_ASSERT(dyn_cast<PHINode>(PHI));
  auto RI = ValueNodeMap.find(PHI);
  IGC_ASSERT(RI != ValueNodeMap.end());
  ElementNode *DestNode = RI->second;
  if (DestNode->parent.getInt() & ElementNode::kPHIIsolatedFlag)
    return 0x0;
  for (unsigned i = 0; i < PHI->getNumOperands(); i++) {
    Value *SrcVal = PHI->getOperand(i);
    if (isa<Instruction>(SrcVal)) { // skip constant source
      Value *SrcRoot = getRegRoot(SrcVal);
      if (SrcRoot)
        return SrcRoot;
    }
  }
  return 0x0;
}

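// Union-by-rank: the class with the smaller rank is attached under the leader of
// the class with the larger rank; on a tie, the second class is attached under the
// first and the first leader's rank is incremented.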
void CoalescingEngine::unionRegs(Value *Val1, Value *Val2) {
  ElementNode *Node1 = ValueNodeMap[Val1]->getLeader();
  ElementNode *Node2 = ValueNodeMap[Val2]->getLeader();

  if (Node1->rank > Node2->rank) {
    Node2->parent.setPointer(Node1->getLeader());
  } else if (Node1->rank < Node2->rank) {
    Node1->parent.setPointer(Node2->getLeader());
  } else if (Node1 != Node2) {
    Node2->parent.setPointer(Node1->getLeader());
    Node1->rank++;
  }
}

// For now, return true if V is dessa-aliased/InsEltMap-ed/phi-coalesced.
bool CoalescingEngine::isCoalescedByDeSSA(Value *V) const {
  if (m_DeSSA && m_DeSSA->getRootValue(V))
    return true;
  return false;
}

void CoalescingEngine::ProcessBlock(llvm::BasicBlock *bb) {
  llvm::BasicBlock::InstListType::iterator I, E;

  // Loop through instructions top to bottom
  for (I = bb->begin(), E = bb->end(); I != E; ++I) {
    llvm::Instruction &inst = (*I);
    auto loopMarker = RLA->GetResourceLoopMarker(&inst);
    // turn off coalescing in this block when there is a fused resource-loop
    if ((loopMarker & ResourceLoopAnalysis::MarkResourceLoopInside) ||
        ((loopMarker & ResourceLoopAnalysis::MarkResourceLoopStart) &&
         !(loopMarker & ResourceLoopAnalysis::MarkResourceLoopEnd))) {
      BBProcessingDefs[bb].clear();
      return;
    }

    visit(inst);
  }
}

bool CoalescingEngine::MatchSingleInstruction(llvm::GenIntrinsicInst *inst) {
  if (isSampleInstruction(inst) || isLdInstruction(inst) || isURBWriteIntrinsic(inst) ||
      llvm::isa<llvm::AtomicTypedIntrinsic>(inst) || llvm::isa<llvm::RTWriteIntrinsic>(inst) ||
      (llvm::isa<llvm::RTDualBlendSourceIntrinsic>(inst) && m_Platform.hasDualKSPPS())) {
    uint numOperands = inst->getNumOperands();
    for (uint i = 0; i < numOperands; i++) {
      Value *val = inst->getOperand(i);
      if (llvm::isa<llvm::Constant>(val)) {
        continue;
      }

      if (!ValueNodeMap.count(val)) {
        Instruction *DefMI = dyn_cast<Instruction>(val);
        if (DefMI) {
          BBProcessingDefs[DefMI->getParent()].push_back(DefMI);
        }
        ValueNodeMap[val] = new (Allocator) ElementNode(val);
      }
    }

    // Add itself to processing stack as well.
    BBProcessingDefs[inst->getParent()].push_back(inst);
  }

  return true;
}

void CoalescingEngine::visitCallInst(llvm::CallInst &I) {
  // we do not care if we do not match
  // bool match = true;
  if (GenIntrinsicInst *CI = llvm::dyn_cast<GenIntrinsicInst>(&I)) {
    switch (CI->getIntrinsicID()) {
    case GenISAIntrinsic::GenISA_ROUNDNE:
    case GenISAIntrinsic::GenISA_imulH:
    case GenISAIntrinsic::GenISA_umulH:
    case GenISAIntrinsic::GenISA_uaddc:
    case GenISAIntrinsic::GenISA_usubb:
    case GenISAIntrinsic::GenISA_bfrev:
      break;
    case GenISAIntrinsic::GenISA_intatomicraw:
    case GenISAIntrinsic::GenISA_floatatomicraw:
    case GenISAIntrinsic::GenISA_icmpxchgatomicraw:
    case GenISAIntrinsic::GenISA_fcmpxchgatomicraw:
    case GenISAIntrinsic::GenISA_dwordatomicstructured:
    case GenISAIntrinsic::GenISA_floatatomicstructured:
    case GenISAIntrinsic::GenISA_cmpxchgatomicstructured:
    case GenISAIntrinsic::GenISA_fcmpxchgatomicstructured:
    case GenISAIntrinsic::GenISA_icmpxchgatomictyped:
    case GenISAIntrinsic::GenISA_floatatomictyped:
    case GenISAIntrinsic::GenISA_fcmpxchgatomictyped:
    case GenISAIntrinsic::GenISA_ldstructured:
    case GenISAIntrinsic::GenISA_atomiccounterinc:
    case GenISAIntrinsic::GenISA_atomiccounterpredec:
      break;
    case GenISAIntrinsic::GenISA_GradientX:
    case GenISAIntrinsic::GenISA_GradientY:
    case GenISAIntrinsic::GenISA_GradientXfine:
    case GenISAIntrinsic::GenISA_GradientYfine:
      break;
    default:
      MatchSingleInstruction(CI);
      // no pattern for the rest of the intrinsics
      break;
    }
  } else if (IntrinsicInst *CI = llvm::dyn_cast<IntrinsicInst>(&I)) {
    switch (CI->getIntrinsicID()) {
    case Intrinsic::sqrt:
    case Intrinsic::log2:
    case Intrinsic::cos:
    case Intrinsic::sin:
    case Intrinsic::pow:
    case Intrinsic::floor:
    case Intrinsic::ceil:
    case Intrinsic::trunc:
    case Intrinsic::maxnum:
    case Intrinsic::minnum:
      break;
    case Intrinsic::exp2:
      break;

    default:
      break;
    }
  }
}

void CoalescingEngine::visitCastInst(llvm::CastInst &I) {}
void CoalescingEngine::visitBinaryOperator(llvm::BinaryOperator &I) {}

void CoalescingEngine::visitCmpInst(llvm::CmpInst &I) {}

void CoalescingEngine::visitPHINode(llvm::PHINode &I) {}

void CoalescingEngine::visitUnaryInstruction(llvm::UnaryInstruction &I) {}

void CoalescingEngine::visitSelectInst(llvm::SelectInst &I) {}

void CoalescingEngine::visitBitCastInst(llvm::BitCastInst &I) {}

void CoalescingEngine::visitInstruction(llvm::Instruction &I) {}

void CoalescingEngine::visitLoadInst(LoadInst &I) {}

void CoalescingEngine::visitStoreInst(StoreInst &I) {}

} // namespace IGC