mirror of
				https://github.com/intel/intel-graphics-compiler.git
				synced 2025-10-30 08:18:26 +08:00 
			
		
		
		
	Fix problem in split barrier
Fixed a problem with the split barrier when it is used together with a regular barrier. The sequence splitbarrier.signal(); regularbarrier(); splitbarrier.wait() was causing a hang because the same barrier ID was assigned to both the regular barrier and the split barrier. Now the split barrier takes a different ID than the regular one.
This commit is contained in:
		 Wesierski, Lukasz
					Wesierski, Lukasz
				
			
				
					committed by
					
						 igcbot
						igcbot
					
				
			
			
				
	
			
			
			 igcbot
						igcbot
					
				
			
						parent
						
							d7a41cf31b
						
					
				
				
					commit
					66e66680f7
				
			| @ -3667,11 +3667,24 @@ void CEncoder::InitVISABuilderOptions(TARGET_PLATFORM VISAPlatform, bool canAbor | ||||
|  | ||||
|   SetAbortOnSpillThreshold(canAbortOnSpill, AllowSpill); | ||||
|  | ||||
|   if (context->type == ShaderType::COMPUTE_SHADER || | ||||
|       (context->type == ShaderType::OPENCL_SHADER && !(context->getModuleMetaData()->NBarrierCnt > 0))) { | ||||
|   if (context->allowATOB()) { | ||||
|     SaveOption(vISA_ActiveThreadsOnlyBarrier, true); | ||||
|   } | ||||
|  | ||||
|   if (context->m_instrTypes.hasSplitBarrier && | ||||
|     context->m_instrTypes.hasWorkgroupBarrier) | ||||
|   { | ||||
|     // The regular and split barriers cannot share the | ||||
|     // same ID, because we could have a scenario such as: | ||||
|     // splitbarrier.signal(); | ||||
|     // workgroupbarrier(); | ||||
|     // splitbarrier.wait(); | ||||
|     // and this will cause a hang. | ||||
|     // The split barrier will use ID 1, | ||||
|     // so we cannot set up ATOB for this case. | ||||
|     SaveOption(vISA_SplitBarrierID1, true); | ||||
|   } | ||||
|  | ||||
|   if (m_program->m_Platform->isCoreChildOf(IGFX_XE3_CORE)) { | ||||
|     if (uint Val = IGC_GET_FLAG_VALUE(VISASpillAllowed)) { | ||||
|       context->m_spillAllowed = Val; | ||||
| @ -5413,7 +5426,8 @@ void CEncoder::Compile(bool hasSymbolTable, GenXFunctionGroupAnalysis *&pFGA) { | ||||
|   // always set properly, even if a barrier is used as a part of Inline vISA | ||||
|   // code only. | ||||
|   if (jitInfo->numBarriers != 0 && !m_program->m_State.GetHasBarrier()) { | ||||
|     if (context->getModuleMetaData()->NBarrierCnt > 0 || additionalVISAAsmToLink) { | ||||
|     if (context->getModuleMetaData()->NBarrierCnt > 0 || additionalVISAAsmToLink || | ||||
|       jitInfo->numBarriers > 1) { | ||||
|       m_program->m_State.SetBarrierNumber(NamedBarriersResolution::AlignNBCnt2BarrierNumber(jitInfo->numBarriers)); | ||||
|     } else { | ||||
|       m_program->m_State.SetHasBarrier(); | ||||
|  | ||||
| @ -296,8 +296,12 @@ void CheckInstrTypes::visitCallInst(CallInst &C) { | ||||
|       g_InstrTypes.numWaveIntrinsics++; | ||||
|       break; | ||||
|     case GenISAIntrinsic::GenISA_threadgroupbarrier: | ||||
|       g_InstrTypes.hasWorkgroupBarrier = true; | ||||
|       g_InstrTypes.numBarrier++; | ||||
|       break; | ||||
|     case GenISAIntrinsic::GenISA_threadgroupbarrier_signal: | ||||
|       g_InstrTypes.hasSplitBarrier = true; | ||||
|       break; | ||||
|     case GenISAIntrinsic::GenISA_is_uniform: | ||||
|       g_InstrTypes.hasUniformAssumptions = true; | ||||
|       break; | ||||
|  | ||||
| @ -722,6 +722,28 @@ int32_t CodeGenContext::getNumThreadsPerEU() const { return -1; } | ||||
|  | ||||
| uint32_t CodeGenContext::getExpGRFSize() const { return 0; } | ||||
|  | ||||
| bool CodeGenContext::allowATOB() | ||||
| { | ||||
|   // Decides whether the caller may turn on the active-threads-only | ||||
|   // barrier (ATOB) option for this shader. | ||||
|   // | ||||
|   // Base eligibility: compute shaders always qualify; OpenCL shaders | ||||
|   // qualify only when the module metadata reports no NBarrierCnt. | ||||
|   bool eligible = false; | ||||
|   if (type == ShaderType::COMPUTE_SHADER) { | ||||
|     eligible = true; | ||||
|   } else if (type == ShaderType::OPENCL_SHADER) { | ||||
|     const bool hasNBarriers = getModuleMetaData()->NBarrierCnt > 0; | ||||
|     eligible = !hasNBarriers; | ||||
|   } | ||||
|  | ||||
|   // A regular workgroup barrier and a split barrier cannot share one ID: | ||||
|   //   splitbarrier.signal(); | ||||
|   //   workgroupbarrier(); | ||||
|   //   splitbarrier.wait(); | ||||
|   // would hang. In that mixed case the split barrier uses ID 1, | ||||
|   // and ATOB must not be set up. | ||||
|   const bool mixesBarrierKinds = | ||||
|     m_instrTypes.hasSplitBarrier && m_instrTypes.hasWorkgroupBarrier; | ||||
|  | ||||
|   return eligible && !mixesBarrierKinds; | ||||
| } | ||||
|  | ||||
| /// parameter "returnDefault" controls what to return when | ||||
| /// there is no user-forced setting | ||||
| uint32_t CodeGenContext::getNumGRFPerThread(bool returnDefault) { | ||||
|  | ||||
| @ -296,6 +296,8 @@ struct SInstrTypes { | ||||
|   bool hasDynamicGenericLoadStore{}; | ||||
|   bool hasUnmaskedRegion{}; | ||||
|   bool hasSLM{}; | ||||
|   bool hasWorkgroupBarrier{}; | ||||
|   bool hasSplitBarrier{}; | ||||
|   unsigned int numCall{}; | ||||
|   unsigned int numBarrier{}; | ||||
|   unsigned int numLoadStore{}; | ||||
| @ -1043,6 +1045,8 @@ public: | ||||
|   virtual bool isBufferBoundsChecking() const; | ||||
|   virtual uint64_t getMinimumValidAddress() const; | ||||
|  | ||||
|   bool allowATOB(); | ||||
|  | ||||
|   UserAddrSpaceMD &getUserAddrSpaceMD() { | ||||
|     IGC_ASSERT(llvmCtxWrapper); | ||||
|     return llvmCtxWrapper->m_UserAddrSpaceMD; | ||||
|  | ||||
| @ -726,7 +726,7 @@ public: | ||||
|     // are set. | ||||
|     return maxId + 1; | ||||
|   } | ||||
|   void updateBarrier(); | ||||
|  | ||||
|   void updateNamedBarrier(G4_Operand *barrierId); | ||||
|  | ||||
|   G4_Declare *cloneDeclare(std::map<G4_Declare *, G4_Declare *> &dclMap, | ||||
| @ -2342,9 +2342,9 @@ public: | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////// | ||||
|   // default barrier functions | ||||
|   void generateSingleBarrier(G4_Predicate *prd); | ||||
|   void generateBarrierSend(G4_Predicate *prd); | ||||
|   void generateBarrierWait(G4_Predicate *prd); | ||||
|   void generateSingleBarrier(G4_Predicate *prd, uint32_t id); | ||||
|   void generateBarrierSend(G4_Predicate *prd, uint32_t id); | ||||
|   void generateBarrierWait(G4_Predicate *prd, uint32_t id); | ||||
|   int translateVISASplitBarrierInst(G4_Predicate *prd, bool isSignal); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| @ -275,7 +275,7 @@ static void generateNamedBarrier(int &status, IR_Builder &irb, | ||||
| } | ||||
|  | ||||
|  | ||||
| void IR_Builder::generateSingleBarrier(G4_Predicate *prd) { | ||||
| void IR_Builder::generateSingleBarrier(G4_Predicate *prd, uint32_t id) { | ||||
|   // single barrier: # producer = # consumer = # threads, barrier id = 0 | ||||
|   // For now produce no fence | ||||
|   // Number of threads per threadgroup is r0.2[31:24] | ||||
| @ -286,12 +286,12 @@ void IR_Builder::generateSingleBarrier(G4_Predicate *prd) { | ||||
|   //   Hdr.2:d[31:24,23:16] | ||||
|   G4_Declare *header = createTempVar(8, Type_UD, getGRFAlign()); | ||||
|   auto dst = createDst(header->getRegVar(), 0, 2, 1, Type_UD); | ||||
|   uint32_t headerInitValDw2 = 0x0; // initial value for DWord2 | ||||
|   uint32_t headerInitValDw2 = id; // initial value for DWord2 | ||||
|   if (getPlatform() >= Xe2 && getOption(vISA_ActiveThreadsOnlyBarrier)) { | ||||
|     headerInitValDw2 |= (1 << 8); | ||||
|   } | ||||
|   // Header.2:d has the following format: | ||||
|   //  bits[7:0] = 0x0 (barrier id) | ||||
|   //  bits[7:0] = id (barrier id) | ||||
|   //  bits[8] = active only thread barrier | ||||
|   //  bits[15:14] = 0 (producer/consumer) | ||||
|   //  bits[23:16] = num producers = r0.11:b (r0.2[31:24] = num threads in tg) | ||||
| @ -311,7 +311,6 @@ void IR_Builder::generateSingleBarrier(G4_Predicate *prd) { | ||||
|       createSrc(getBuiltinR0()->getRegVar(), 0, 11, getRegionScalar(), Type_UB); | ||||
|   auto inst1 = createMov(g4::SIMD2, dst, src0, InstOpt_WriteEnable, true); | ||||
|   inst1->addComment("signal barrier payload (nprods, ncons)"); | ||||
|  | ||||
|   // 1 message length, 0 response length, no header, no ack | ||||
|   int desc = (0x1 << 25) + 0x4; | ||||
|  | ||||
| @ -534,16 +533,12 @@ int IR_Builder::translateVISAWaitInst(G4_Operand *mask) { | ||||
|   return VISA_SUCCESS; | ||||
| } | ||||
|  | ||||
| void IR_Builder::updateBarrier() { | ||||
|   // The legacy barrier is always allocated to id 0. | ||||
|   usedBarriers.set(0, true); | ||||
| } | ||||
|  | ||||
| void IR_Builder::generateBarrierSend(G4_Predicate *prd) { | ||||
|   updateBarrier(); | ||||
| void IR_Builder::generateBarrierSend(G4_Predicate *prd, uint32_t id = 0) { | ||||
|   // The id = 0 is the alias for the regular threadgroup barrier. | ||||
|   usedBarriers.set(id, true); | ||||
|  | ||||
|   if (hasUnifiedBarrier()) { | ||||
|     generateSingleBarrier(prd); | ||||
|     generateSingleBarrier(prd, id); | ||||
|     return; | ||||
|   } | ||||
|  | ||||
| @ -576,8 +571,9 @@ void IR_Builder::generateBarrierSend(G4_Predicate *prd) { | ||||
|                  createImm(desc, Type_UD), InstOpt_WriteEnable, msgDesc, true); | ||||
| } | ||||
|  | ||||
| void IR_Builder::generateBarrierWait(G4_Predicate *prd) { | ||||
|   updateBarrier(); | ||||
| void IR_Builder::generateBarrierWait(G4_Predicate *prd, uint32_t id = 0) { | ||||
|   // The id = 0 is the alias for the regular threadgroup barrier. | ||||
|   usedBarriers.set(id, true); | ||||
|  | ||||
|   G4_Operand *waitSrc = nullptr; | ||||
|   if (!hasUnifiedBarrier()) { | ||||
| @ -592,8 +588,8 @@ void IR_Builder::generateBarrierWait(G4_Predicate *prd) { | ||||
|     } | ||||
|   } else { | ||||
|     if (getPlatform() >= Xe_PVC) { | ||||
|       // PVC: sync.bar 0 | ||||
|       waitSrc = createImm(0, Type_UD); | ||||
|       // PVC: sync.bar id | ||||
|       waitSrc = createImm(id, Type_UD); | ||||
|     } else { | ||||
|       // DG2: sync.bar null | ||||
|       waitSrc = createNullSrc(Type_UD); | ||||
| @ -751,10 +747,24 @@ int IR_Builder::translateVISASplitBarrierInst(G4_Predicate *prd, | ||||
|                                               bool isSignal) { | ||||
|   TIME_SCOPE(VISA_BUILDER_IR_CONSTRUCTION); | ||||
|  | ||||
|   uint32_t id = 0; | ||||
|  | ||||
|   if (getOption(vISA_SplitBarrierID1) && | ||||
|     getPlatform() >= Xe_PVC) { | ||||
|     // We have a mixed usage of the | ||||
|     // workgroupbarrier and splitbarrier. | ||||
|     // We need to split the ID usage: | ||||
|     // workgroupbarrier takes ID:0 | ||||
|     // splitbarrier takes ID:1 | ||||
|     // to avoid cross usage of the same ID | ||||
|     // and hang on the GPU. | ||||
|     id = 1; | ||||
|   } | ||||
|  | ||||
|   if (isSignal) { | ||||
|     generateBarrierSend(prd); | ||||
|     generateBarrierSend(prd, id); | ||||
|   } else { | ||||
|     generateBarrierWait(prd); | ||||
|     generateBarrierWait(prd, id); | ||||
|   } | ||||
|  | ||||
|   return VISA_SUCCESS; | ||||
|  | ||||
| @ -791,6 +791,11 @@ DEF_VISA_OPTION(vISA_ActiveThreadsOnlyBarrier, ET_BOOL, | ||||
|                 "This enables the active-only bit in workgroup barriers. " | ||||
|                 "With this option exited threads are not counted in expected " | ||||
|                 "arrival count total and will not cause hangs.", false) | ||||
| // Moves the split barrier to barrier ID 1 so it does not collide with the | ||||
| // regular workgroup barrier on ID 0 (honored on Xe_PVC and later only). | ||||
| DEF_VISA_OPTION(vISA_SplitBarrierID1, ET_BOOL, | ||||
|                 "-splitbarrierid1", | ||||
|                 "This flag switch ID of the split barrier to 1. " | ||||
|                 "After that change, the workgroupbarrier and splitbarrier " | ||||
|                 "can work together and will not cause hangs.", false) | ||||
| DEF_VISA_OPTION(vISA_RestrictSrc1ByteSwizzle, ET_BOOL, | ||||
|                 "-restrictSrc1ByteSwizzle", | ||||
|                 "Enable the WA to restrict src1 byte swizzle case", false) | ||||
|  | ||||
		Reference in New Issue
	
	Block a user