Parameterize UnrollMaxCountForAlloca in GenTTI

Parameterize UnrollMaxCountForAlloca in GenTTI
This commit is contained in:
Liou, Jhe-Yu
2025-08-13 21:16:17 +00:00
committed by igcbot
parent 4c2e31a450
commit cedf0f970b
3 changed files with 19 additions and 11 deletions

View File

@ -308,7 +308,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// It can potentially do some global cost estimations.
// TODO: Use compilation retry to enable loop unrolling for this case and to determine whether unrolling actually
// helps reduce register pressure.
const unsigned UnrollMaxCountForAlloca = 64; // May need to be higher for OpenCL
const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE(PromoteLoopUnrollwithAllocaCountThreshold);
bool AllocaFound = false;
if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca &&
IGC_IS_FLAG_ENABLED(EnablePromoteLoopUnrollwithAlloca)) {
@ -332,12 +332,16 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
if (!AI)
continue;
Type *Ty = AI->getAllocatedType();
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
if (AllocaSize > 1024 || AllocaSize == 0)
// Not fixed size or not in entry block
// TODO: Can an alloca with a fixed size reside outside the entry block?
if (!AI->isStaticAlloca())
continue;
// Assume every iteration consumes 1 alloca element.
if (cast<ConstantInt>(AI->getArraySize())->getZExtValue() > UnrollMaxCountForAlloca)
continue;
// Using alloca size in bytes as the threshold boost seems a bit tricky.
unsigned AllocaSize = *(AI->getAllocationSizeInBits(DL)) / 8;
ThresholdBoost += AllocaSize;
if (GEP)
isGEPLoopInduction[GEP] = true;
@ -348,7 +352,6 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
// LLVM defaults to only 10; boost it to UnrollMaxCountForAlloca
UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
UP.Threshold += ThresholdBoost;
UP.Runtime = true;
UP.UpperBound = true;
UP.Force = true;

View File

@ -449,6 +449,10 @@ DECLARE_IGC_REGKEY(
"Disable this flag makes them always cost something as well as disables dynamic threshold increase based on the "
"size of alloca and number of GEP to the alloca in the loop, leading to the loop less likely to be unrolled.",
false)
DECLARE_IGC_REGKEY(DWORD, PromoteLoopUnrollwithAllocaCountThreshold, 256,
"The loop trip count OR number of alloca elements cutoff to stop regkey "
"EnablePromoteLoopUnrollwithAlloca (Check regkey description).",
false)
DECLARE_IGC_REGKEY(DWORD, SetRegisterPressureThresholdForLoopUnroll, 96,
"Set the register pressure threshold for limiting the loop unroll to smaller loops", false)
DECLARE_IGC_REGKEY(DWORD, SetBranchSwapThreshold, 400, "Set the branch swaping threshold.", false)

View File

@ -9,14 +9,15 @@ SPDX-License-Identifier: MIT
// windows unsupported due to issues on 32bit build, to be debugged.
// UNSUPPORTED: system-windows
// Disable loop unroll so that the private memory is not optimized out.
// checking the asm dump file
// RUN: ocloc compile -file %s -options " -g -igc_opts 'VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
// checking the llvm-IR after EmitVISAPass
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
// Looking for the comment that reports the spill size
// CHECK-ASM: //.private memory size