mirror of
https://github.com/intel/intel-graphics-compiler.git
synced 2025-10-30 08:18:26 +08:00
Parameterize UnrollMaxCountForAlloca in GenTTI
Parameterize UnrollMaxCountForAlloca in GenTTI
This commit is contained in:
@ -308,7 +308,7 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
// It can potentially do some global cost estimations.
|
||||
// TODO: Having compilation retry enables loop unrolling for this case and determines if unrolling actually helps
|
||||
// reduce register pressure.
|
||||
const unsigned UnrollMaxCountForAlloca = 64; // May need to be higher for OpenCL
|
||||
const unsigned UnrollMaxCountForAlloca = IGC_GET_FLAG_VALUE(PromoteLoopUnrollwithAllocaCountThreshold);
|
||||
bool AllocaFound = false;
|
||||
if (MaxTripCount && MaxTripCount <= UnrollMaxCountForAlloca &&
|
||||
IGC_IS_FLAG_ENABLED(EnablePromoteLoopUnrollwithAlloca)) {
|
||||
@ -332,12 +332,16 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
|
||||
if (!AI)
|
||||
continue;
|
||||
|
||||
Type *Ty = AI->getAllocatedType();
|
||||
unsigned AllocaSize = Ty->isSized() ? DL.getTypeAllocSize(Ty) : 0;
|
||||
if (AllocaSize > 1024 || AllocaSize == 0)
|
||||
// Not fixed size or not in entry block
|
||||
// TODO: Can an alloca with a fixed size not reside in the entry block?
|
||||
if (!AI->isStaticAlloca())
|
||||
continue;
|
||||
// Assume every iteration consumes 1 alloca element.
|
||||
if (cast<ConstantInt>(AI->getArraySize())->getZExtValue() > UnrollMaxCountForAlloca)
|
||||
continue;
|
||||
|
||||
// Using alloca size in bytes as the threshold boost seems a bit tricky.
|
||||
unsigned AllocaSize = *(AI->getAllocationSizeInBits(DL)) / 8;
|
||||
ThresholdBoost += AllocaSize;
|
||||
if (GEP)
|
||||
isGEPLoopInduction[GEP] = true;
|
||||
@ -348,7 +352,6 @@ void GenIntrinsicsTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
|
||||
// LLVM defaults to only 10; boost to UnrollMaxCountForAlloca
|
||||
UP.MaxIterationsCountToAnalyze = UnrollMaxCountForAlloca;
|
||||
UP.Threshold += ThresholdBoost;
|
||||
UP.Runtime = true;
|
||||
UP.UpperBound = true;
|
||||
UP.Force = true;
|
||||
|
||||
|
||||
@ -449,6 +449,10 @@ DECLARE_IGC_REGKEY(
|
||||
"Disable this flag makes them always cost something as well as disables dynamic threshold increase based on the "
|
||||
"size of alloca and number of GEP to the alloca in the loop, leading to the loop less likely to be unrolled.",
|
||||
false)
|
||||
DECLARE_IGC_REGKEY(DWORD, PromoteLoopUnrollwithAllocaCountThreshold, 256,
|
||||
"The loop trip count OR number of alloca elements cutoff to stop regkey "
|
||||
"EnablePromoteLoopUnrollwithAlloca (Check regkey description).",
|
||||
false)
|
||||
DECLARE_IGC_REGKEY(DWORD, SetRegisterPressureThresholdForLoopUnroll, 96,
|
||||
"Set the register pressure threshold for limiting the loop unroll to smaller loops", false)
|
||||
DECLARE_IGC_REGKEY(DWORD, SetBranchSwapThreshold, 400, "Set the branch swaping threshold.", false)
|
||||
|
||||
@ -9,14 +9,15 @@ SPDX-License-Identifier: MIT
|
||||
|
||||
// windows unsupported due to issues on 32bit build, to be debugged.
|
||||
// UNSUPPORTED: system-windows
|
||||
// Disable loop unroll so that the private memory is not optimized out.
|
||||
|
||||
// checking the asm dump file
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 VISAOptions=-asmToConsole'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-ASM
|
||||
// checking the llvm-IR after EmitVISAPass
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM2
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM3
|
||||
// RUN: ocloc compile -file %s -options " -g -igc_opts 'DisableLoopUnroll=1 PrintToConsole=1 PrintMDBeforeModule=1 PrintAfter=EmitPass'" -device pvc 2>&1 | FileCheck %s --check-prefix=CHECK-LLVM4
|
||||
|
||||
// Look for the comment that reports the spill size
|
||||
// CHECK-ASM: //.private memory size
|
||||
|
||||
Reference in New Issue
Block a user