RelaxedOrdering: Queue size limit
Related-To: NEO-7458
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
c10aa90815
commit
1e41f7952b
|
@ -334,6 +334,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default
|
|||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: default, 0 - disable, 1 - enable. If enabled, disable prefetcher is being dispatched")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -1, "-1: default, >0: Max gpu queue size. If limit is reached, scheduler wont consume new work")
|
||||
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
|
||||
|
||||
/*FEATURE FLAGS*/
|
||||
|
|
|
@ -90,6 +90,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
uint64_t schedulerStartAddress = schedulerCmdStream.getGpuBase();
|
||||
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
|
||||
|
||||
uint64_t loopSectionStartAddress = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart;
|
||||
|
||||
// 1. Init section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
|
@ -182,7 +184,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
loopSectionStartAddress,
|
||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false);
|
||||
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
||||
|
@ -193,9 +195,19 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
|||
{
|
||||
*schedulerCmdStream.getSpaceForCmd<typename GfxFamily::MI_ARB_CHECK>() = GfxFamily::cmdInitArbCheck;
|
||||
|
||||
uint32_t queueSizeLimit = 2;
|
||||
if (DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get() != -1) {
|
||||
queueSizeLimit = static_cast<uint32_t>(DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get());
|
||||
}
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
||||
loopSectionStartAddress,
|
||||
CS_GPR_R1, queueSizeLimit, CompareOperation::GreaterOrEqual, false);
|
||||
|
||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||
schedulerCmdStream,
|
||||
loopSectionStartAddress,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false);
|
||||
}
|
||||
|
||||
|
@ -246,7 +258,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
|
|||
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
|
||||
}
|
||||
|
||||
// 7. Semaphore section
|
||||
// 3. Semaphore section
|
||||
{
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
|
|
@ -53,7 +53,7 @@ struct StaticSchedulerSizeAndOffsetSection {
|
|||
(2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||
|
||||
static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize;
|
||||
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
||||
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
|
||||
static constexpr uint64_t schedulerLoopCheckSectionJumpStart = drainRequestSectionStart + drainRequestSectionSize;
|
||||
static constexpr uint64_t schedulerLoopCheckSectionJumpSize = 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_BATCH_BUFFER_START);
|
||||
|
|
|
@ -490,4 +490,5 @@ ForceAutoGrfCompilationMode = -1
|
|||
ForceComputeWalkerPostSyncFlush = -1
|
||||
DirectSubmissionRelaxedOrdering = -1
|
||||
DirectSubmissionRelaxedOrderingForBcs = -1
|
||||
OverrideUserFenceStartValue = -1
|
||||
OverrideUserFenceStartValue = -1
|
||||
DirectSubmissionRelaxedOrderingQueueSizeLimit = -1
|
|
@ -1458,13 +1458,25 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
|||
return false;
|
||||
}
|
||||
|
||||
uint32_t queueLimit = 2;
|
||||
if (DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get() != -1) {
|
||||
queueLimit = static_cast<uint32_t>(DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get());
|
||||
}
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(++arbCheck, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R1, queueLimit, CompareOperation::GreaterOrEqual, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto conditionalBbStartcmds = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||
|
||||
if (!verifyConditionalDataRegBbStart<FamilyType>(conditionalBbStartcmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 6. Jump to scheduler loop check section (dynamic scheduler)
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||
|
||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_GPR_R0, CS_GPR_R9)) {
|
||||
return false;
|
||||
|
@ -1638,6 +1650,19 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreate
|
|||
EXPECT_EQ(directSubmission.deferredTasksListAllocation, *allocsIter);
|
||||
}
|
||||
|
||||
// Checks that the DirectSubmissionRelaxedOrderingQueueSizeLimit debug flag overrides the
// queue size limit programmed into the static relaxed-ordering scheduler.
// The flag is set to 123 before the direct submission object is initialized; the test then
// expects exactly one static scheduler dispatch and a successful verifyStaticSchedulerProgramming()
// pass. That helper reads the same debug flag to compute the expected limit (falling back to 2
// when the flag is -1), so the comparison is made against 123 here.
// Only runs on platforms matching IsAtLeastXeHpcCore.
// NOTE(review): the flag is not restored inside this test body; presumably the fixture owns a
// debug-state restorer - confirm.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenDebugFlagSetWhenDispatchingStaticSchedulerThenOverrideQueueSizeLimit, IsAtLeastXeHpcCore) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||

||||
DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.set(123);
|
||||

||||
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
directSubmission.initialize(true, false);
|
||||

||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
EXPECT_TRUE(verifyStaticSchedulerProgramming<FamilyType>(*directSubmission.relaxedOrderingSchedulerAllocation,
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress()));
|
||||
}
|
||||
|
||||
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStaticScheduler, IsAtLeastXeHpcCore) {
|
||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||
|
||||
|
|
Loading…
Reference in New Issue