RelaxedOrdering: Queue size limit
Related-To: NEO-7458
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
parent
c10aa90815
commit
1e41f7952b
|
@ -334,6 +334,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionMaxRingBuffers, -1, "-1: default
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: default, 0 - disable, 1 - enable. If enabled, disable prefetcher is being dispatched")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionDisablePrefetcher, -1, "-1: default, 0 - disable, 1 - enable. If enabled, disable prefetcher is being dispatched")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrdering, -1, "-1: default, 0 - disable, 1 - enable. If enabled, tasks sent to direct submission ring may be dispatched out of order")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine")
|
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingForBcs, -1, "-1: default, 0 - disable, 1 - enable. If set, enable RelaxedOrdering feature for BCS engine")
|
||||||
|
// When left at -1, the static relaxed-ordering scheduler is programmed with a queue size limit of 2.
DECLARE_DEBUG_VARIABLE(int32_t, DirectSubmissionRelaxedOrderingQueueSizeLimit, -1, "-1: default, >0: Max gpu queue size. If limit is reached, scheduler wont consume new work")
|
||||||
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
|
DECLARE_DEBUG_VARIABLE(bool, DirectSubmissionPrintBuffers, false, "Print address of submitted command buffers")
|
||||||
|
|
||||||
/*FEATURE FLAGS*/
|
/*FEATURE FLAGS*/
|
||||||
|
|
|
@ -90,6 +90,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||||
uint64_t schedulerStartAddress = schedulerCmdStream.getGpuBase();
|
uint64_t schedulerStartAddress = schedulerCmdStream.getGpuBase();
|
||||||
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
|
uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress();
|
||||||
|
|
||||||
|
uint64_t loopSectionStartAddress = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart;
|
||||||
|
|
||||||
// 1. Init section
|
// 1. Init section
|
||||||
{
|
{
|
||||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||||
|
@ -182,7 +184,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||||
|
|
||||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(
|
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalRegRegBatchBufferStart(
|
||||||
schedulerCmdStream,
|
schedulerCmdStream,
|
||||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
loopSectionStartAddress,
|
||||||
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false);
|
AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false);
|
||||||
|
|
||||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R2, 0, true);
|
||||||
|
@ -193,9 +195,19 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||||
{
|
{
|
||||||
*schedulerCmdStream.getSpaceForCmd<typename GfxFamily::MI_ARB_CHECK>() = GfxFamily::cmdInitArbCheck;
|
*schedulerCmdStream.getSpaceForCmd<typename GfxFamily::MI_ARB_CHECK>() = GfxFamily::cmdInitArbCheck;
|
||||||
|
|
||||||
|
uint32_t queueSizeLimit = 2;
|
||||||
|
if (DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get() != -1) {
|
||||||
|
queueSizeLimit = static_cast<uint32_t>(DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get());
|
||||||
|
}
|
||||||
|
|
||||||
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||||
schedulerCmdStream,
|
schedulerCmdStream,
|
||||||
schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart,
|
loopSectionStartAddress,
|
||||||
|
CS_GPR_R1, queueSizeLimit, CompareOperation::GreaterOrEqual, false);
|
||||||
|
|
||||||
|
EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataRegBatchBufferStart(
|
||||||
|
schedulerCmdStream,
|
||||||
|
loopSectionStartAddress,
|
||||||
CS_GPR_R5, 1, CompareOperation::Equal, false);
|
CS_GPR_R5, 1, CompareOperation::Equal, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,7 +258,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchRelaxedOrderingScheduler
|
||||||
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
|
schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching
|
||||||
}
|
}
|
||||||
|
|
||||||
// 7. Semaphore section
|
// 3. Semaphore section
|
||||||
{
|
{
|
||||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||||
|
|
|
@ -53,7 +53,7 @@ struct StaticSchedulerSizeAndOffsetSection {
|
||||||
(2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
(2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeMiPredicate<GfxFamily>::getCmdSize();
|
||||||
|
|
||||||
static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize;
|
static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize;
|
||||||
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart();
|
static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd<GfxFamily>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||||
|
|
||||||
static constexpr uint64_t schedulerLoopCheckSectionJumpStart = drainRequestSectionStart + drainRequestSectionSize;
|
static constexpr uint64_t schedulerLoopCheckSectionJumpStart = drainRequestSectionStart + drainRequestSectionSize;
|
||||||
static constexpr uint64_t schedulerLoopCheckSectionJumpSize = 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_BATCH_BUFFER_START);
|
static constexpr uint64_t schedulerLoopCheckSectionJumpSize = 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_BATCH_BUFFER_START);
|
||||||
|
|
|
@ -491,3 +491,4 @@ ForceComputeWalkerPostSyncFlush = -1
|
||||||
DirectSubmissionRelaxedOrdering = -1
|
DirectSubmissionRelaxedOrdering = -1
|
||||||
DirectSubmissionRelaxedOrderingForBcs = -1
|
DirectSubmissionRelaxedOrderingForBcs = -1
|
||||||
OverrideUserFenceStartValue = -1
|
OverrideUserFenceStartValue = -1
|
||||||
|
DirectSubmissionRelaxedOrderingQueueSizeLimit = -1
|
|
@ -1458,13 +1458,25 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
uint32_t queueLimit = 2;
|
||||||
|
if (DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get() != -1) {
|
||||||
|
queueLimit = static_cast<uint32_t>(DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.get());
|
||||||
|
}
|
||||||
|
|
||||||
if (!verifyConditionalDataRegBbStart<FamilyType>(++arbCheck, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
if (!verifyConditionalDataRegBbStart<FamilyType>(++arbCheck, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||||
|
CS_GPR_R1, queueLimit, CompareOperation::GreaterOrEqual, false)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto conditionalBbStartcmds = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart());
|
||||||
|
|
||||||
|
if (!verifyConditionalDataRegBbStart<FamilyType>(conditionalBbStartcmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::loopStartSectionStart,
|
||||||
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
CS_GPR_R5, 1, CompareOperation::Equal, false)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// 6. Jump to scheduler loop check section (dynamic scheduler)
|
// 6. Jump to scheduler loop check section (dynamic scheduler)
|
||||||
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
auto lrrCmd = reinterpret_cast<MI_LOAD_REGISTER_REG *>(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataRegBatchBufferStart()));
|
||||||
|
|
||||||
if (!verifyLrr<FamilyType>(lrrCmd, CS_GPR_R0, CS_GPR_R9)) {
|
if (!verifyLrr<FamilyType>(lrrCmd, CS_GPR_R0, CS_GPR_R9)) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -1638,6 +1650,19 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreate
|
||||||
EXPECT_EQ(directSubmission.deferredTasksListAllocation, *allocsIter);
|
EXPECT_EQ(directSubmission.deferredTasksListAllocation, *allocsIter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that a non-default DirectSubmissionRelaxedOrderingQueueSizeLimit value is
// used when the static relaxed-ordering scheduler is programmed during
// direct-submission initialization.
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenDebugFlagSetWhenDispatchingStaticSchedulerThenOverrideQueueSizeLimit, IsAtLeastXeHpcCore) {
|
||||||
|
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||||
|
|
||||||
|
// Override the limit with a value that differs from the built-in fallback of 2.
// NOTE(review): the flag is not restored inside this test body; presumably the
// fixture restores debug flags - confirm.
DebugManager.flags.DirectSubmissionRelaxedOrderingQueueSizeLimit.set(123);
|
||||||
|
|
||||||
|
MockDirectSubmissionHw<FamilyType, Dispatcher> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||||
|
directSubmission.initialize(true, false);
|
||||||
|
|
||||||
|
// Initialization is expected to dispatch the static scheduler exactly once.
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||||
|
// The verification helper reads the same debug flag, so it expects the
// overridden limit in the programmed GreaterOrEqual compare on CS_GPR_R1.
EXPECT_TRUE(verifyStaticSchedulerProgramming<FamilyType>(*directSubmission.relaxedOrderingSchedulerAllocation,
|
||||||
|
directSubmission.deferredTasksListAllocation->getGpuAddress()));
|
||||||
|
}
|
||||||
|
|
||||||
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStaticScheduler, IsAtLeastXeHpcCore) {
|
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStaticScheduler, IsAtLeastXeHpcCore) {
|
||||||
using Dispatcher = RenderDispatcher<FamilyType>;
|
using Dispatcher = RenderDispatcher<FamilyType>;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue