From a7d4162ca284d26e2a2951dc8d047b13e19a5298 Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 24 Nov 2022 16:57:18 +0000 Subject: [PATCH] RelaxedOrdering: Preallocate scheduler to optimize dispatch time Related-To: NEO-7458 Signed-off-by: Dunajski, Bartosz --- .../direct_submission/direct_submission_hw.h | 6 +- .../direct_submission_hw.inl | 217 ++++-- .../relaxed_ordering_helper.h | 24 +- .../common/mocks/mock_direct_submission_hw.h | 16 +- .../direct_submission_tests_2.cpp | 725 +++++++++++------- 5 files changed, 629 insertions(+), 359 deletions(-) diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index b5280224dc..98fe3a8dea 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -131,7 +131,7 @@ class DirectSubmissionHw { size_t getSizeDispatchRelaxedOrderingQueueStall(); void dispatchTaskStoreSection(uint64_t taskStartSectionVa); - MOCKABLE_VIRTUAL void preinitializeTaskStoreSection(); + MOCKABLE_VIRTUAL void preinitializeRelaxedOrderingSections(); void initRelaxedOrderingRegisters(); @@ -146,6 +146,8 @@ class DirectSubmissionHw { void dispatchDisablePrefetcher(bool disable); size_t getSizeDisablePrefetcher(); + MOCKABLE_VIRTUAL void dispatchStaticRelaxedOrderingScheduler(); + size_t getSizeEnd(); void dispatchPartitionRegisterConfiguration(); @@ -174,6 +176,7 @@ class DirectSubmissionHw { }; std::vector ringBuffers; std::unique_ptr preinitializedTaskStoreSection; + std::unique_ptr preinitializedRelaxedOrderingScheduler; uint32_t currentRingBuffer = 0u; uint32_t previousRingBuffer = 0u; uint32_t maxRingBufferCount = std::numeric_limits::max(); @@ -196,6 +199,7 @@ class DirectSubmissionHw { GraphicsAllocation *semaphores = nullptr; GraphicsAllocation *workPartitionAllocation = nullptr; GraphicsAllocation *deferredTasksListAllocation = nullptr; + GraphicsAllocation *relaxedOrderingSchedulerAllocation = nullptr; void *semaphorePtr = nullptr; volatile RingSemaphoreData *semaphoreData = nullptr; volatile void *workloadModeOneStoreAddress = nullptr; diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 4f209157f9..ab6e9d15f8 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -81,38 +81,40 @@ DirectSubmissionHw::DirectSubmissionHw(const DirectSubmis } template -void DirectSubmissionHw::dispatchRelaxedOrderingSchedulerSection(uint32_t value) { - uint64_t schedulerStartAddress = ringCommandStream.getCurrentGpuAddressPosition(); +void DirectSubmissionHw::dispatchStaticRelaxedOrderingScheduler() { + LinearStream schedulerCmdStream(this->relaxedOrderingSchedulerAllocation); + uint64_t schedulerStartAddress = schedulerCmdStream.getGpuBase(); uint64_t deferredTasksListGpuVa = deferredTasksListAllocation->getGpuAddress(); // 1. Init section { - EncodeMiPredicate::encode(ringCommandStream, MiPredicateType::Disable); - EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart(ringCommandStream, - schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::semaphoreSectionStart, - CS_GPR_R1, 0, CompareOperation::Equal, false); + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); + EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart( + schedulerCmdStream, + schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, + CS_GPR_R1, 0, CompareOperation::Equal, false); - LriHelper::program(&ringCommandStream, CS_GPR_R2, 0, true); - LriHelper::program(&ringCommandStream, CS_GPR_R2 + 4, 0, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R2, 0, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true); - uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::removeTaskSectionStart; - LriHelper::program(&ringCommandStream, CS_GPR_R3, static_cast(removeTaskVa & 0xFFFF'FFFFULL), true); - LriHelper::program(&ringCommandStream, CS_GPR_R3 + 4, static_cast(removeTaskVa >> 32), true); + uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::removeTaskSectionStart; + LriHelper::program(&schedulerCmdStream, CS_GPR_R3, static_cast(removeTaskVa & 0xFFFF'FFFFULL), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R3 + 4, static_cast(removeTaskVa >> 32), true); - uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::tasksListLoopCheckSectionStart; - LriHelper::program(&ringCommandStream, CS_GPR_R4, static_cast(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true); - LriHelper::program(&ringCommandStream, CS_GPR_R4 + 4, static_cast(walkersLoopConditionCheckVa >> 32), true); + uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::tasksListLoopCheckSectionStart; + LriHelper::program(&schedulerCmdStream, CS_GPR_R4, static_cast(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R4 + 4, static_cast(walkersLoopConditionCheckVa >> 32), true); } // 2. Dispatch task section (loop start) { - EncodeMiPredicate::encode(ringCommandStream, MiPredicateType::Disable); + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); - LriHelper::program(&ringCommandStream, CS_GPR_R6, 8, true); - LriHelper::program(&ringCommandStream, CS_GPR_R6 + 4, 0, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R6, 8, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R6 + 4, 0, true); - LriHelper::program(&ringCommandStream, CS_GPR_R8, static_cast(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true); - LriHelper::program(&ringCommandStream, CS_GPR_R8 + 4, static_cast(deferredTasksListGpuVa >> 32), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R8, static_cast(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast(deferredTasksListGpuVa >> 32), true); EncodeAluHelper aluHelper; aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2); @@ -126,27 +128,28 @@ void DirectSubmissionHw::dispatchRelaxedOrderingScheduler aluHelper.setNextAlu(AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU); aluHelper.setNextAlu(AluRegisters::OPCODE_FENCE_RD); - aluHelper.copyToCmdStream(ringCommandStream); + aluHelper.copyToCmdStream(schedulerCmdStream); - EncodeBatchBufferStartOrEnd::programBatchBufferStart(&ringCommandStream, 0, false, true, false); + EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false); } // 3. Remove task section { - EncodeMiPredicate::encode(ringCommandStream, MiPredicateType::Disable); + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); - EncodeMathMMIO::encodeDecrement(ringCommandStream, AluRegisters::R_1); - EncodeMathMMIO::encodeDecrement(ringCommandStream, AluRegisters::R_2); + EncodeMathMMIO::encodeDecrement(schedulerCmdStream, AluRegisters::R_1); + EncodeMathMMIO::encodeDecrement(schedulerCmdStream, AluRegisters::R_2); - EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart(ringCommandStream, - schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::semaphoreSectionStart, - CS_GPR_R1, 0, CompareOperation::Equal, false); + EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart( + schedulerCmdStream, + schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, + CS_GPR_R1, 0, CompareOperation::Equal, false); - LriHelper::program(&ringCommandStream, CS_GPR_R7, 8, true); - LriHelper::program(&ringCommandStream, CS_GPR_R7 + 4, 0, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R7, 8, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R7 + 4, 0, true); - LriHelper::program(&ringCommandStream, CS_GPR_R8, static_cast(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true); - LriHelper::program(&ringCommandStream, CS_GPR_R8 + 4, static_cast(deferredTasksListGpuVa >> 32), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R8, static_cast(deferredTasksListGpuVa & 0xFFFF'FFFFULL), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast(deferredTasksListGpuVa >> 32), true); EncodeAluHelper aluHelper; aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1); @@ -164,38 +167,79 @@ void DirectSubmissionHw::dispatchRelaxedOrderingScheduler aluHelper.setNextAlu(AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7); aluHelper.setNextAlu(AluRegisters::OPCODE_FENCE_WR); - aluHelper.copyToCmdStream(ringCommandStream); + aluHelper.copyToCmdStream(schedulerCmdStream); } // 4. List loop check section { - EncodeMiPredicate::encode(ringCommandStream, MiPredicateType::Disable); + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); - EncodeMathMMIO::encodeIncrement(ringCommandStream, AluRegisters::R_2); + EncodeMathMMIO::encodeIncrement(schedulerCmdStream, AluRegisters::R_2); - EncodeBatchBufferStartOrEnd::programConditionalRegRegBatchBufferStart(ringCommandStream, - schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::loopStartSectionStart, - AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false); + EncodeBatchBufferStartOrEnd::programConditionalRegRegBatchBufferStart( + schedulerCmdStream, + schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, + AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false); - LriHelper::program(&ringCommandStream, CS_GPR_R2, 0, true); - LriHelper::program(&ringCommandStream, CS_GPR_R2 + 4, 0, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R2, 0, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true); } // 5. Drain request section { - *ringCommandStream.getSpaceForCmd() = GfxFamily::cmdInitArbCheck; + *schedulerCmdStream.getSpaceForCmd() = GfxFamily::cmdInitArbCheck; - EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart(ringCommandStream, - schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::loopStartSectionStart, - CS_GPR_R5, 1, CompareOperation::Equal, false); + EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart( + schedulerCmdStream, + schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, + CS_GPR_R5, 1, CompareOperation::Equal, false); } - // 6. Scheduler loop check section - { - EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(ringCommandStream, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::endSectionStart, - semaphoreGpuVa, value, CompareOperation::GreaterOrEqual, false); + // Exit Static scheduler - EncodeBatchBufferStartOrEnd::programBatchBufferStart(&ringCommandStream, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::loopStartSectionStart, false, false, false); + // 6. Jump to scheduler loop check section (dynamic scheduler) + EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9); + EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4); + EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false); + + // 7. Jump to Semaphore section (dynamic scheduler) + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); + LriHelper::program(&schedulerCmdStream, CS_GPR_R10, static_cast(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionSize), true); + + LriHelper::program(&schedulerCmdStream, CS_GPR_R10 + 4, 0, true); + + EncodeAluHelper aluHelper; + aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9); + aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10); + aluHelper.setNextAlu(AluRegisters::OPCODE_ADD); + aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_0, AluRegisters::R_ACCU); + aluHelper.copyToCmdStream(schedulerCmdStream); + + EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false); +} + +template +void DirectSubmissionHw::dispatchRelaxedOrderingSchedulerSection(uint32_t value) { + LinearStream schedulerCmdStream(this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize); + + // 1. Init section + + uint64_t schedulerStartVa = ringCommandStream.getCurrentGpuAddressPosition(); + + uint64_t schedulerLoopCheckVa = schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionStart; + + LriHelper::program(&schedulerCmdStream, CS_GPR_R9, static_cast(schedulerLoopCheckVa & 0xFFFF'FFFFULL), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R9 + 4, static_cast(schedulerLoopCheckVa >> 32), true); + + schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching + + // 2. Scheduler loop check section + { + EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart( + schedulerCmdStream, schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::endSectionStart, + semaphoreGpuVa, value, CompareOperation::GreaterOrEqual, false); + + schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching } // 7. Semaphore section @@ -203,20 +247,17 @@ void DirectSubmissionHw::dispatchRelaxedOrderingScheduler using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; - EncodeMiPredicate::encode(ringCommandStream, MiPredicateType::Disable); + schedulerCmdStream.getSpace(EncodeMiPredicate::getCmdSize()); // skip patching - EncodeSempahore::addMiSemaphoreWaitCommand(ringCommandStream, - semaphoreGpuVa, - value, + EncodeSempahore::addMiSemaphoreWaitCommand(schedulerCmdStream, semaphoreGpuVa, value, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); } - // 8. End section - { - EncodeMiPredicate::encode(ringCommandStream, MiPredicateType::Disable); + // skip patching End section - LriHelper::program(&ringCommandStream, CS_GPR_R5, 0, true); - } + auto dst = ringCommandStream.getSpace(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize); + memcpy_s(dst, RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize, + this->preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize); } template @@ -269,6 +310,16 @@ bool DirectSubmissionHw::allocateResources() { UNRECOVERABLE_IF(deferredTasksListAllocation == nullptr); allocations.push_back(deferredTasksListAllocation); + + const AllocationProperties relaxedOrderingSchedulerAllocationProperties(rootDeviceIndex, + true, MemoryConstants::pageSize64k, + AllocationType::COMMAND_BUFFER, + isMultiOsContextCapable, false, osContext.getDeviceBitfield()); + + relaxedOrderingSchedulerAllocation = memoryManager->allocateGraphicsMemoryWithProperties(relaxedOrderingSchedulerAllocationProperties); + UNRECOVERABLE_IF(relaxedOrderingSchedulerAllocation == nullptr); + + allocations.push_back(relaxedOrderingSchedulerAllocation); } if (DebugManager.flags.DirectSubmissionPrintBuffers.get()) { @@ -368,9 +419,10 @@ bool DirectSubmissionHw::initialize(bool submitOnInit, bo this->systemMemoryFenceAddressSet = true; } if (this->relaxedOrderingEnabled) { - preinitializeTaskStoreSection(); + preinitializeRelaxedOrderingSections(); initRelaxedOrderingRegisters(); + dispatchStaticRelaxedOrderingScheduler(); startBufferSize += RelaxedOrderingHelper::getSizeRegistersInit(); this->relaxedOrderingInitialized = true; @@ -422,7 +474,8 @@ bool DirectSubmissionHw::startRingBuffer() { } if (this->relaxedOrderingEnabled && !this->relaxedOrderingInitialized) { - preinitializeTaskStoreSection(); + preinitializeRelaxedOrderingSections(); + dispatchStaticRelaxedOrderingScheduler(); initRelaxedOrderingRegisters(); this->relaxedOrderingInitialized = true; @@ -498,7 +551,7 @@ inline void DirectSubmissionHw::dispatchSemaphoreSection( template inline size_t DirectSubmissionHw::getSizeSemaphoreSection(bool firstSubmission) { - size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::totalSize + size_t semaphoreSize = (this->relaxedOrderingEnabled && !firstSubmission) ? RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize : EncodeSempahore::getSizeMiSemaphoreWait(); semaphoreSize += getSizePrefetchMitigation(); @@ -688,7 +741,8 @@ void DirectSubmissionHw::initRelaxedOrderingRegisters() { } template -void DirectSubmissionHw::preinitializeTaskStoreSection() { +void DirectSubmissionHw::preinitializeRelaxedOrderingSections() { + // Task store section preinitializedTaskStoreSection = std::make_unique(RelaxedOrderingHelper::getSizeTaskStoreSection()); LinearStream stream(preinitializedTaskStoreSection.get(), RelaxedOrderingHelper::getSizeTaskStoreSection()); @@ -723,6 +777,46 @@ void DirectSubmissionHw::preinitializeTaskStoreSection() EncodeMathMMIO::encodeIncrement(stream, AluRegisters::R_1); UNRECOVERABLE_IF(stream.getUsed() != RelaxedOrderingHelper::getSizeTaskStoreSection()); + + // Scheduler section + preinitializedRelaxedOrderingScheduler = std::make_unique(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize); + LinearStream schedulerStream(preinitializedRelaxedOrderingScheduler.get(), RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize); + + uint64_t schedulerStartAddress = relaxedOrderingSchedulerAllocation->getGpuAddress(); + + // 1. Init section + LriHelper::program(&schedulerStream, CS_GPR_R9, 0, true); + LriHelper::program(&schedulerStream, CS_GPR_R9 + 4, 0, true); + EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerStream, schedulerStartAddress, false, false, false); + + // 2. Scheduler loop check section + { + + EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(schedulerStream, 0, 0, 0, CompareOperation::GreaterOrEqual, false); + + EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerStream, + schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, + false, false, false); + } + + // 3. Semaphore section + { + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + + EncodeMiPredicate::encode(schedulerStream, MiPredicateType::Disable); + + EncodeSempahore::addMiSemaphoreWaitCommand(schedulerStream, 0, 0, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); + } + + // 4. End section + { + EncodeMiPredicate::encode(schedulerStream, MiPredicateType::Disable); + + LriHelper::program(&schedulerStream, CS_GPR_R5, 0, true); + } + + UNRECOVERABLE_IF(schedulerStream.getUsed() != RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::totalSize); } template @@ -889,6 +983,7 @@ void DirectSubmissionHw::deallocateResources() { } memoryManager->freeGraphicsMemory(deferredTasksListAllocation); + memoryManager->freeGraphicsMemory(relaxedOrderingSchedulerAllocation); } template diff --git a/shared/source/direct_submission/relaxed_ordering_helper.h b/shared/source/direct_submission/relaxed_ordering_helper.h index bcee979c00..265c6730b5 100644 --- a/shared/source/direct_submission/relaxed_ordering_helper.h +++ b/shared/source/direct_submission/relaxed_ordering_helper.h @@ -32,10 +32,9 @@ constexpr size_t getSizeReturnPtrRegs() { } template -struct SchedulerSizeAndOffsetSection { - using MI_MATH = typename GfxFamily::MI_MATH; - using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE; +struct StaticSchedulerSizeAndOffsetSection { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; + using MI_LOAD_REGISTER_REG = typename GfxFamily::MI_LOAD_REGISTER_REG; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart() + (6 * sizeof(MI_LOAD_REGISTER_IMM)) + @@ -56,7 +55,24 @@ struct SchedulerSizeAndOffsetSection { static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize; static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart(); - static constexpr uint64_t schedulerLoopCheckSectionStart = drainRequestSectionStart + drainRequestSectionSize; + static constexpr uint64_t schedulerLoopCheckSectionJumpStart = drainRequestSectionStart + drainRequestSectionSize; + static constexpr uint64_t schedulerLoopCheckSectionJumpSize = 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_BATCH_BUFFER_START); + + static constexpr uint64_t semaphoreSectionJumpStart = schedulerLoopCheckSectionJumpStart + schedulerLoopCheckSectionJumpSize; + static constexpr uint64_t semaphoreSectionJumpSize = EncodeMiPredicate::getCmdSize() + (2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper::getCmdsSize() + + sizeof(MI_BATCH_BUFFER_START); + + static constexpr uint64_t totalSize = semaphoreSectionJumpStart + semaphoreSectionJumpSize; +}; + +template +struct DynamicSchedulerSizeAndOffsetSection { + using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; + using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; + + static constexpr uint64_t initSectionSize = (2 * sizeof(MI_LOAD_REGISTER_IMM)) + sizeof(MI_BATCH_BUFFER_START); + + static constexpr uint64_t schedulerLoopCheckSectionStart = initSectionSize; static constexpr uint64_t schedulerLoopCheckSectionSize = EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart() + sizeof(MI_BATCH_BUFFER_START); static constexpr uint64_t semaphoreSectionStart = schedulerLoopCheckSectionStart + schedulerLoopCheckSectionSize; diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index 08e15b9a85..c1cafe8fb9 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -53,8 +53,10 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::partitionedMode; using BaseClass::performDiagnosticMode; using BaseClass::postSyncOffset; + using BaseClass::preinitializedRelaxedOrderingScheduler; using BaseClass::preinitializedTaskStoreSection; using BaseClass::relaxedOrderingInitialized; + using BaseClass::relaxedOrderingSchedulerAllocation; using BaseClass::reserved; using BaseClass::ringBuffers; using BaseClass::ringCommandStream; @@ -86,9 +88,14 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw return allocateOsResourcesReturn; } - void preinitializeTaskStoreSection() override { - preinitializeTaskStoreSectionCalled++; - BaseClass::preinitializeTaskStoreSection(); + void preinitializeRelaxedOrderingSections() override { + preinitializeRelaxedOrderingSectionsCalled++; + BaseClass::preinitializeRelaxedOrderingSections(); + } + + void dispatchStaticRelaxedOrderingScheduler() override { + dispatchStaticRelaxedOrderingSchedulerCalled++; + BaseClass::dispatchStaticRelaxedOrderingScheduler(); } bool makeResourcesResident(DirectSubmissionAllocations &allocations) override { @@ -146,7 +153,8 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw uint32_t submitCount = 0u; uint32_t handleResidencyCount = 0u; uint32_t disabledDiagnosticCalled = 0u; - uint32_t preinitializeTaskStoreSectionCalled = 0; + uint32_t preinitializeRelaxedOrderingSectionsCalled = 0; + uint32_t dispatchStaticRelaxedOrderingSchedulerCalled = 0; uint32_t makeResourcesResidentVectorSize = 0u; bool allocateOsResourcesReturn = true; bool submitReturn = true; diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index e0dc8abfad..b567a9c3ca 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -913,7 +913,10 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf } template - bool verifySchedulerProgramming(LinearStream &cs, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset); + bool verifyDynamicSchedulerProgramming(LinearStream &cs, uint64_t schedulerAllocationGpuVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset); + + template + bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa); template bool verifyMiPredicate(void *miPredicateCmd, MiPredicateType predicateType); @@ -1187,7 +1190,343 @@ bool DirectSubmissionRelaxedOrderingTests::verifyConditionalDataRegBbStart(void } template -bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStream &cs, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset) { +bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa) { + using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE; + using MI_MATH_ALU_INST_INLINE = typename FamilyType::MI_MATH_ALU_INST_INLINE; + using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; + using MI_MATH = typename FamilyType::MI_MATH; + + uint64_t schedulerStartGpuAddress = schedulerAllocation.getGpuAddress(); + void *schedulerCmds = schedulerAllocation.getUnderlyingBuffer(); + + // 1. Init section + auto miPredicate = reinterpret_cast(schedulerCmds); + + if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { + return false; + } + + miPredicate++; + if (!verifyConditionalDataRegBbStart(miPredicate, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, + CS_GPR_R1, 0, CompareOperation::Equal, false)) { + return false; + } + + auto lriCmd = reinterpret_cast(ptrOffset(miPredicate, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); + if (!verifyLri(lriCmd, CS_GPR_R2, 0)) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R2 + 4, 0)) { + return false; + } + + uint64_t removeTaskVa = schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::removeTaskSectionStart; + + if (!verifyLri(++lriCmd, CS_GPR_R3, static_cast(removeTaskVa & 0xFFFF'FFFFULL))) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R3 + 4, static_cast(removeTaskVa >> 32))) { + return false; + } + + uint64_t walkersLoopConditionCheckVa = schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::tasksListLoopCheckSectionStart; + + if (!verifyLri(++lriCmd, CS_GPR_R4, static_cast(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL))) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R4 + 4, static_cast(walkersLoopConditionCheckVa >> 32))) { + return false; + } + + // 2. Dispatch task section (loop start) + miPredicate = reinterpret_cast(++lriCmd); + + if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { + return false; + } + + lriCmd = reinterpret_cast(++miPredicate); + if (!verifyLri(lriCmd, CS_GPR_R6, 8)) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R6 + 4, 0)) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R8, static_cast(deferredTaskListVa & 0xFFFF'FFFFULL))) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R8 + 4, static_cast(deferredTaskListVa >> 32))) { + return false; + } + + auto miMathCmd = reinterpret_cast(++lriCmd); + if (miMathCmd->DW0.BitField.DwordLength != 9) { + return false; + } + + auto miAluCmd = reinterpret_cast(++miMathCmd); + if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_6, AluRegisters::R_ACCU)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + auto bbStart = reinterpret_cast(++miAluCmd); + if (!verifyBbStart(bbStart, 0, true, false)) { + return false; + } + + // 3. Remove task section + miPredicate = reinterpret_cast(++bbStart); + if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { + return false; + } + + miPredicate++; + if (!verifyIncrementOrDecrement(miPredicate, AluRegisters::R_1, false)) { + return false; + } + + auto cmds = ptrOffset(miPredicate, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()); + + if (!verifyIncrementOrDecrement(cmds, AluRegisters::R_2, false)) { + return false; + } + + cmds = ptrOffset(cmds, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()); + + if (!verifyConditionalDataRegBbStart(cmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, + CS_GPR_R1, 0, CompareOperation::Equal, false)) { + return false; + } + + lriCmd = reinterpret_cast(ptrOffset(cmds, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); + if (!verifyLri(lriCmd, CS_GPR_R7, 8)) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R7 + 4, 0)) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R8, static_cast(deferredTaskListVa & 0xFFFF'FFFFULL))) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R8 + 4, static_cast(deferredTaskListVa >> 32))) { + return false; + } + + miMathCmd = reinterpret_cast(++lriCmd); + if (miMathCmd->DW0.BitField.DwordLength != 13) { + return false; + } + + miAluCmd = reinterpret_cast(++miMathCmd); + if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_7, AluRegisters::R_ACCU)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_6)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD0, AluRegisters::R_SRCB, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_FENCE_WR, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + // 4. List loop check section + + miPredicate = reinterpret_cast(++miAluCmd); + if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { + return false; + } + + miPredicate++; + if (!verifyIncrementOrDecrement(miPredicate, AluRegisters::R_2, true)) { + return false; + } + + cmds = ptrOffset(miPredicate, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()); + + if (!verifyConditionalRegRegBbStart(cmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, + AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false)) { + return false; + } + + lriCmd = reinterpret_cast(ptrOffset(cmds, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalRegRegBatchBufferStart())); + + if (!verifyLri(lriCmd, CS_GPR_R2, 0)) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R2 + 4, 0)) { + return false; + } + + // 5. Drain request section + auto arbCheck = reinterpret_cast(++lriCmd); + if (memcmp(arbCheck, &FamilyType::cmdInitArbCheck, sizeof(MI_ARB_CHECK)) != 0) { + return false; + } + + if (!verifyConditionalDataRegBbStart(++arbCheck, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, + CS_GPR_R5, 1, CompareOperation::Equal, false)) { + return false; + } + + // 6. Jump to scheduler loop check section (dynamic scheduler) + auto lrrCmd = reinterpret_cast(ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); + + if (!verifyLrr(lrrCmd, CS_GPR_R0, CS_GPR_R9)) { + return false; + } + + if (!verifyLrr(++lrrCmd, CS_GPR_R0 + 4, CS_GPR_R9 + 4)) { + return false; + } + + bbStart = reinterpret_cast(++lrrCmd); + if (!verifyBbStart(bbStart, 0, true, false)) { + return false; + } + + // 7. Jump to Semaphore section (dynamic scheduler) + miPredicate = reinterpret_cast(++bbStart); + + if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { + return false; + } + + lriCmd = reinterpret_cast(++miPredicate); + + if (!verifyLri(lriCmd, CS_GPR_R10, static_cast(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionSize))) { + return false; + } + + if (!verifyLri(++lriCmd, CS_GPR_R10 + 4, 0)) { + return false; + } + + miMathCmd = reinterpret_cast(++lriCmd); + if (miMathCmd->DW0.BitField.DwordLength != 3) { + return false; + } + + miAluCmd = reinterpret_cast(++miMathCmd); + if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { + return false; + } + + if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_0, AluRegisters::R_ACCU)) { + return false; + } + + bbStart = reinterpret_cast(++miAluCmd); + if (!verifyBbStart(bbStart, 0, true, false)) { + return false; + } + + return true; +} + +template +bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(LinearStream &cs, uint64_t schedulerAllocationGpuVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_SET_PREDICATE = typename FamilyType::MI_SET_PREDICATE; @@ -1203,282 +1542,42 @@ bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStre bool success = false; for (auto &it : hwParse.cmdList) { - if (auto miPredicate = genCmdCast(it)) { + if (auto lriCmd = genCmdCast(it)) { // 1. Init section - if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { + + uint64_t schedulerStartAddress = cs.getGraphicsAllocation()->getGpuAddress() + ptrDiff(lriCmd, cs.getCpuBase()); + + uint64_t schedulerLoopCheckVa = schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionStart; + + if (!verifyLri(lriCmd, CS_GPR_R9, static_cast(schedulerLoopCheckVa & 0xFFFF'FFFFULL))) { continue; } - uint64_t schedulerStartAddress = cs.getGraphicsAllocation()->getGpuAddress() + ptrDiff(miPredicate, cs.getCpuBase()); - - miPredicate++; - if (!verifyConditionalDataRegBbStart(miPredicate, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::semaphoreSectionStart, - CS_GPR_R1, 0, CompareOperation::Equal, false)) { + if (!verifyLri(++lriCmd, CS_GPR_R9 + 4, static_cast(schedulerLoopCheckVa >> 32))) { continue; } - auto lriCmd = reinterpret_cast(ptrOffset(miPredicate, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); - if (!verifyLri(lriCmd, CS_GPR_R2, 0)) { + auto bbStart = reinterpret_cast(++lriCmd); + if (!verifyBbStart(bbStart, schedulerAllocationGpuVa, false, false)) { continue; } - if (!verifyLri(++lriCmd, CS_GPR_R2 + 4, 0)) { - continue; - } + // 2. Scheduler loop check section - uint64_t removeTaskVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::removeTaskSectionStart; + bbStart++; - if (!verifyLri(++lriCmd, CS_GPR_R3, static_cast(removeTaskVa & 0xFFFF'FFFFULL))) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R3 + 4, static_cast(removeTaskVa >> 32))) { - continue; - } - - uint64_t walkersLoopConditionCheckVa = schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::tasksListLoopCheckSectionStart; - - if (!verifyLri(++lriCmd, CS_GPR_R4, static_cast(walkersLoopConditionCheckVa & 0xFFFF'FFFFULL))) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R4 + 4, static_cast(walkersLoopConditionCheckVa >> 32))) { - continue; - } - - // 2. Dispatch task section (loop start) - miPredicate = reinterpret_cast(++lriCmd); - - if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { - continue; - } - - lriCmd = reinterpret_cast(++miPredicate); - if (!verifyLri(lriCmd, CS_GPR_R6, 8)) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R6 + 4, 0)) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R8, static_cast(deferredTaskListVa & 0xFFFF'FFFFULL))) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R8 + 4, static_cast(deferredTaskListVa >> 32))) { - continue; - } - - auto miMathCmd = reinterpret_cast(++lriCmd); - if (miMathCmd->DW0.BitField.DwordLength != 9) { - continue; - } - - auto miAluCmd = reinterpret_cast(++miMathCmd); - if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_6, AluRegisters::R_ACCU)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_0, AluRegisters::R_ACCU)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - auto bbStart = reinterpret_cast(++miAluCmd); - if (!verifyBbStart(bbStart, 0, true, false)) { - continue; - } - - // 3. Remove task section - miPredicate = reinterpret_cast(++bbStart); - if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { - continue; - } - - miPredicate++; - if (!verifyIncrementOrDecrement(miPredicate, AluRegisters::R_1, false)) { - continue; - } - - auto cmds = ptrOffset(miPredicate, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()); - - if (!verifyIncrementOrDecrement(cmds, AluRegisters::R_2, false)) { - continue; - } - - cmds = ptrOffset(cmds, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()); - - if (!verifyConditionalDataRegBbStart(cmds, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::semaphoreSectionStart, - CS_GPR_R1, 0, CompareOperation::Equal, false)) { - continue; - } - - lriCmd = reinterpret_cast(ptrOffset(cmds, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); - if (!verifyLri(lriCmd, CS_GPR_R7, 8)) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R7 + 4, 0)) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R8, static_cast(deferredTaskListVa & 0xFFFF'FFFFULL))) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R8 + 4, static_cast(deferredTaskListVa >> 32))) { - continue; - } - - miMathCmd = reinterpret_cast(++lriCmd); - if (miMathCmd->DW0.BitField.DwordLength != 13) { - continue; - } - - miAluCmd = reinterpret_cast(++miMathCmd); - if (!verifyAlu(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_SHL, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STORE, AluRegisters::R_7, AluRegisters::R_ACCU)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_7)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOADIND, AluRegisters::R_7, AluRegisters::R_ACCU)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_FENCE_RD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_6)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_LOAD0, AluRegisters::R_SRCB, AluRegisters::OPCODE_NONE)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_ADD, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_STOREIND, AluRegisters::R_ACCU, AluRegisters::R_7)) { - continue; - } - - if (!verifyAlu(++miAluCmd, AluRegisters::OPCODE_FENCE_WR, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE)) { - continue; - } - - // 4. List loop check section - - miPredicate = reinterpret_cast(++miAluCmd); - if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { - continue; - } - - miPredicate++; - if (!verifyIncrementOrDecrement(miPredicate, AluRegisters::R_2, true)) { - continue; - } - - cmds = ptrOffset(miPredicate, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()); - - if (!verifyConditionalRegRegBbStart(cmds, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::loopStartSectionStart, - AluRegisters::R_1, AluRegisters::R_2, CompareOperation::NotEqual, false)) { - continue; - } - - lriCmd = reinterpret_cast(ptrOffset(cmds, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalRegRegBatchBufferStart())); - - if (!verifyLri(lriCmd, CS_GPR_R2, 0)) { - continue; - } - - if (!verifyLri(++lriCmd, CS_GPR_R2 + 4, 0)) { - continue; - } - - // 5. Drain request section - auto arbCheck = reinterpret_cast(++lriCmd); - if (memcmp(arbCheck, &FamilyType::cmdInitArbCheck, sizeof(MI_ARB_CHECK)) != 0) { - continue; - } - - if (!verifyConditionalDataRegBbStart(++arbCheck, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::loopStartSectionStart, - CS_GPR_R5, 1, CompareOperation::Equal, false)) { - continue; - } - - // 6. Scheduler loop check section - auto cmds2 = ptrOffset(arbCheck, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart()); - - if (!verifyConditionalDataMemBbStart(cmds2, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::endSectionStart, + if (!verifyConditionalDataMemBbStart(bbStart, schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::endSectionStart, semaphoreGpuVa, semaphoreValue, CompareOperation::GreaterOrEqual, false)) { continue; } - bbStart = reinterpret_cast(ptrOffset(cmds2, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart())); - if (!verifyBbStart(bbStart, schedulerStartAddress + RelaxedOrderingHelper::SchedulerSizeAndOffsetSection::loopStartSectionStart, false, false)) { + bbStart = reinterpret_cast(ptrOffset(bbStart, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart())); + if (!verifyBbStart(bbStart, schedulerAllocationGpuVa + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, false, false)) { continue; } - // 7. Semaphore section - miPredicate = reinterpret_cast(++bbStart); + // 3. Semaphore section + auto miPredicate = reinterpret_cast(++bbStart); if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { continue; } @@ -1490,7 +1589,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStre continue; } - // 8. End section + // 4. End section miPredicate = reinterpret_cast(++semaphore); if (!verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { @@ -1512,7 +1611,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifySchedulerProgramming(LinearStre return success; } -HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreateDeferredTasksAllocation) { +HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreateDeferredTasksAndSchedulerAllocation) { using Dispatcher = RenderDispatcher; auto mockMemoryOperations = new MockMemoryOperations(); @@ -1525,12 +1624,57 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenAllocatingResourcesThenCreate directSubmission.initialize(false, false); + auto allocsIter = mockMemoryOperations->gfxAllocationsForMakeResident.rbegin(); + + EXPECT_EQ(AllocationType::COMMAND_BUFFER, directSubmission.relaxedOrderingSchedulerAllocation->getAllocationType()); + EXPECT_NE(nullptr, directSubmission.relaxedOrderingSchedulerAllocation); + EXPECT_EQ(directSubmission.relaxedOrderingSchedulerAllocation, *allocsIter); + + allocsIter++; + EXPECT_EQ(AllocationType::DEFERRED_TASKS_LIST, directSubmission.deferredTasksListAllocation->getAllocationType()); EXPECT_NE(nullptr, directSubmission.deferredTasksListAllocation); - EXPECT_EQ(directSubmission.deferredTasksListAllocation, mockMemoryOperations->gfxAllocationsForMakeResident.back()); + EXPECT_EQ(directSubmission.deferredTasksListAllocation, *allocsIter); } -HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitializeTaskStoreSectionAndInitRegs) { +HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStaticScheduler, IsAtLeastXeHpcCore) { + using Dispatcher = RenderDispatcher; + + { + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + directSubmission.initialize(false, false); + + EXPECT_EQ(0u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + } + + { + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + directSubmission.initialize(true, false); + + EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + EXPECT_TRUE(verifyStaticSchedulerProgramming(*directSubmission.relaxedOrderingSchedulerAllocation, + directSubmission.deferredTasksListAllocation->getGpuAddress())); + } + + { + MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); + directSubmission.initialize(false, false); + EXPECT_EQ(0u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + + directSubmission.startRingBuffer(); + + EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + + directSubmission.startRingBuffer(); + EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + + FlushStampTracker flushStamp(true); + directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); + EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); + } +} + +HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitializeTaskStoreSectionAndStaticSchedulerAndInitRegs) { using Dispatcher = RenderDispatcher; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; @@ -1575,9 +1719,10 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize directSubmission.initialize(false, false); EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, 0)); - EXPECT_EQ(0u, directSubmission.preinitializeTaskStoreSectionCalled); + EXPECT_EQ(0u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); EXPECT_FALSE(directSubmission.relaxedOrderingInitialized); EXPECT_EQ(nullptr, directSubmission.preinitializedTaskStoreSection.get()); + EXPECT_EQ(nullptr, directSubmission.preinitializedRelaxedOrderingScheduler.get()); } { @@ -1585,33 +1730,35 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenPreinitialize directSubmission.initialize(true, false); EXPECT_TRUE(verifyInitRegisters(directSubmission.ringCommandStream, 0)); - EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled); + EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); EXPECT_TRUE(directSubmission.relaxedOrderingInitialized); EXPECT_NE(nullptr, directSubmission.preinitializedTaskStoreSection.get()); + EXPECT_NE(nullptr, directSubmission.preinitializedRelaxedOrderingScheduler.get()); size_t offset = directSubmission.ringCommandStream.getUsed(); directSubmission.startRingBuffer(); EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset)); - EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled); + EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); } { MockDirectSubmissionHw directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver); directSubmission.initialize(false, false); - EXPECT_EQ(0u, directSubmission.preinitializeTaskStoreSectionCalled); + EXPECT_EQ(0u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); directSubmission.startRingBuffer(); - EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled); + EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); EXPECT_TRUE(directSubmission.relaxedOrderingInitialized); EXPECT_NE(nullptr, directSubmission.preinitializedTaskStoreSection.get()); + EXPECT_NE(nullptr, directSubmission.preinitializedRelaxedOrderingScheduler.get()); size_t offset = directSubmission.ringCommandStream.getUsed(); directSubmission.startRingBuffer(); EXPECT_FALSE(verifyInitRegisters(directSubmission.ringCommandStream, offset)); - EXPECT_EQ(1u, directSubmission.preinitializeTaskStoreSectionCalled); + EXPECT_EQ(1u, directSubmission.preinitializeRelaxedOrderingSectionsCalled); } } @@ -1713,23 +1860,23 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchS directSubmission.initialize(true, false); auto offset = directSubmission.ringCommandStream.getUsed(); - uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress(); + uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress(); uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa; size_t endOffset = 0; - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); FlushStampTracker flushStamp(true); directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); - EXPECT_TRUE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); + EXPECT_TRUE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); offset = directSubmission.ringCommandStream.getUsed(); directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); - EXPECT_TRUE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); + EXPECT_TRUE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); } HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispatchingThenProgramSchedulerWithR5, IsAtLeastXeHpcCore) { @@ -1742,19 +1889,19 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa directSubmission.initialize(true, false); size_t offset = directSubmission.ringCommandStream.getUsed(); - uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress(); + uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress(); uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa; size_t endOffset = 0; - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); FlushStampTracker flushStamp(true); batchBuffer.hasStallingCmds = false; directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); - EXPECT_TRUE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); + EXPECT_TRUE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); offset = directSubmission.ringCommandStream.getUsed(); @@ -1787,11 +1934,11 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenBbWithStallingCmdsWhenDispa ASSERT_TRUE(success); offset = ptrDiff(++lriCmd, directSubmission.ringCommandStream.getCpuBase()); - EXPECT_TRUE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount - 1, offset, endOffset)); + EXPECT_TRUE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount - 1, offset, endOffset)); EXPECT_TRUE(endOffset > offset); - EXPECT_TRUE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); + EXPECT_TRUE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); } HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhenDispatchingThenDontProgramSchedulerWithR5, IsAtLeastXeHpcCore) { @@ -1804,12 +1951,12 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenFirstBbWithStallingCmdsWhen directSubmission.initialize(true, false); size_t offset = directSubmission.ringCommandStream.getUsed(); - uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress(); + uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress(); uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa; size_t endOffset = 0; - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); FlushStampTracker flushStamp(true); batchBuffer.hasStallingCmds = true; @@ -1845,19 +1992,19 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched directSubmission.initialize(true, false); size_t offset = directSubmission.ringCommandStream.getUsed(); - uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress(); + uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress(); uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa; size_t endOffset = 0; - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); FlushStampTracker flushStamp(true); batchBuffer.hasStallingCmds = false; directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp); - EXPECT_TRUE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); + EXPECT_TRUE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); offset = directSubmission.ringCommandStream.getUsed(); @@ -1889,11 +2036,11 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenStoppingRingThenProgramSched ASSERT_TRUE(success); offset = ptrDiff(lriCmd, directSubmission.ringCommandStream.getCpuBase()); - EXPECT_TRUE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); + EXPECT_TRUE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, offset, endOffset)); EXPECT_TRUE(endOffset > offset); - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, endOffset, endOffset)); } HWTEST2_F(DirectSubmissionRelaxedOrderingTests, WhenStoppingRingWithoutSubmissionThenDontProgramSchedulerWithR5, IsAtLeastXeHpcCore) { @@ -1906,12 +2053,12 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, WhenStoppingRingWithoutSubmissio directSubmission.initialize(true, false); size_t offset = directSubmission.ringCommandStream.getUsed(); - uint64_t deferredTasksListVa = directSubmission.deferredTasksListAllocation->getGpuAddress(); + uint64_t staticSchedulerGpuAddress = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress(); uint64_t semaphoreGpuVa = directSubmission.semaphoreGpuVa; size_t endOffset = 0; - EXPECT_FALSE(verifySchedulerProgramming(directSubmission.ringCommandStream, deferredTasksListVa, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); + EXPECT_FALSE(verifyDynamicSchedulerProgramming(directSubmission.ringCommandStream, staticSchedulerGpuAddress, semaphoreGpuVa, directSubmission.currentQueueWorkCount, 0, endOffset)); directSubmission.stopRingBuffer();