diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 018a734748..faedf4b02e 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -439,6 +439,7 @@ struct EncodeBatchBufferStartOrEnd { static void programConditionalDataMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareData, CompareOperation compareOperation, bool indirect); static void programConditionalDataRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint32_t compareReg, uint32_t compareData, CompareOperation compareOperation, bool indirect); static void programConditionalRegRegBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, AluRegisters compareReg0, AluRegisters compareReg1, CompareOperation compareOperation, bool indirect); + static void programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg, CompareOperation compareOperation, bool indirect); static size_t constexpr getCmdSizeConditionalDataMemBatchBufferStart() { return (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + (3 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM))); @@ -448,6 +449,10 @@ struct EncodeBatchBufferStartOrEnd { return (getCmdSizeConditionalBufferStartBase() + sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) + (3 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM))); } + static size_t constexpr getCmdSizeConditionalRegMemBatchBufferStart() { + return (getCmdSizeConditionalBufferStartBase() + +sizeof(typename GfxFamily::MI_LOAD_REGISTER_MEM) + sizeof(typename GfxFamily::MI_LOAD_REGISTER_REG) + (2 * sizeof(typename GfxFamily::MI_LOAD_REGISTER_IMM))); + } + static size_t constexpr getCmdSizeConditionalRegRegBatchBufferStart() { return getCmdSizeConditionalBufferStartBase(); } diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index af894b7365..22b99da407 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -889,6 +889,18 @@ void EncodeBatchBufferStartOrEnd::programConditionalRegRegBatchBufferSta programConditionalBatchBufferStartBase(commandStream, startAddress, compareReg0, compareReg1, compareOperation, indirect); } +template +void EncodeBatchBufferStartOrEnd::programConditionalRegMemBatchBufferStart(LinearStream &commandStream, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg, + CompareOperation compareOperation, bool indirect) { + EncodeSetMMIO::encodeMEM(commandStream, CS_GPR_R7, compareAddress); + LriHelper::program(&commandStream, CS_GPR_R7 + 4, 0, true); + + EncodeSetMMIO::encodeREG(commandStream, CS_GPR_R8, compareReg); + LriHelper::program(&commandStream, CS_GPR_R8 + 4, 0, true); + + programConditionalBatchBufferStartBase(commandStream, startAddress, AluRegisters::R_7, AluRegisters::R_8, compareOperation, indirect); +} + template void EncodeBatchBufferStartOrEnd::programConditionalBatchBufferStartBase(LinearStream &commandStream, uint64_t startAddress, AluRegisters regA, AluRegisters regB, CompareOperation compareOperation, bool indirect) { diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 6c0ccd1c08..c73e4a8d31 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -111,10 +111,11 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch // 1. Init section { EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); - EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart( - schedulerCmdStream, - schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, - CS_GPR_R1, 0, CompareOperation::Equal, false); + + EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9); + EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4); + + EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true); LriHelper::program(&schedulerCmdStream, CS_GPR_R2, 0, true); LriHelper::program(&schedulerCmdStream, CS_GPR_R2 + 4, 0, true); @@ -130,6 +131,8 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch // 2. Dispatch task section (loop start) { + UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart); + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); LriHelper::program(&schedulerCmdStream, CS_GPR_R6, 8, true); @@ -158,15 +161,17 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch // 3. Remove task section { + UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::removeTaskSectionStart); + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); EncodeMathMMIO::encodeDecrement(schedulerCmdStream, AluRegisters::R_1); EncodeMathMMIO::encodeDecrement(schedulerCmdStream, AluRegisters::R_2); - EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart( - schedulerCmdStream, - schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, - CS_GPR_R1, 0, CompareOperation::Equal, false); + EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9); + EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4); + + EncodeBatchBufferStartOrEnd::programConditionalDataRegBatchBufferStart(schedulerCmdStream, 0, CS_GPR_R1, 0, CompareOperation::Equal, true); LriHelper::program(&schedulerCmdStream, CS_GPR_R7, 8, true); LriHelper::program(&schedulerCmdStream, CS_GPR_R7 + 4, 0, true); @@ -196,6 +201,8 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch // 4. List loop check section { + UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::tasksListLoopCheckSectionStart); + EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); EncodeMathMMIO::encodeIncrement(schedulerCmdStream, AluRegisters::R_2); @@ -211,6 +218,8 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch // 5. Drain request section { + UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::drainRequestSectionStart); + using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; EncodeDummyBlitWaArgs waArgs{false, const_cast(&this->rootDeviceEnvironment)}; @@ -233,28 +242,28 @@ void DirectSubmissionHw::dispatchStaticRelaxedOrderingSch CS_GPR_R5, 1, CompareOperation::Equal, false); } - // Exit Static scheduler + // 6. Scheduler loop check section + { + UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionStart); - // 6. Jump to scheduler loop check section (dynamic scheduler) - EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0, CS_GPR_R9); - EncodeSetMMIO::encodeREG(schedulerCmdStream, CS_GPR_R0 + 4, CS_GPR_R9 + 4); - EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false); + LriHelper::program(&schedulerCmdStream, CS_GPR_R10, static_cast(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::semaphoreSectionSize), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R10 + 4, 0, true); - // 7. Jump to Semaphore section (dynamic scheduler) - EncodeMiPredicate::encode(schedulerCmdStream, MiPredicateType::Disable); - LriHelper::program(&schedulerCmdStream, CS_GPR_R10, static_cast(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionSize), true); + EncodeAluHelper aluHelper; + aluHelper.setMocs(miMathMocs); + aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9); + aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10); + aluHelper.setNextAlu(AluRegisters::OPCODE_ADD); + aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_0, AluRegisters::R_ACCU); + aluHelper.copyToCmdStream(schedulerCmdStream); - LriHelper::program(&schedulerCmdStream, CS_GPR_R10 + 4, 0, true); + EncodeBatchBufferStartOrEnd::programConditionalRegMemBatchBufferStart(schedulerCmdStream, 0, semaphoreGpuVa, CS_GPR_R11, CompareOperation::GreaterOrEqual, true); - EncodeAluHelper aluHelper; - aluHelper.setMocs(miMathMocs); - aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9); - aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10); - aluHelper.setNextAlu(AluRegisters::OPCODE_ADD); - aluHelper.setNextAlu(AluRegisters::OPCODE_STORE, AluRegisters::R_0, AluRegisters::R_ACCU); - aluHelper.copyToCmdStream(schedulerCmdStream); + EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerCmdStream, schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, + false, false, false); + } - EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerCmdStream, 0, false, true, false); + UNRECOVERABLE_IF(schedulerCmdStream.getUsed() != RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::totalSize); } template @@ -265,23 +274,15 @@ void DirectSubmissionHw::dispatchRelaxedOrderingScheduler uint64_t schedulerStartVa = ringCommandStream.getCurrentGpuAddressPosition(); - uint64_t schedulerLoopCheckVa = schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionStart; + uint64_t semaphoreSectionVa = schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::semaphoreSectionStart; - LriHelper::program(&schedulerCmdStream, CS_GPR_R9, static_cast(schedulerLoopCheckVa & 0xFFFF'FFFFULL), true); - LriHelper::program(&schedulerCmdStream, CS_GPR_R9 + 4, static_cast(schedulerLoopCheckVa >> 32), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R11, value, true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R9, static_cast(semaphoreSectionVa & 0xFFFF'FFFFULL), true); + LriHelper::program(&schedulerCmdStream, CS_GPR_R9 + 4, static_cast(semaphoreSectionVa >> 32), true); schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching - // 2. Scheduler loop check section - { - EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart( - schedulerCmdStream, schedulerStartVa + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::endSectionStart, - semaphoreGpuVa, value, CompareOperation::GreaterOrEqual, false); - - schedulerCmdStream.getSpace(sizeof(typename GfxFamily::MI_BATCH_BUFFER_START)); // skip patching - } - - // 3. Semaphore section + // 2. Semaphore section { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; @@ -850,21 +851,12 @@ void DirectSubmissionHw::preinitializeRelaxedOrderingSect uint64_t schedulerStartAddress = relaxedOrderingSchedulerAllocation->getGpuAddress(); // 1. Init section + LriHelper::program(&schedulerStream, CS_GPR_R11, 0, true); LriHelper::program(&schedulerStream, CS_GPR_R9, 0, true); LriHelper::program(&schedulerStream, CS_GPR_R9 + 4, 0, true); EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerStream, schedulerStartAddress, false, false, false); - // 2. Scheduler loop check section - { - - EncodeBatchBufferStartOrEnd::programConditionalDataMemBatchBufferStart(schedulerStream, 0, 0, 0, CompareOperation::GreaterOrEqual, false); - - EncodeBatchBufferStartOrEnd::programBatchBufferStart(&schedulerStream, - schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, - false, false, false); - } - - // 3. Semaphore section + // 2. Semaphore section { using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; @@ -873,7 +865,7 @@ void DirectSubmissionHw::preinitializeRelaxedOrderingSect EncodeSemaphore::addMiSemaphoreWaitCommand(schedulerStream, 0, 0, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); } - // 4. End section + // 3. End section { EncodeMiPredicate::encode(schedulerStream, MiPredicateType::Disable); diff --git a/shared/source/direct_submission/relaxed_ordering_helper.h b/shared/source/direct_submission/relaxed_ordering_helper.h index 26b1f468c0..40eb735837 100644 --- a/shared/source/direct_submission/relaxed_ordering_helper.h +++ b/shared/source/direct_submission/relaxed_ordering_helper.h @@ -62,7 +62,7 @@ struct StaticSchedulerSizeAndOffsetSection { using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; static constexpr uint64_t initSectionSize = EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart() + (6 * sizeof(MI_LOAD_REGISTER_IMM)) + - EncodeMiPredicate::getCmdSize(); + EncodeMiPredicate::getCmdSize() + (2 * sizeof(MI_LOAD_REGISTER_REG)); static constexpr uint64_t loopStartSectionStart = initSectionSize; static constexpr uint64_t loopStartSectionSize = (4 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper::getCmdsSize() + sizeof(MI_BATCH_BUFFER_START) + @@ -70,7 +70,8 @@ struct StaticSchedulerSizeAndOffsetSection { static constexpr uint64_t removeTaskSectionStart = loopStartSectionStart + loopStartSectionSize; static constexpr uint64_t removeStartSectionSize = (2 * EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()) + EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart() + - (4 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper::getCmdsSize() + EncodeMiPredicate::getCmdSize(); + (4 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper::getCmdsSize() + EncodeMiPredicate::getCmdSize() + + (2 * sizeof(MI_LOAD_REGISTER_REG)); static constexpr uint64_t tasksListLoopCheckSectionStart = removeTaskSectionStart + removeStartSectionSize; static constexpr uint64_t tasksListLoopCheckSectionSize = EncodeMathMMIO::getCmdSizeForIncrementOrDecrement() + EncodeBatchBufferStartOrEnd::getCmdSizeConditionalRegRegBatchBufferStart() + @@ -78,14 +79,12 @@ struct StaticSchedulerSizeAndOffsetSection { static constexpr uint64_t drainRequestSectionStart = tasksListLoopCheckSectionStart + tasksListLoopCheckSectionSize; static constexpr uint64_t drainRequestSectionSize = sizeof(typename GfxFamily::MI_ARB_CHECK) + (2 * EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart()); - static constexpr uint64_t schedulerLoopCheckSectionJumpStart = drainRequestSectionStart + drainRequestSectionSize; - static constexpr uint64_t schedulerLoopCheckSectionJumpSize = 2 * sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_BATCH_BUFFER_START); - static constexpr uint64_t semaphoreSectionJumpStart = schedulerLoopCheckSectionJumpStart + schedulerLoopCheckSectionJumpSize; - static constexpr uint64_t semaphoreSectionJumpSize = EncodeMiPredicate::getCmdSize() + (2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper::getCmdsSize() + - sizeof(MI_BATCH_BUFFER_START); + static constexpr uint64_t schedulerLoopCheckSectionStart = drainRequestSectionStart + drainRequestSectionSize; + static constexpr uint64_t schedulerLoopCheckSectionSize = (2 * sizeof(MI_LOAD_REGISTER_IMM)) + EncodeAluHelper::getCmdsSize() + + EncodeBatchBufferStartOrEnd::getCmdSizeConditionalRegMemBatchBufferStart() + sizeof(MI_BATCH_BUFFER_START); - static constexpr uint64_t totalSize = semaphoreSectionJumpStart + semaphoreSectionJumpSize; + static constexpr uint64_t totalSize = schedulerLoopCheckSectionStart + schedulerLoopCheckSectionSize; }; template @@ -93,12 +92,9 @@ struct DynamicSchedulerSizeAndOffsetSection { using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; - static constexpr uint64_t initSectionSize = (2 * sizeof(MI_LOAD_REGISTER_IMM)) + sizeof(MI_BATCH_BUFFER_START); + static constexpr uint64_t initSectionSize = (3 * sizeof(MI_LOAD_REGISTER_IMM)) + sizeof(MI_BATCH_BUFFER_START); - static constexpr uint64_t schedulerLoopCheckSectionStart = initSectionSize; - static constexpr uint64_t schedulerLoopCheckSectionSize = EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart() + sizeof(MI_BATCH_BUFFER_START); - - static constexpr uint64_t semaphoreSectionStart = schedulerLoopCheckSectionStart + schedulerLoopCheckSectionSize; + static constexpr uint64_t semaphoreSectionStart = initSectionSize; static constexpr uint64_t semaphoreSectionSize = EncodeSemaphore::getSizeMiSemaphoreWait() + EncodeMiPredicate::getCmdSize(); static constexpr uint64_t endSectionStart = semaphoreSectionStart + semaphoreSectionSize; diff --git a/shared/test/common/helpers/relaxed_ordering_commands_helper.h b/shared/test/common/helpers/relaxed_ordering_commands_helper.h index 78932b0f52..9d6eeac767 100644 --- a/shared/test/common/helpers/relaxed_ordering_commands_helper.h +++ b/shared/test/common/helpers/relaxed_ordering_commands_helper.h @@ -278,5 +278,35 @@ bool verifyConditionalDataRegBbStart(void *cmds, uint64_t startAddress, uint32_t return verifyBaseConditionalBbStart(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8); } +template +bool verifyConditionalRegMemBbStart(void *cmds, uint64_t startAddress, uint64_t compareAddress, uint32_t compareReg, + CompareOperation compareOperation, bool indirect) { + using MI_LOAD_REGISTER_MEM = typename FamilyType::MI_LOAD_REGISTER_MEM; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; + + auto lrmCmd = reinterpret_cast(cmds); + if ((lrmCmd->getRegisterAddress() != CS_GPR_R7) || (lrmCmd->getMemoryAddress() != compareAddress)) { + return false; + } + + auto lriCmd = reinterpret_cast(++lrmCmd); + if (!verifyLri(lriCmd, CS_GPR_R7 + 4, 0)) { + return false; + } + + auto lrrCmd = reinterpret_cast(++lriCmd); + if (!verifyLrr(lrrCmd, CS_GPR_R8, compareReg)) { + return false; + } + + lriCmd = reinterpret_cast(++lrrCmd); + if (!verifyLri(lriCmd, CS_GPR_R8 + 4, 0)) { + return false; + } + + return verifyBaseConditionalBbStart(++lriCmd, compareOperation, startAddress, indirect, AluRegisters::R_7, AluRegisters::R_8); +} + } // namespace RelaxedOrderingCommandsHelper } // namespace NEO diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index de9f32bd0a..0f275882a6 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -1024,7 +1024,7 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf bool verifyDynamicSchedulerProgramming(LinearStream &cs, uint64_t schedulerAllocationGpuVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset); template - bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs); + bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs); template bool verifyDummyBlt(typename FamilyType::XY_COLOR_BLT *cmd); @@ -1042,7 +1042,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyDummyBlt(typename FamilyType::X } template -bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs) { +bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint64_t semaphoreGpuVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG; @@ -1064,13 +1064,21 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap return false; } - miPredicate++; - if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart(miPredicate, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, - CS_GPR_R1, 0, CompareOperation::Equal, false)) { + auto lrrCmd = reinterpret_cast(++miPredicate); + + if (!RelaxedOrderingCommandsHelper::verifyLrr(lrrCmd, CS_GPR_R0, CS_GPR_R9)) { return false; } - auto lriCmd = reinterpret_cast(ptrOffset(miPredicate, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); + if (!RelaxedOrderingCommandsHelper::verifyLrr(++lrrCmd, CS_GPR_R0 + 4, CS_GPR_R9 + 4)) { + return false; + } + + if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart(++lrrCmd, 0, CS_GPR_R1, 0, CompareOperation::Equal, true)) { + return false; + } + + auto lriCmd = reinterpret_cast(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); if (!RelaxedOrderingCommandsHelper::verifyLri(lriCmd, CS_GPR_R2, 0)) { return false; } @@ -1197,14 +1205,21 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap return false; } - cmds = ptrOffset(cmds, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement()); + lrrCmd = reinterpret_cast(ptrOffset(cmds, EncodeMathMMIO::getCmdSizeForIncrementOrDecrement())); - if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart(cmds, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::semaphoreSectionJumpStart, - CS_GPR_R1, 0, CompareOperation::Equal, false)) { + if (!RelaxedOrderingCommandsHelper::verifyLrr(lrrCmd, CS_GPR_R0, CS_GPR_R9)) { return false; } - lriCmd = reinterpret_cast(ptrOffset(cmds, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); + if (!RelaxedOrderingCommandsHelper::verifyLrr(++lrrCmd, CS_GPR_R0 + 4, CS_GPR_R9 + 4)) { + return false; + } + + if (!RelaxedOrderingCommandsHelper::verifyConditionalDataRegBbStart(++lrrCmd, 0, CS_GPR_R1, 0, CompareOperation::Equal, true)) { + return false; + } + + lriCmd = reinterpret_cast(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); if (!RelaxedOrderingCommandsHelper::verifyLri(lriCmd, CS_GPR_R7, 8)) { return false; } @@ -1337,32 +1352,10 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap return false; } - // 6. Jump to scheduler loop check section (dynamic scheduler) - auto lrrCmd = reinterpret_cast(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); + // 6. Scheduler loop check section + lriCmd = reinterpret_cast(ptrOffset(conditionalBbStartcmds, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataRegBatchBufferStart())); - if (!RelaxedOrderingCommandsHelper::verifyLrr(lrrCmd, CS_GPR_R0, CS_GPR_R9)) { - return false; - } - - if (!RelaxedOrderingCommandsHelper::verifyLrr(++lrrCmd, CS_GPR_R0 + 4, CS_GPR_R9 + 4)) { - return false; - } - - bbStart = reinterpret_cast(++lrrCmd); - if (!RelaxedOrderingCommandsHelper::verifyBbStart(bbStart, 0, true, false)) { - return false; - } - - // 7. Jump to Semaphore section (dynamic scheduler) - miPredicate = reinterpret_cast(++bbStart); - - if (!RelaxedOrderingCommandsHelper::verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { - return false; - } - - lriCmd = reinterpret_cast(++miPredicate); - - if (!RelaxedOrderingCommandsHelper::verifyLri(lriCmd, CS_GPR_R10, static_cast(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionSize))) { + if (!RelaxedOrderingCommandsHelper::verifyLri(lriCmd, CS_GPR_R10, static_cast(RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::semaphoreSectionSize))) { return false; } @@ -1398,8 +1391,12 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap return false; } - bbStart = reinterpret_cast(++miAluCmd); - if (!RelaxedOrderingCommandsHelper::verifyBbStart(bbStart, 0, true, false)) { + if (!RelaxedOrderingCommandsHelper::verifyConditionalRegMemBbStart(++miAluCmd, 0, semaphoreGpuVa, CS_GPR_R11, CompareOperation::GreaterOrEqual, true)) { + return false; + } + + bbStart = reinterpret_cast(ptrOffset(miAluCmd, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalRegMemBatchBufferStart())); + if (!RelaxedOrderingCommandsHelper::verifyBbStart(bbStart, schedulerStartGpuAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, false, false)) { return false; } @@ -1431,13 +1428,17 @@ bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(Lin uint64_t schedulerStartAddress = cs.getGraphicsAllocation()->getGpuAddress() + ptrDiff(lriCmd, cs.getCpuBase()); - uint64_t schedulerLoopCheckVa = schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::schedulerLoopCheckSectionStart; + uint64_t semaphoreSectionVa = schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::semaphoreSectionStart; - if (!RelaxedOrderingCommandsHelper::verifyLri(lriCmd, CS_GPR_R9, static_cast(schedulerLoopCheckVa & 0xFFFF'FFFFULL))) { + if (!RelaxedOrderingCommandsHelper::verifyLri(lriCmd, CS_GPR_R11, semaphoreValue)) { continue; } - if (!RelaxedOrderingCommandsHelper::verifyLri(++lriCmd, CS_GPR_R9 + 4, static_cast(schedulerLoopCheckVa >> 32))) { + if (!RelaxedOrderingCommandsHelper::verifyLri(++lriCmd, CS_GPR_R9, static_cast(semaphoreSectionVa & 0xFFFF'FFFFULL))) { + continue; + } + + if (!RelaxedOrderingCommandsHelper::verifyLri(++lriCmd, CS_GPR_R9 + 4, static_cast(semaphoreSectionVa >> 32))) { continue; } @@ -1446,21 +1447,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(Lin continue; } - // 2. Scheduler loop check section - - bbStart++; - - if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart(bbStart, schedulerStartAddress + RelaxedOrderingHelper::DynamicSchedulerSizeAndOffsetSection::endSectionStart, - semaphoreGpuVa, semaphoreValue, CompareOperation::GreaterOrEqual, false)) { - continue; - } - - bbStart = reinterpret_cast(ptrOffset(bbStart, EncodeBatchBufferStartOrEnd::getCmdSizeConditionalDataMemBatchBufferStart())); - if (!RelaxedOrderingCommandsHelper::verifyBbStart(bbStart, schedulerAllocationGpuVa + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection::loopStartSectionStart, false, false)) { - continue; - } - - // 3. Semaphore section + // 2. Semaphore section auto miPredicate = reinterpret_cast(++bbStart); if (!RelaxedOrderingCommandsHelper::verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { continue; @@ -1473,7 +1460,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyDynamicSchedulerProgramming(Lin continue; } - // 4. End section + // 3. End section miPredicate = reinterpret_cast(++semaphore); if (!RelaxedOrderingCommandsHelper::verifyMiPredicate(miPredicate, MiPredicateType::Disable)) { @@ -1531,7 +1518,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenDebugFlagSetWhenDispatching EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); EXPECT_TRUE(verifyStaticSchedulerProgramming(*directSubmission.relaxedOrderingSchedulerAllocation, - directSubmission.deferredTasksListAllocation->getGpuAddress(), 123, + directSubmission.deferredTasksListAllocation->getGpuAddress(), directSubmission.semaphoreGpuVa, 123, pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER))); } @@ -1545,7 +1532,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenNewNumberOfClientsWhenDispa EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); EXPECT_EQ(RelaxedOrderingHelper::queueSizeMultiplier, directSubmission.currentRelaxedOrderingQueueSize); EXPECT_TRUE(verifyStaticSchedulerProgramming(*directSubmission.relaxedOrderingSchedulerAllocation, - directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier, + directSubmission.deferredTasksListAllocation->getGpuAddress(), directSubmission.semaphoreGpuVa, RelaxedOrderingHelper::queueSizeMultiplier, pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER))); const uint64_t expectedQueueSizeValueVa = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress() + @@ -1616,7 +1603,7 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStat EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled); EXPECT_TRUE(verifyStaticSchedulerProgramming(*directSubmission.relaxedOrderingSchedulerAllocation, - directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier, + directSubmission.deferredTasksListAllocation->getGpuAddress(), directSubmission.semaphoreGpuVa, RelaxedOrderingHelper::queueSizeMultiplier, pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER))); }