From 0c301e5e9984f332fbcb8970991417fc73b0b730 Mon Sep 17 00:00:00 2001 From: Zbigniew Zdanowicz Date: Wed, 27 Jul 2022 15:35:15 +0000 Subject: [PATCH] Limit preemption programming in level zero command queues When multiple command queues use the same context and retain the same state No preemption programming for copy command queues Program preemption preamble only for mid thread preemption Related-To: NEO-7187 Signed-off-by: Zbigniew Zdanowicz --- level_zero/core/source/cmdqueue/cmdqueue.h | 5 - .../core/source/cmdqueue/cmdqueue_hw.inl | 108 ++++---- .../test/unit_tests/mocks/mock_cmdqueue.h | 1 - .../sources/cmdqueue/test_cmdqueue_2.cpp | 1 - .../test_cmdqueue_enqueue_cmdlist.cpp | 230 +++++++++++++++--- .../test_cmdqueue_enqueue_cmdlist_2.cpp | 75 ++++++ .../command_stream/command_stream_receiver.h | 8 + shared/test/common/helpers/unit_test_helper.h | 5 + .../unit_test_helper_bdw_and_later.inl | 10 + .../unit_test_helper_xehp_and_later.inl | 11 + .../command_stream_receiver_tests.cpp | 8 + 11 files changed, 368 insertions(+), 94 deletions(-) diff --git a/level_zero/core/source/cmdqueue/cmdqueue.h b/level_zero/core/source/cmdqueue/cmdqueue.h index a171e8e7e4..0b2062ba48 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue.h +++ b/level_zero/core/source/cmdqueue/cmdqueue.h @@ -51,14 +51,9 @@ struct CommandQueue : _ze_command_queue_handle_t { ze_command_queue_handle_t toHandle() { return this; } - void setCommandQueuePreemptionMode(NEO::PreemptionMode newPreemptionMode) { - commandQueuePreemptionMode = newPreemptionMode; - } - bool peekIsCopyOnlyCommandQueue() const { return this->isCopyOnlyCommandQueue; } protected: - NEO::PreemptionMode commandQueuePreemptionMode = NEO::PreemptionMode::Initial; uint32_t partitionCount = 1; uint32_t activeSubDevices = 1; bool preemptionCmdSyncProgramming = true; diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index a04ee088e5..dc191424fb 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -124,26 +124,23 @@ ze_result_t CommandQueueHw::executeCommandLists( NEO::Device *neoDevice = device->getNEODevice(); auto devicePreemption = device->getDevicePreemptionMode(); - const bool initialPreemptionMode = commandQueuePreemptionMode == NEO::PreemptionMode::Initial; - NEO::PreemptionMode cmdQueuePreemption = commandQueuePreemptionMode; - if (initialPreemptionMode) { - cmdQueuePreemption = devicePreemption; - } - NEO::PreemptionMode statePreemption = cmdQueuePreemption; + auto contextPreemptionMode = csr->getPreemptionMode(); + const bool initialPreemptionMode = contextPreemptionMode == NEO::PreemptionMode::Initial; + NEO::PreemptionMode statePreemption = contextPreemptionMode; const bool stateSipRequired = (initialPreemptionMode && devicePreemption == NEO::PreemptionMode::MidThread) || (neoDevice->getDebugger() && NEO::Debugger::isDebugEnabled(internalUsage)); - if (initialPreemptionMode) { - preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize(*neoDevice); - } + if (!isCopyOnlyCommandQueue) { + if (initialPreemptionMode) { + preemptionSize += NEO::PreemptionHelper::getRequiredPreambleSize(*neoDevice); + } - if (stateSipRequired) { - preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize(*neoDevice, csr->isRcs()); + if (stateSipRequired) { + preemptionSize += NEO::PreemptionHelper::getRequiredStateSipCmdSize(*neoDevice, csr->isRcs()); + } } - preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(devicePreemption, commandQueuePreemptionMode); - if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) { if (neoDevice->getSourceLevelDebugger() != nullptr) { debuggerCmdsSize += NEO::PreambleHelper::getKernelDebuggingCommandsSize(true); @@ -184,25 +181,27 @@ ze_result_t CommandQueueHw::executeCommandLists( totalCmdBuffers += commandList->commandContainer.getCmdBufferAllocations().size(); spaceForResidency += commandList->commandContainer.getResidencyContainer().size(); - auto commandListPreemption = commandList->getCommandListPreemptionMode(); - if (statePreemption != commandListPreemption) { - if (preemptionCmdSyncProgramming) { - preemptionSize += NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(); + if (!isCopyOnlyCommandQueue) { + auto commandListPreemption = commandList->getCommandListPreemptionMode(); + if (statePreemption != commandListPreemption) { + if (preemptionCmdSyncProgramming) { + preemptionSize += NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(); + } + preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(commandListPreemption, statePreemption); + statePreemption = commandListPreemption; } - preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(commandListPreemption, statePreemption); - statePreemption = commandListPreemption; - } - perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize()); + perThreadScratchSpaceSize = std::max(perThreadScratchSpaceSize, commandList->getCommandListPerThreadScratchSize()); - perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize()); + perThreadPrivateScratchSize = std::max(perThreadPrivateScratchSize, commandList->getCommandListPerThreadPrivateScratchSize()); - if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) { - if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) { - heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation()); - } - for (auto element : commandList->commandContainer.sshAllocations) { - heapContainer.push_back(element); + if (commandList->getCommandListPerThreadScratchSize() != 0 || commandList->getCommandListPerThreadPrivateScratchSize() != 0) { + if (commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE) != nullptr) { + heapContainer.push_back(commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getGraphicsAllocation()); + } + for (auto element : commandList->commandContainer.sshAllocations) { + heapContainer.push_back(element); + } } } @@ -343,15 +342,6 @@ ze_result_t CommandQueueHw::executeCommandLists( NEO::PreemptionHelper::programStateSip(child, *neoDevice, csr->getLogicalStateHelper()); } - if (cmdQueuePreemption != commandQueuePreemptionMode) { - NEO::PreemptionHelper::programCmdStream(child, - cmdQueuePreemption, - commandQueuePreemptionMode, - csr->getPreemptionAllocation()); - } - - statePreemption = cmdQueuePreemption; - const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread || (neoDevice->getDebugger() != nullptr && NEO::Debugger::isDebugEnabled(internalUsage)); @@ -383,33 +373,35 @@ ze_result_t CommandQueueHw::executeCommandLists( csr->getLogicalStateHelper()->writeStreamInline(child, false); } + statePreemption = contextPreemptionMode; + for (auto i = 0u; i < numCommandLists; ++i) { auto commandList = CommandList::fromHandle(phCommandLists[i]); auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations(); auto cmdBufferCount = cmdBufferAllocations.size(); bool immediateMode = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false; - auto commandListPreemption = commandList->getCommandListPreemptionMode(); - if (statePreemption != commandListPreemption) { - if (NEO::DebugManager.flags.EnableSWTags.get()) { - neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( - child, - *neoDevice, - "ComandList Preemption Mode update", 0u); - } - - if (preemptionCmdSyncProgramming) { - NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addSingleBarrier(child, args); - } - NEO::PreemptionHelper::programCmdStream(child, - commandListPreemption, - statePreemption, - csr->getPreemptionAllocation()); - statePreemption = commandListPreemption; - } - if (!isCopyOnlyCommandQueue) { + auto commandListPreemption = commandList->getCommandListPreemptionMode(); + if (statePreemption != commandListPreemption) { + if (NEO::DebugManager.flags.EnableSWTags.get()) { + neoDevice->getRootDeviceEnvironment().tagsManager->insertTag( + child, + *neoDevice, + "ComandList Preemption Mode update", 0u); + } + + if (preemptionCmdSyncProgramming) { + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addSingleBarrier(child, args); + } + NEO::PreemptionHelper::programCmdStream(child, + commandListPreemption, + statePreemption, + csr->getPreemptionAllocation()); + statePreemption = commandListPreemption; + } + bool programVfe = frontEndStateDirty; if (isPatchingVfeStateAllowed) { auto &requiredStreamState = commandList->getRequiredStreamState(); @@ -453,7 +445,7 @@ ze_result_t CommandQueueHw::executeCommandLists( NEO::PreemptionHelper::programStateSipEndWa(child, *neoDevice); } - commandQueuePreemptionMode = statePreemption; + csr->setPreemptionMode(statePreemption); if (hFence) { fence = Fence::fromHandle(hFence); diff --git a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h index 7536d53275..3dc1cc15c1 100644 --- a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h +++ b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h @@ -33,7 +33,6 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp { using BaseClass::synchronizeByPollingForTaskCount; using BaseClass::taskCount; using CommandQueue::activeSubDevices; - using CommandQueue::commandQueuePreemptionMode; using CommandQueue::internalUsage; using CommandQueue::partitionCount; diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index 94ab0340de..41e10d4d3d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -122,7 +122,6 @@ HWTEST_TEMPLATED_F(AubCsrTest, givenAubCsrWhenCallingExecuteCommandListsThenPoll auto aubCsr = static_cast *>(csr); CommandQueue *queue = static_cast(L0::CommandQueue::fromHandle(commandQueue)); - queue->setCommandQueuePreemptionMode(PreemptionMode::Disabled); EXPECT_EQ(aubCsr->pollForCompletionCalled, 0u); std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::Compute, 0u, returnValue)); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp index 428d1b7a7d..09e524d4f8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist.cpp @@ -407,13 +407,16 @@ HWTEST_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsAr desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver; + std::array testedInternalFlags = {true, false}; for (auto flagInternal : testedInternalFlags) { ze_result_t returnValue; + currentCsr->setPreemptionMode(NEO::PreemptionMode::Initial); auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, - neoDevice->getDefaultEngine().commandStreamReceiver, + currentCsr, &desc, false, flagInternal, @@ -458,13 +461,16 @@ HWTEST2_F(CommandQueueExecuteCommandLists, givenMidThreadPreemptionWhenCommandsA desc.index = 0u; desc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL; + auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver; + std::array testedInternalFlags = {true, false}; for (auto flagInternal : testedInternalFlags) { ze_result_t returnValue; + currentCsr->setPreemptionMode(NEO::PreemptionMode::Initial); auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, - neoDevice->getDefaultEngine().commandStreamReceiver, + currentCsr, &desc, false, flagInternal, @@ -645,10 +651,12 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p desc.stype = ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC; desc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver; + ze_result_t returnValue; auto commandQueue = whiteboxCast(CommandQueue::create( productFamily, - device, neoDevice->getDefaultEngine().commandStreamReceiver, &desc, false, false, returnValue)); + device, currentCsr, &desc, false, false, returnValue)); ASSERT_NE(nullptr, commandQueue->commandStream); commandQueue->preemptionCmdSyncProgramming = preemptionCmdProgramming; preemptionCmdProgramming = NEO::PreemptionHelper::getRequiredCmdStreamSize(NEO::PreemptionMode::ThreadGroup, NEO::PreemptionMode::Disabled) > 0u; @@ -670,12 +678,12 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p result = commandQueue->synchronize(0); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode); + EXPECT_EQ(NEO::PreemptionMode::Disabled, currentCsr->getPreemptionMode()); result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); ASSERT_EQ(ZE_RESULT_SUCCESS, result); - EXPECT_EQ(NEO::PreemptionMode::Disabled, commandQueue->commandQueuePreemptionMode); + EXPECT_EQ(NEO::PreemptionMode::Disabled, currentCsr->getPreemptionMode()); auto usedSpaceAfter = commandQueue->commandStream->getUsed(); ASSERT_GT(usedSpaceAfter, usedSpaceBefore); @@ -693,7 +701,16 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p auto preemptionMode = neoDevice->getPreemptionMode(); GenCmdList::iterator itor = cmdList.begin(); - GenCmdList::iterator itorStateSip = find(cmdList.begin(), cmdList.end()); + GenCmdList::iterator itorCsrCmd = NEO::UnitTestHelper::findMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end()); + if (preemptionMode == NEO::PreemptionMode::MidThread) { + EXPECT_NE(itorCsrCmd, cmdList.end()); + + itor = itorCsrCmd; + } else { + EXPECT_EQ(itorCsrCmd, cmdList.end()); + } + + GenCmdList::iterator itorStateSip = find(itor, cmdList.end()); if (preemptionMode == NEO::PreemptionMode::MidThread) { EXPECT_NE(itorStateSip, cmdList.end()); @@ -702,38 +719,52 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p EXPECT_EQ(itorStateSip, cmdList.end()); } + constexpr uint32_t registerOffset = 0x2580; + constexpr uint32_t disabledPreemptionRegisterData = (1 << 2) | (((1 << 1) | (1 << 2)) << 16); + constexpr uint32_t threadGroupPreemptionRegisterData = (1 << 1) | (((1 << 1) | (1 << 2)) << 16); + + // MMIO programming of 1st disabled preemption command list: initial->disabled MI_LOAD_REGISTER_IMM *lriCmd = nullptr; auto itorLri = find(itor, cmdList.end()); if (preemptionCmdProgramming) { EXPECT_NE(itorLri, cmdList.end()); - //Initial cmdQ preemption lriCmd = static_cast(*itorLri); - EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); + EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset()); + EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword()); + + // verify presence of sync PIPE_CONTROL just before LRI switching preemption + auto itorPipeControl = find(itor, itorLri); + if (commandQueue->preemptionCmdSyncProgramming) { + EXPECT_NE(itorPipeControl, itorLri); + } else { + EXPECT_EQ(itorPipeControl, itorLri); + } itor = itorLri; } else { EXPECT_EQ(itorLri, cmdList.end()); } - uint32_t data = 0; - //next should be BB_START to 1st Disabled preemption Cmd List + // next should be BB_START to 1st disabled preemption Cmd List auto itorBBStart = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); itor = itorBBStart; + // MMIO programming of thread-group preemption command list: disabled->thread-group itorLri = find(itor, cmdList.end()); if (preemptionCmdProgramming) { EXPECT_NE(itorLri, cmdList.end()); lriCmd = static_cast(*itorLri); - EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); - data = (1 << 1) | (((1 << 1) | (1 << 2)) << 16); - EXPECT_EQ(data, lriCmd->getDataDword()); + EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset()); + EXPECT_EQ(threadGroupPreemptionRegisterData, lriCmd->getDataDword()); - //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group + // verify presence of sync PIPE_CONTROL just before LRI switching preemption + auto itorPipeControl = find(itor, itorLri); if (commandQueue->preemptionCmdSyncProgramming) { - auto itorPipeControl = find(itor, itorLri); - EXPECT_NE(itorPipeControl, cmdList.end()); + EXPECT_NE(itorPipeControl, itorLri); + } else { + EXPECT_EQ(itorPipeControl, itorLri); } itor = itorLri; @@ -741,23 +772,25 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p EXPECT_EQ(itorLri, cmdList.end()); } - //start of thread-group command list + // start of thread-group preemption Cmd List itorBBStart = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); itor = itorBBStart; + // MMIO programming of 2nd disabled preemption command list: thread-group->disabled itorLri = find(itor, cmdList.end()); if (preemptionCmdProgramming) { EXPECT_NE(itorLri, cmdList.end()); lriCmd = static_cast(*itorLri); - EXPECT_EQ(0x2580u, lriCmd->getRegisterOffset()); - data = (1 << 2) | (((1 << 1) | (1 << 2)) << 16); - EXPECT_EQ(data, lriCmd->getDataDword()); + EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset()); + EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword()); - //verify presence of sync PIPE_CONTROL just before LRI switching to thread-group + // verify presence of sync PIPE_CONTROL just before LRI switching preemption + auto itorPipeControl = find(itor, itorLri); if (commandQueue->preemptionCmdSyncProgramming) { - auto itorPipeControl = find(itor, itorLri); - EXPECT_NE(itorPipeControl, cmdList.end()); + EXPECT_NE(itorPipeControl, itorLri); + } else { + EXPECT_EQ(itorPipeControl, itorLri); } itor = itorLri; @@ -765,29 +798,114 @@ void CommandQueueExecuteCommandLists::twoCommandListCommandPreemptionTest(bool p EXPECT_EQ(itorLri, cmdList.end()); } - //start of thread-group command list + // start of 2nd disabled preemption command list itorBBStart = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); itor = itorBBStart; - // BB end - auto itorBBEnd = find(itor, cmdList.end()); + // BB end or ULLS BB start + if (currentCsr->isDirectSubmissionEnabled()) { + itorBBStart = find(itor, cmdList.end()); + EXPECT_NE(itorBBStart, cmdList.end()); + itor = itorBBStart; + } else { + auto itorBBEnd = find(itor, cmdList.end()); + EXPECT_NE(itorBBEnd, cmdList.end()); + itor = itorBBEnd; + } + + GenCmdList::iterator firstExecListItor = itor; + + // second execution of command lists: + + // BB_START to 1st disabled preemption Cmd List + itorBBStart = find(itor, cmdList.end()); EXPECT_NE(itorBBStart, cmdList.end()); + // no MMIO programming prior 1st disabled cmd list, since command queue retains disabled preemption state + itorLri = find(itor, itorBBStart); + EXPECT_EQ(itorLri, itorBBStart); + itor = itorBBStart; + + // MMIO programming of thread-group preemption command list: disabled->thread-group + itorLri = find(itor, cmdList.end()); + if (preemptionCmdProgramming) { + EXPECT_NE(itorLri, cmdList.end()); + + lriCmd = static_cast(*itorLri); + EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset()); + EXPECT_EQ(threadGroupPreemptionRegisterData, lriCmd->getDataDword()); + + // verify presence of sync PIPE_CONTROL just before LRI switching preemption + auto itorPipeControl = find(itor, itorLri); + if (commandQueue->preemptionCmdSyncProgramming) { + EXPECT_NE(itorPipeControl, itorLri); + } else { + EXPECT_EQ(itorPipeControl, itorLri); + } + + itor = itorLri; + } else { + EXPECT_EQ(itorLri, cmdList.end()); + } + + // start of thread-group preemption Cmd List + itorBBStart = find(itor, cmdList.end()); + EXPECT_NE(itorBBStart, cmdList.end()); + itor = itorBBStart; + + // MMIO programming of 2nd disabled preemption command list: thread-group->disabled + itorLri = find(itor, cmdList.end()); + if (preemptionCmdProgramming) { + EXPECT_NE(itorLri, cmdList.end()); + lriCmd = static_cast(*itorLri); + EXPECT_EQ(registerOffset, lriCmd->getRegisterOffset()); + EXPECT_EQ(disabledPreemptionRegisterData, lriCmd->getDataDword()); + + // verify presence of sync PIPE_CONTROL just before LRI switching preemption + auto itorPipeControl = find(itor, itorLri); + if (commandQueue->preemptionCmdSyncProgramming) { + EXPECT_NE(itorPipeControl, itorLri); + } else { + EXPECT_EQ(itorPipeControl, itorLri); + } + + itor = itorLri; + } else { + EXPECT_EQ(itorLri, cmdList.end()); + } + + // start of 2nd disabled preemption command list + itorBBStart = find(itor, cmdList.end()); + EXPECT_NE(itorBBStart, cmdList.end()); + itor = itorBBStart; + + // BB end or ULLS BB start + if (currentCsr->isDirectSubmissionEnabled()) { + itorBBStart = find(itor, cmdList.end()); + EXPECT_NE(itorBBStart, cmdList.end()); + } else { + auto itorBBEnd = find(itor, cmdList.end()); + EXPECT_NE(itorBBEnd, cmdList.end()); + } + + auto allCsrCmds = NEO::UnitTestHelper::findAllMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end()); auto allStateSips = findAll(cmdList.begin(), cmdList.end()); if (preemptionMode == NEO::PreemptionMode::MidThread) { EXPECT_EQ(1u, allStateSips.size()); + EXPECT_EQ(1u, allCsrCmds.size()); } else { EXPECT_EQ(0u, allStateSips.size()); + EXPECT_EQ(0u, allCsrCmds.size()); } - auto firstExecMmioCount = countMmio(cmdList.begin(), itorBBEnd, 0x2580u); - size_t expectedMmioCount = preemptionCmdProgramming ? 4u : 0u; + auto firstExecMmioCount = countMmio(cmdList.begin(), firstExecListItor, registerOffset); + size_t expectedMmioCount = preemptionCmdProgramming ? 3u : 0u; EXPECT_EQ(expectedMmioCount, firstExecMmioCount); // Count next MMIOs for preemption - only two should be present as last cmdlist from 1st exec // and first cmdlist from 2nd exec has the same mode - cmdQ state should remember it - auto secondExecMmioCount = countMmio(itorBBEnd, cmdList.end(), 0x2580u); + auto secondExecMmioCount = countMmio(firstExecListItor, cmdList.end(), registerOffset); expectedMmioCount = preemptionCmdProgramming ? 2u : 0u; EXPECT_EQ(expectedMmioCount, secondExecMmioCount); @@ -804,6 +922,60 @@ HWTEST2_F(CommandQueueExecuteCommandLists, GivenCmdListsWithDifferentPreemptionM twoCommandListCommandPreemptionTest(true); } +HWTEST_F(CommandQueueExecuteCommandLists, GivenCopyCommandQueueWhenExecutingCopyCommandListThenExpectNoPreemptionProgramming) { + using STATE_SIP = typename FamilyType::STATE_SIP; + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + + constexpr uint32_t preemptionRegisterOffset = 0x2580; + + ze_result_t returnValue; + std::unique_ptr commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + + // force command list to have preemption state to verify this state is not used during execution + commandList->commandListPreemptionMode = NEO::PreemptionMode::MidThread; + + auto currentCsr = neoDevice->getDefaultEngine().commandStreamReceiver; + EXPECT_EQ(NEO::PreemptionMode::Initial, currentCsr->getPreemptionMode()); + + const ze_command_queue_desc_t desc{}; + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, + device, + currentCsr, + &desc, + true, + false, + returnValue)); + ASSERT_NE(nullptr, commandQueue); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + EXPECT_TRUE(commandQueue->peekIsCopyOnlyCommandQueue()); + + zet_command_list_handle_t cmdListHandle = commandList->toHandle(); + returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + size_t usedSpaceAfter = commandQueue->commandStream->getUsed(); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + commandQueue->commandStream->getCpuBase(), + usedSpaceAfter)); + + size_t preemptionMmioCount = countMmio(cmdList.begin(), cmdList.end(), preemptionRegisterOffset); + constexpr size_t expectedMmioCount = 0; + EXPECT_EQ(expectedMmioCount, preemptionMmioCount); + + auto allStateSips = findAll(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, allStateSips.size()); + + auto allCsrCmds = NEO::UnitTestHelper::findAllMidThreadPreemptionAllocationCommand(cmdList.begin(), cmdList.end()); + EXPECT_EQ(0u, allCsrCmds.size()); + + EXPECT_EQ(NEO::PreemptionMode::Initial, currentCsr->getPreemptionMode()); + + commandQueue->destroy(); +} + struct CommandQueueExecuteCommandListSWTagsTests : public Test { void SetUp() override { DebugManager.flags.EnableSWTags.set(true); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp index f504a7d76c..8398973c61 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp @@ -183,5 +183,80 @@ HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingS commandQueue2->destroy(); } +using IsMmioPreemptionUsed = IsWithinGfxCore; + +HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingSingleCsrWhenExecutingFirstTimeOnBothQueuesThenPreemptionModeIsProgrammedOnce, IsMmioPreemptionUsed) { + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + + ze_result_t returnValue; + + ze_command_list_handle_t commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + CommandList::fromHandle(commandList)->commandListPreemptionMode = NEO::PreemptionMode::ThreadGroup; + + ze_command_queue_desc_t queueDesc = {}; + queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + ASSERT_NE(nullptr, commandQueue->commandStream); + + auto usedSpaceBefore = commandQueue->commandStream->getUsed(); + returnValue = commandQueue->executeCommandLists(1, &commandList, nullptr, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + auto usedSpaceAfter = commandQueue->commandStream->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(commandQueue->commandStream->getCpuBase(), usedSpaceBefore), + usedSpaceAfter - usedSpaceBefore)); + + auto loadRegisterImmList = findAll(cmdList.begin(), cmdList.end()); + size_t foundPreemptionMmioCount = 0; + for (auto it : loadRegisterImmList) { + auto cmd = genCmdCast(*it); + if (cmd->getRegisterOffset() == 0x2580) { + foundPreemptionMmioCount++; + } + } + + constexpr size_t expectedFirstPreemptionMmioCount = 1u; + EXPECT_EQ(expectedFirstPreemptionMmioCount, foundPreemptionMmioCount); + + cmdList.clear(); + foundPreemptionMmioCount = 0; + + auto commandQueue2 = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + ASSERT_NE(nullptr, commandQueue2->commandStream); + + usedSpaceBefore = commandQueue2->commandStream->getUsed(); + returnValue = commandQueue2->executeCommandLists(1, &commandList, nullptr, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue); + usedSpaceAfter = commandQueue2->commandStream->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, + ptrOffset(commandQueue2->commandStream->getCpuBase(), usedSpaceBefore), + usedSpaceAfter - usedSpaceBefore)); + + loadRegisterImmList = findAll(cmdList.begin(), cmdList.end()); + for (auto it : loadRegisterImmList) { + auto cmd = genCmdCast(*it); + if (cmd->getRegisterOffset() == 0x2580) { + foundPreemptionMmioCount++; + } + } + + constexpr size_t expectedSecondPreemptionMmioCount = 0u; + EXPECT_EQ(expectedSecondPreemptionMmioCount, foundPreemptionMmioCount); + + CommandList::fromHandle(commandList)->destroy(); + commandQueue->destroy(); + commandQueue2->destroy(); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/command_stream/command_stream_receiver.h b/shared/source/command_stream/command_stream_receiver.h index 2401ffcd5f..4cbb90609e 100644 --- a/shared/source/command_stream/command_stream_receiver.h +++ b/shared/source/command_stream/command_stream_receiver.h @@ -348,6 +348,14 @@ class CommandStreamReceiver { isPreambleSent = value; } + PreemptionMode getPreemptionMode() const { + return lastPreemptionMode; + } + + void setPreemptionMode(PreemptionMode value) { + lastPreemptionMode = value; + } + protected: void cleanupResources(); void printDeviceIndex(); diff --git a/shared/test/common/helpers/unit_test_helper.h b/shared/test/common/helpers/unit_test_helper.h index 385389e3d6..e7da944115 100644 --- a/shared/test/common/helpers/unit_test_helper.h +++ b/shared/test/common/helpers/unit_test_helper.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/helpers/aux_translation.h" +#include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include #include @@ -82,6 +83,10 @@ struct UnitTestHelper { static bool timestampRegisterHighAddress(); static void validateSbaMocs(uint32_t expectedMocs, CommandStreamReceiver &csr); + + static GenCmdList::iterator findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end); + + static std::vector findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end); }; } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl index 9c9da21b68..127886f4d0 100644 --- a/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_bdw_and_later.inl @@ -77,4 +77,14 @@ inline bool UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCm return false; } +template +GenCmdList::iterator UnitTestHelper::findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) { + return find(begin, end); +} + +template +std::vector UnitTestHelper::findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) { + return findAll(begin, end); +} + } // namespace NEO diff --git a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl index cfb5378363..62fc554db6 100644 --- a/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl +++ b/shared/test/common/helpers/unit_test_helper_xehp_and_later.inl @@ -105,4 +105,15 @@ inline bool UnitTestHelper::getWorkloadPartitionForStoreRegisterMemCm return storeRegisterMem.getWorkloadPartitionIdOffsetEnable(); } +template +GenCmdList::iterator UnitTestHelper::findMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) { + return end; +} + +template +std::vector UnitTestHelper::findAllMidThreadPreemptionAllocationCommand(GenCmdList::iterator begin, GenCmdList::iterator end) { + std::vector emptyList; + return emptyList; +} + } // namespace NEO diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index eb2e755b23..88ab884f76 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -2103,3 +2103,11 @@ TEST_F(CommandStreamReceiverTest, givenPreambleFlagIsSetWhenGettingFlagStateThen commandStreamReceiver->setPreambleSetFlag(true); EXPECT_TRUE(commandStreamReceiver->getPreambleSetFlag()); } + +TEST_F(CommandStreamReceiverTest, givenPreemptionSentIsInitialWhenSettingPreemptionToNewModeThenExpectCorrectPreemption) { + PreemptionMode mode = PreemptionMode::Initial; + EXPECT_EQ(mode, commandStreamReceiver->getPreemptionMode()); + mode = PreemptionMode::ThreadGroup; + commandStreamReceiver->setPreemptionMode(mode); + EXPECT_EQ(mode, commandStreamReceiver->getPreemptionMode()); +}