From 54042a191ea70b796e6742896f632270e5b932fd Mon Sep 17 00:00:00 2001 From: "Naklicki, Mateusz" Date: Thu, 21 Jul 2022 14:51:09 +0000 Subject: [PATCH] Implement PauseOnEnqueue for L0 Allow pausing execution before and after enqueuing kernel using the PauseOnEnqueue and PauseOnGpuMode debug flags. Related-To: NEO-6570 Signed-off-by: Naklicki, Mateusz --- level_zero/core/source/cmdlist/cmdlist.h | 4 + level_zero/core/source/cmdlist/cmdlist_hw.inl | 6 + .../core/source/cmdlist/cmdlist_hw_base.inl | 52 ++-- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 20 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 19 ++ .../core/source/cmdqueue/cmdqueue_hw_base.inl | 64 +++- .../cmdqueue_xe_hp_core_and_later.inl | 59 +++- .../sources/cmdlist/test_cmdlist_3.cpp | 44 +++ .../sources/cmdqueue/test_cmdqueue_2.cpp | 27 ++ .../test_cmdqueue_enqueue_cmdlist_2.cpp | 274 ++++++++++++++++++ .../command_container/command_encoder.h | 3 + .../command_encoder_bdw_and_later.inl | 19 ++ .../command_encoder_xehp_and_later.inl | 19 ++ shared/source/device/device.h | 2 + .../encoders/test_encode_dispatch_kernel.cpp | 32 ++ 15 files changed, 620 insertions(+), 24 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist.h b/level_zero/core/source/cmdlist/cmdlist.h index c935ef1781..9ead9e24e6 100644 --- a/level_zero/core/source/cmdlist/cmdlist.h +++ b/level_zero/core/source/cmdlist/cmdlist.h @@ -50,6 +50,10 @@ struct CommandList : _ze_command_list_handle_t { struct CommandToPatch { enum CommandType { FrontEndState, + PauseOnEnqueueSemaphoreStart, + PauseOnEnqueueSemaphoreEnd, + PauseOnEnqueuePipeControlStart, + PauseOnEnqueuePipeControlEnd, Invalid }; void *pDestination = nullptr; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 43a610ae77..d665c8c15e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -2317,6 +2317,12 @@ void CommandListCoreFamily::clearCommandsToPatch() 
{ UNRECOVERABLE_IF(commandToPatch.pCommand == nullptr); delete reinterpret_cast(commandToPatch.pCommand); break; + case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart: + case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd: + case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart: + case CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd: + UNRECOVERABLE_IF(commandToPatch.pCommand == nullptr); + break; default: UNRECOVERABLE_IF(true); } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 15be09a5f2..ef57256219 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -10,6 +10,7 @@ #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/linear_stream.h" #include "shared/source/command_stream/preemption.h" +#include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/helpers/simd_helper.h" @@ -116,25 +117,28 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u); } + std::list additionalCommands; + updateStreamProperties(*kernel, false, launchParams.isCooperative); NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ - 0, // eventAddress - neoDevice, // device - kernel, // dispatchInterface - reinterpret_cast(threadGroupDimensions), // threadGroupDimensions - commandListPreemptionMode, // preemptionMode - 0, // partitionCount - launchParams.isIndirect, // isIndirect - launchParams.isPredicate, // isPredicate - false, // isTimestampEvent - this->containsStatelessUncachedResource, // requiresUncachedMocs - false, // useGlobalAtomics - internalUsage, // isInternal - launchParams.isCooperative, // isCooperative - false, // isHostScopeSignalEvent - false, 
// isKernelUsingSystemAllocation - cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList - engineGroupType == NEO::EngineGroupType::RenderCompute // isRcs + 0, // eventAddress + neoDevice, // device + kernel, // dispatchInterface + reinterpret_cast(threadGroupDimensions), // threadGroupDimensions + commandListPreemptionMode, // preemptionMode + 0, // partitionCount + launchParams.isIndirect, // isIndirect + launchParams.isPredicate, // isPredicate + false, // isTimestampEvent + this->containsStatelessUncachedResource, // requiresUncachedMocs + false, // useGlobalAtomics + internalUsage, // isInternal + launchParams.isCooperative, // isCooperative + false, // isHostScopeSignalEvent + false, // isKernelUsingSystemAllocation + cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList + engineGroupType == NEO::EngineGroupType::RenderCompute, // isRcs + &additionalCommands // additionalCommands }; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper()); @@ -172,6 +176,20 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K storePrintfFunction(kernel); } + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart}); + additionalCommands.pop_front(); + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart}); + additionalCommands.pop_front(); + } + + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd}); + 
additionalCommands.pop_front(); + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd}); + additionalCommands.pop_front(); + } + return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 059ffbf303..6cfc0666a2 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -9,6 +9,7 @@ #include "shared/source/command_container/implicit_scaling.h" #include "shared/source/command_stream/preemption.h" #include "shared/source/helpers/cache_flush_xehp_and_later.inl" +#include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/indirect_heap/indirect_heap.h" @@ -228,6 +229,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs(); this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs(); + std::list additionalCommands; + NEO::EncodeDispatchKernelArgs dispatchKernelArgs{ eventAddress, // eventAddress neoDevice, // device @@ -245,7 +248,8 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K isHostSignalScopeEvent, // isHostScopeSignalEvent isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList - engineGroupType == NEO::EngineGroupType::RenderCompute // isRcs + engineGroupType == NEO::EngineGroupType::RenderCompute, // isRcs + &additionalCommands // additionalCommands }; NEO::EncodeDispatchKernel::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper()); this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs; @@ -306,6 +310,20 @@ 
ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K } } + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart}); + additionalCommands.pop_front(); + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart}); + additionalCommands.pop_front(); + } + + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd}); + additionalCommands.pop_front(); + commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd}); + additionalCommands.pop_front(); + } + return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 4a8fcd5806..5d96fc2fa0 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -23,6 +23,7 @@ #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_info.h" #include "shared/source/helpers/logical_state_helper.h" +#include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/preamble.h" #include "shared/source/memory_manager/graphics_allocation.h" @@ -98,6 +99,10 @@ ze_result_t CommandQueueHw::executeCommandLists( ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence); } + if (NEO::DebugManager.flags.PauseOnEnqueue.get() != -1) { + this->device->getNEODevice()->debugExecutionCounter++; + } + return ret; 
} @@ -562,6 +567,20 @@ size_t CommandQueueHw::estimateLinearStreamSizeInitial( linearStreamSizeEstimate += NEO::EncodeKernelArgsBuffer::getKernelArgsBufferCmdsSize(this->csr->getKernelArgsBufferAllocation(), this->csr->getLogicalStateHelper()); + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), + this->device->getNEODevice()->debugExecutionCounter.load(), + NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { + linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false); + linearStreamSizeEstimate += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); + } + + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), + this->device->getNEODevice()->debugExecutionCounter.load(), + NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { + linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false); + linearStreamSizeEstimate += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT); + } + return linearStreamSizeEstimate; } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl index a04bc545ee..a43cfb8e9b 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl @@ -7,6 +7,7 @@ #pragma once +#include "shared/source/command_container/cmdcontainer.h" #include "shared/source/command_container/command_encoder.h" #include "shared/source/command_stream/csr_definitions.h" #include "shared/source/command_stream/linear_stream.h" @@ -121,8 +122,69 @@ void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &heapC template void CommandQueueHw::patchCommands(CommandList &commandList, uint64_t scratchAddress) { + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + using COMPARE_OPERATION = typename 
GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; + auto &commandsToPatch = commandList.getCommandsToPatch(); - UNRECOVERABLE_IF(!commandsToPatch.empty()); + for (auto &commandToPatch : commandsToPatch) { + switch (commandToPatch.type) { + case CommandList::CommandToPatch::FrontEndState: { + UNRECOVERABLE_IF(true); + break; + } + case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart: { + NEO::EncodeSempahore::programMiSemaphoreWait(reinterpret_cast(commandToPatch.pCommand), + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::hasUserStartConfirmation), + COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, + false); + break; + } + case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd: { + NEO::EncodeSempahore::programMiSemaphoreWait(reinterpret_cast(commandToPatch.pCommand), + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::hasUserEndConfirmation), + COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, + false); + break; + } + case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart: { + auto &hwInfo = device->getNEODevice()->getHardwareInfo(); + + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); + + auto command = reinterpret_cast(commandToPatch.pCommand); + NEO::MemorySynchronizationCommands::setBarrierWithPostSyncOperation( + command, + NEO::PostSyncMode::ImmediateData, + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::waitingForUserStartConfirmation), + hwInfo, + args); + break; + } + case CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd: { + auto &hwInfo = device->getNEODevice()->getHardwareInfo(); + + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); + + auto command = reinterpret_cast(commandToPatch.pCommand); + NEO::MemorySynchronizationCommands::setBarrierWithPostSyncOperation( + command, + 
NEO::PostSyncMode::ImmediateData, + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::waitingForUserEndConfirmation), + hwInfo, + args); + break; + } + default: { + UNRECOVERABLE_IF(true); + } + } + } } } // namespace L0 diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index ed7912a407..aea21e771d 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -147,15 +147,15 @@ void CommandQueueHw::handleScratchSpace(NEO::HeapContainer &sshHe template void CommandQueueHw::patchCommands(CommandList &commandList, uint64_t scratchAddress) { using CFE_STATE = typename GfxFamily::CFE_STATE; - - uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress); - - CFE_STATE *cfeStateCmd = nullptr; + using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT; + using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION; auto &commandsToPatch = commandList.getCommandsToPatch(); for (auto &commandToPatch : commandsToPatch) { switch (commandToPatch.type) { - case CommandList::CommandToPatch::FrontEndState: + case CommandList::CommandToPatch::FrontEndState: { + uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress); + CFE_STATE *cfeStateCmd = nullptr; cfeStateCmd = reinterpret_cast(commandToPatch.pCommand); cfeStateCmd->setScratchSpaceBuffer(lowScratchAddress); @@ -163,6 +163,55 @@ void CommandQueueHw::patchCommands(CommandList &commandList, uint *reinterpret_cast(commandToPatch.pDestination) = *cfeStateCmd; break; + } + case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart: { + NEO::EncodeSempahore::programMiSemaphoreWait(reinterpret_cast(commandToPatch.pCommand), + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::hasUserStartConfirmation), + 
COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, + false); + break; + } + case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd: { + NEO::EncodeSempahore::programMiSemaphoreWait(reinterpret_cast(commandToPatch.pCommand), + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::hasUserEndConfirmation), + COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, + false); + break; + } + case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart: { + auto &hwInfo = device->getNEODevice()->getHardwareInfo(); + + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); + + auto command = reinterpret_cast(commandToPatch.pCommand); + NEO::MemorySynchronizationCommands::setBarrierWithPostSyncOperation( + command, + NEO::PostSyncMode::ImmediateData, + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::waitingForUserStartConfirmation), + hwInfo, + args); + break; + } + case CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd: { + auto &hwInfo = device->getNEODevice()->getHardwareInfo(); + + NEO::PipeControlArgs args; + args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); + + auto command = reinterpret_cast(commandToPatch.pCommand); + NEO::MemorySynchronizationCommands::setBarrierWithPostSyncOperation( + command, + NEO::PostSyncMode::ImmediateData, + csr->getDebugPauseStateGPUAddress(), + static_cast(NEO::DebugPauseState::waitingForUserEndConfirmation), + hwInfo, + args); + break; + } default: UNRECOVERABLE_IF(true); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp index c635341085..18c66626d8 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp @@ -1270,6 +1270,50 @@ HWTEST2_F(CommandListCreate, 
givenNonEmptyCommandsToPatchWhenClearCommandsToPatc pCommandList->commandsToPatch.push_back(commandToPatch); EXPECT_NO_THROW(pCommandList->clearCommandsToPatch()); EXPECT_TRUE(pCommandList->commandsToPatch.empty()); + + commandToPatch = {}; + commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart; + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch()); + pCommandList->commandsToPatch.clear(); + + commandToPatch.pCommand = reinterpret_cast(0x1234); + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_NO_THROW(pCommandList->clearCommandsToPatch()); + EXPECT_TRUE(pCommandList->commandsToPatch.empty()); + + commandToPatch = {}; + commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd; + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch()); + pCommandList->commandsToPatch.clear(); + + commandToPatch.pCommand = reinterpret_cast(0x1234); + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_NO_THROW(pCommandList->clearCommandsToPatch()); + EXPECT_TRUE(pCommandList->commandsToPatch.empty()); + + commandToPatch = {}; + commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart; + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch()); + pCommandList->commandsToPatch.clear(); + + commandToPatch.pCommand = reinterpret_cast(0x1234); + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_NO_THROW(pCommandList->clearCommandsToPatch()); + EXPECT_TRUE(pCommandList->commandsToPatch.empty()); + + commandToPatch = {}; + commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd; + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch()); + pCommandList->commandsToPatch.clear(); + + commandToPatch.pCommand = 
reinterpret_cast(0x1234); + pCommandList->commandsToPatch.push_back(commandToPatch); + EXPECT_NO_THROW(pCommandList->clearCommandsToPatch()); + EXPECT_TRUE(pCommandList->commandsToPatch.empty()); } template diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index 1b3901de11..1e39721be5 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -1005,5 +1005,32 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr } } +using IsWithinNotSupported = IsWithinGfxCore; + +HWTEST2_F(CommandQueueScratchTests, givenCommandsToPatchToNotSupportedPlatformWhenPatchCommandsIsCalledThenAbortIsThrown, IsWithinNotSupported) { + ze_command_queue_desc_t desc = {}; + NEO::CommandStreamReceiver *csr = nullptr; + device->getCsrForOrdinalAndIndex(&csr, 0u, 0u); + auto commandQueue = std::make_unique>(device, csr, &desc); + auto commandList = std::make_unique>>(); + + EXPECT_NO_THROW(commandQueue->patchCommands(*commandList, 0)); + commandList->commandsToPatch.push_back({}); + EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0)); + commandList->commandsToPatch.clear(); + + CommandList::CommandToPatch commandToPatch; + + commandToPatch.type = CommandList::CommandToPatch::FrontEndState; + commandList->commandsToPatch.push_back(commandToPatch); + EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0)); + commandList->commandsToPatch.clear(); + + commandToPatch.type = CommandList::CommandToPatch::Invalid; + commandList->commandsToPatch.push_back(commandToPatch); + EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0)); + commandList->commandsToPatch.clear(); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp 
b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp index 8398973c61..f4eeb9655d 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/test/common/cmd_parse/gen_cmd_parse.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/test_macros/hw_test.h" @@ -12,6 +13,7 @@ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/source/fence/fence.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" +#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" #include "level_zero/core/test/unit_tests/mocks/mock_fence.h" @@ -258,5 +260,277 @@ HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingS commandQueue2->destroy(); } +struct PauseOnGpuTests : public Test { + void SetUp() override { + ModuleFixture::setUp(); + + auto &csr = neoDevice->getGpgpuCommandStreamReceiver(); + debugPauseStateAddress = csr.getDebugPauseStateGPUAddress(); + + createKernel(); + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue->commandStream); + + commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue); + commandListHandle = commandList->toHandle(); + } + + void TearDown() override { + commandList->destroy(); + commandQueue->destroy(); + ModuleFixture::tearDown(); + } + + template + bool verifySemaphore(const GenCmdList::iterator &iterator, uint64_t debugPauseStateAddress, DebugPauseState 
requiredDebugPauseState) { + auto semaphoreCmd = genCmdCast(*iterator); + + if ((static_cast(requiredDebugPauseState) == semaphoreCmd->getSemaphoreDataDword()) && + (debugPauseStateAddress == semaphoreCmd->getSemaphoreGraphicsAddress())) { + + EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); + EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, semaphoreCmd->getWaitMode()); + + return true; + } + + return false; + } + + template + bool verifyPipeControl(const GenCmdList::iterator &iterator, uint64_t debugPauseStateAddress, DebugPauseState requiredDebugPauseState) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + auto pipeControlCmd = genCmdCast(*iterator); + + if ((static_cast(requiredDebugPauseState) == pipeControlCmd->getImmediateData()) && + (debugPauseStateAddress == NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd))) { + EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); + EXPECT_EQ(MemorySynchronizationCommands::getDcFlushEnable(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation()); + return true; + } + + return false; + } + + template + bool verifyLoadRegImm(const GenCmdList::iterator &iterator) { + using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; + uint32_t expectedRegisterOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get(); + uint32_t expectedRegisterData = DebugManager.flags.GpuScratchRegWriteRegisterData.get(); + auto loadRegImm = genCmdCast(*iterator); + + if ((expectedRegisterOffset == loadRegImm->getRegisterOffset()) && + (expectedRegisterData == loadRegImm->getDataDword())) { + return true; + } + + return false; + } + + template + void findSemaphores(GenCmdList &cmdList) { + auto semaphore = find(cmdList.begin(), cmdList.end()); + + while (semaphore != 
cmdList.end()) { + if (verifySemaphore(semaphore, debugPauseStateAddress, DebugPauseState::hasUserStartConfirmation)) { + semaphoreBeforeWalkerFound++; + } + + if (verifySemaphore(semaphore, debugPauseStateAddress, DebugPauseState::hasUserEndConfirmation)) { + semaphoreAfterWalkerFound++; + } + + semaphore = find(++semaphore, cmdList.end()); + } + } + + template + void findPipeControls(GenCmdList &cmdList) { + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + auto pipeControl = find(cmdList.begin(), cmdList.end()); + + while (pipeControl != cmdList.end()) { + if (verifyPipeControl(pipeControl, debugPauseStateAddress, DebugPauseState::waitingForUserStartConfirmation)) { + pipeControlBeforeWalkerFound++; + } + + if (verifyPipeControl(pipeControl, debugPauseStateAddress, DebugPauseState::waitingForUserEndConfirmation)) { + pipeControlAfterWalkerFound++; + } + + pipeControl = find(++pipeControl, cmdList.end()); + } + } + + void enqueueKernel() { + auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandList->close(); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + result = commandQueue->executeCommandLists(1u, &commandListHandle, nullptr, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + } + + DebugManagerStateRestore restore; + + CmdListKernelLaunchParams launchParams = {}; + ze_group_count_t groupCount{1, 1, 1}; + + L0::ult::CommandQueue *commandQueue = nullptr; + L0::CommandList *commandList = nullptr; + ze_command_list_handle_t commandListHandle = {}; + + uint64_t debugPauseStateAddress = 0; + + uint32_t semaphoreBeforeWalkerFound = 0; + uint32_t semaphoreAfterWalkerFound = 0; + uint32_t pipeControlBeforeWalkerFound = 0; + uint32_t pipeControlAfterWalkerFound = 0; +}; + +HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) { + using MI_SEMAPHORE_WAIT = typename 
FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManager.flags.PauseOnEnqueue.set(1); + + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + + enqueueKernel(); + enqueueKernel(); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + findSemaphores(cmdList); + findPipeControls(cmdList); + + EXPECT_EQ(1u, semaphoreBeforeWalkerFound); + EXPECT_EQ(1u, semaphoreAfterWalkerFound); + EXPECT_EQ(1u, pipeControlBeforeWalkerFound); + EXPECT_EQ(1u, pipeControlAfterWalkerFound); +} + +HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetToAlwaysWhenDispatchWalkersThenInsertPauseCommandsAroundEachEnqueue) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManager.flags.PauseOnEnqueue.set(-2); + + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + + enqueueKernel(); + enqueueKernel(); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + findSemaphores(cmdList); + findPipeControls(cmdList); + + EXPECT_EQ(2u, semaphoreBeforeWalkerFound); + EXPECT_EQ(2u, semaphoreAfterWalkerFound); + EXPECT_EQ(2u, pipeControlBeforeWalkerFound); + EXPECT_EQ(2u, pipeControlAfterWalkerFound); +} + +HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeOnlyWhenDispatchingThenInsertPauseOnlyBeforeEnqueue) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using 
PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManager.flags.PauseOnEnqueue.set(0); + DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeWorkload); + + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + + enqueueKernel(); + enqueueKernel(); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + findSemaphores(cmdList); + + findPipeControls(cmdList); + + EXPECT_EQ(1u, semaphoreBeforeWalkerFound); + EXPECT_EQ(0u, semaphoreAfterWalkerFound); + EXPECT_EQ(1u, pipeControlBeforeWalkerFound); + EXPECT_EQ(0u, pipeControlAfterWalkerFound); +} + +HWTEST_F(PauseOnGpuTests, givenPauseModeSetToAfterOnlyWhenDispatchingThenInsertPauseOnlyAfterEnqueue) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManager.flags.PauseOnEnqueue.set(0); + DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::AfterWorkload); + + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + + enqueueKernel(); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + findSemaphores(cmdList); + findPipeControls(cmdList); + + EXPECT_EQ(0u, semaphoreBeforeWalkerFound); + EXPECT_EQ(1u, semaphoreAfterWalkerFound); + EXPECT_EQ(0u, pipeControlBeforeWalkerFound); + EXPECT_EQ(1u, pipeControlAfterWalkerFound); +} + +HWTEST_F(PauseOnGpuTests, 
givenPauseModeSetToBeforeAndAfterWhenDispatchingThenInsertPauseAroundEnqueue) { + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; + + DebugManager.flags.PauseOnEnqueue.set(0); + DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeAndAfterWorkload); + + auto usedSpaceBefore = commandList->commandContainer.getCommandStream()->getUsed(); + + enqueueKernel(); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + findSemaphores(cmdList); + + findPipeControls(cmdList); + + EXPECT_EQ(1u, semaphoreBeforeWalkerFound); + EXPECT_EQ(1u, semaphoreAfterWalkerFound); + EXPECT_EQ(1u, pipeControlBeforeWalkerFound); + EXPECT_EQ(1u, pipeControlAfterWalkerFound); +} + } // namespace ult } // namespace L0 diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 9f911d20d3..2f8b2e02c9 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -19,6 +19,8 @@ #include "encode_surface_state_args.h" +#include + namespace NEO { class BindlessHeapsHelper; @@ -48,6 +50,7 @@ struct EncodeDispatchKernelArgs { bool isKernelUsingSystemAllocation = false; bool isKernelDispatchedFromImmediateCmdList = false; bool isRcs = false; + std::list *additionalCommands = nullptr; }; struct EncodeWalkerArgs { diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 56a946f8b7..2fa31bbe76 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ 
b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -13,6 +13,7 @@ #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/api_specific_config.h" #include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/simd_helper.h" #include "shared/source/helpers/state_base_address.h" @@ -232,6 +233,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension(); EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired); + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false)); + args.additionalCommands->push_back(commandBuffer); + + using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT; + MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>(); + args.additionalCommands->push_back(semaphoreCommand); + } + PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device); auto buffer = listCmdBufferStream->getSpace(sizeof(cmd)); @@ -244,6 +254,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis } args.partitionCount = 1; + + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false)); 
+ args.additionalCommands->push_back(commandBuffer); + + using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT; + MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>(); + args.additionalCommands->push_back(semaphoreCommand); + } } template <typename Family> diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index cee329d5fd..0f6a4667a6 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -20,6 +20,7 @@ #include "shared/source/helpers/constants.h" #include "shared/source/helpers/hw_helper.h" #include "shared/source/helpers/hw_walk_order.h" +#include "shared/source/helpers/pause_on_gpu_properties.h" #include "shared/source/helpers/pipe_control_args.h" #include "shared/source/helpers/pipeline_select_helper.h" #include "shared/source/helpers/ray_tracing_helper.h" @@ -228,6 +229,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis args.requiresUncachedMocs = false; } + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false)); + args.additionalCommands->push_back(commandBuffer); + + using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT; + MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>(); + args.additionalCommands->push_back(reinterpret_cast<void *>(semaphoreCommand)); + } + walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData)); walkerCmd.setIndirectDataLength(sizeThreadData); @@ -301,6 +311,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis } 
PreemptionHelper::applyPreemptionWaCmdsEnd<Family>(listCmdBufferStream, *args.device); + + if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false)); + args.additionalCommands->push_back(commandBuffer); + + using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT; + MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>(); + args.additionalCommands->push_back(semaphoreCommand); + } } template <typename Family> diff --git a/shared/source/device/device.h b/shared/source/device/device.h index 127cfaab9e..1961d4deea 100644 --- a/shared/source/device/device.h +++ b/shared/source/device/device.h @@ -150,6 +150,8 @@ class Device : public ReferenceTrackedObject<Device> { MOCKABLE_VIRTUAL bool verifyAdapterLuid(); void getAdapterMask(uint32_t &nodeMask); + std::atomic<uint32_t> debugExecutionCounter = 0; + protected: Device() = delete; Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex); diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp index b142b3c98f..0969028eb5 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel.cpp @@ -698,6 +698,38 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeorWhenD ASSERT_NE(itorPC, commands.end()); } +HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalkerThenCommandsToPatchAreNotPresent) { + DebugManagerStateRestore restorer; + DebugManager.flags.PauseOnEnqueue.set(-1); + + std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); + + uint32_t dims[] = {1, 1, 1}; + bool requiresUncachedMocs = false; + std::list<void *> cmdsToPatch; + 
EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); + dispatchArgs.additionalCommands = &cmdsToPatch; + EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr); + + EXPECT_EQ(cmdsToPatch.size(), 0u); +} + +HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalkerThenCommandsToPatchAreFilled) { + DebugManagerStateRestore restorer; + DebugManager.flags.PauseOnEnqueue.set(-2); + + std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder()); + + uint32_t dims[] = {1, 1, 1}; + bool requiresUncachedMocs = false; + std::list<void *> cmdsToPatch; + EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs); + dispatchArgs.additionalCommands = &cmdsToPatch; + EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs, nullptr); + + EXPECT_EQ(cmdsToPatch.size(), 4u); +} + using EncodeDispatchKernelTest = Test<CommandEncodeStatesFixture>; HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenSshFromContainerIsUsed, IsAtLeastSkl) {