Implement PauseOnEnqueue for L0

Allow pausing execution before and after enqueuing a kernel
by using the PauseOnEnqueue and PauseOnGpuMode debug flags.

Related-To: NEO-6570
Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
Naklicki, Mateusz
2022-07-21 14:51:09 +00:00
committed by Compute-Runtime-Automation
parent a820e73dd7
commit 54042a191e
15 changed files with 620 additions and 24 deletions

View File

@ -50,6 +50,10 @@ struct CommandList : _ze_command_list_handle_t {
struct CommandToPatch {
// Kind of placeholder command recorded in a command list and finalized later
// by CommandQueueHw::patchCommands.
enum CommandType {
// Front-end state command; the CFE_STATE variant of patchCommands writes the
// scratch space buffer into it and copies it to pDestination.
FrontEndState,
// MI_SEMAPHORE_WAIT programmed to wait on the debug pause state address for
// DebugPauseState::hasUserStartConfirmation.
PauseOnEnqueueSemaphoreStart,
// MI_SEMAPHORE_WAIT programmed to wait on the debug pause state address for
// DebugPauseState::hasUserEndConfirmation.
PauseOnEnqueueSemaphoreEnd,
// Barrier with post-sync operation that writes
// DebugPauseState::waitingForUserStartConfirmation to the debug pause state address.
PauseOnEnqueuePipeControlStart,
// Barrier with post-sync operation that writes
// DebugPauseState::waitingForUserEndConfirmation to the debug pause state address.
PauseOnEnqueuePipeControlEnd,
// No dedicated case exists for this value; the switches over CommandType
// reach their default branch and hit UNRECOVERABLE_IF.
Invalid
};
void *pDestination = nullptr;

View File

@ -2317,6 +2317,12 @@ void CommandListCoreFamily<gfxCoreFamily>::clearCommandsToPatch() {
UNRECOVERABLE_IF(commandToPatch.pCommand == nullptr);
delete reinterpret_cast<VFE_STATE_TYPE *>(commandToPatch.pCommand);
break;
case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart:
case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd:
case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart:
case CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd:
UNRECOVERABLE_IF(commandToPatch.pCommand == nullptr);
break;
default:
UNRECOVERABLE_IF(true);
}

View File

@ -10,6 +10,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/simd_helper.h"
@ -116,25 +117,28 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
kernel->getKernelDescriptor().kernelMetadata.kernelName.c_str(), 0u);
}
std::list<void *> additionalCommands;
updateStreamProperties(*kernel, false, launchParams.isCooperative);
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
0, // eventAddress
neoDevice, // device
kernel, // dispatchInterface
reinterpret_cast<const void *>(threadGroupDimensions), // threadGroupDimensions
commandListPreemptionMode, // preemptionMode
0, // partitionCount
launchParams.isIndirect, // isIndirect
launchParams.isPredicate, // isPredicate
false, // isTimestampEvent
this->containsStatelessUncachedResource, // requiresUncachedMocs
false, // useGlobalAtomics
internalUsage, // isInternal
launchParams.isCooperative, // isCooperative
false, // isHostScopeSignalEvent
false, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::RenderCompute // isRcs
0, // eventAddress
neoDevice, // device
kernel, // dispatchInterface
reinterpret_cast<const void *>(threadGroupDimensions), // threadGroupDimensions
commandListPreemptionMode, // preemptionMode
0, // partitionCount
launchParams.isIndirect, // isIndirect
launchParams.isPredicate, // isPredicate
false, // isTimestampEvent
this->containsStatelessUncachedResource, // requiresUncachedMocs
false, // useGlobalAtomics
internalUsage, // isInternal
launchParams.isCooperative, // isCooperative
false, // isHostScopeSignalEvent
false, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::RenderCompute, // isRcs
&additionalCommands // additionalCommands
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
@ -172,6 +176,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
storePrintfFunction(kernel);
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart});
additionalCommands.pop_front();
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd});
additionalCommands.pop_front();
}
return ZE_RESULT_SUCCESS;
}

View File

@ -9,6 +9,7 @@
#include "shared/source/command_container/implicit_scaling.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/helpers/cache_flush_xehp_and_later.inl"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipeline_select_helper.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/indirect_heap/indirect_heap.h"
@ -228,6 +229,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
this->containsStatelessUncachedResource |= kernelImp->getKernelRequiresUncachedMocs();
this->requiresQueueUncachedMocs |= kernelImp->getKernelRequiresQueueUncachedMocs();
std::list<void *> additionalCommands;
NEO::EncodeDispatchKernelArgs dispatchKernelArgs{
eventAddress, // eventAddress
neoDevice, // device
@ -245,7 +248,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
isHostSignalScopeEvent, // isHostScopeSignalEvent
isKernelUsingSystemAllocation, // isKernelUsingSystemAllocation
cmdListType == CommandListType::TYPE_IMMEDIATE, // isKernelDispatchedFromImmediateCmdList
engineGroupType == NEO::EngineGroupType::RenderCompute // isRcs
engineGroupType == NEO::EngineGroupType::RenderCompute, // isRcs
&additionalCommands // additionalCommands
};
NEO::EncodeDispatchKernel<GfxFamily>::encode(commandContainer, dispatchKernelArgs, getLogicalStateHelper());
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
@ -306,6 +310,20 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlStart});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreStart});
additionalCommands.pop_front();
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), neoDevice->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueuePipeControlEnd});
additionalCommands.pop_front();
commandsToPatch.push_back({0x0, additionalCommands.front(), CommandToPatch::PauseOnEnqueueSemaphoreEnd});
additionalCommands.pop_front();
}
return ZE_RESULT_SUCCESS;
}

View File

@ -23,6 +23,7 @@
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/logical_state_helper.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/memory_manager/graphics_allocation.h"
@ -98,6 +99,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
ret = this->executeCommandListsRegular(ctx, numCommandLists, phCommandLists, hFence);
}
if (NEO::DebugManager.flags.PauseOnEnqueue.get() != -1) {
this->device->getNEODevice()->debugExecutionCounter++;
}
return ret;
}
@ -562,6 +567,20 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
linearStreamSizeEstimate += NEO::EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(this->csr->getKernelArgsBufferAllocation(),
this->csr->getLogicalStateHelper());
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(),
this->device->getNEODevice()->debugExecutionCounter.load(),
NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
linearStreamSizeEstimate += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
}
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(),
this->device->getNEODevice()->debugExecutionCounter.load(),
NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getHwInfo(), false);
linearStreamSizeEstimate += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT);
}
return linearStreamSizeEstimate;
}

View File

@ -7,6 +7,7 @@
#pragma once
#include "shared/source/command_container/cmdcontainer.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/csr_definitions.h"
#include "shared/source/command_stream/linear_stream.h"
@ -121,8 +122,69 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapC
// Finalizes the placeholder commands that a command list recorded in its
// commandsToPatch container.
//
// commandList    - command list whose recorded placeholders are programmed in place.
// scratchAddress - unused by this variant; FrontEndState patching is not
//                  supported here and aborts.
//
// Supported entries are the PauseOnEnqueue placeholders: a barrier with a
// post-sync write that publishes the "waiting for user" state, followed by a
// semaphore that waits until the matching "user confirmed" state is written
// to the debug pause state address. Any other entry type is unrecoverable.
//
// Note: an empty list is a no-op. There must be no "list has to be empty"
// guard in front of the loop, otherwise the pause placeholders could never
// be patched.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint64_t scratchAddress) {
    using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
    using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;

    auto &commandsToPatch = commandList.getCommandsToPatch();
    for (auto &commandToPatch : commandsToPatch) {
        switch (commandToPatch.type) {
        case CommandList::CommandToPatch::FrontEndState: {
            // Front-end state patching is not available in this variant.
            UNRECOVERABLE_IF(true);
            break;
        }
        case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart: {
            // Wait until the pause state equals hasUserStartConfirmation.
            NEO::EncodeSempahore<GfxFamily>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandToPatch.pCommand),
                                                                    csr->getDebugPauseStateGPUAddress(),
                                                                    static_cast<uint32_t>(NEO::DebugPauseState::hasUserStartConfirmation),
                                                                    COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
                                                                    false);
            break;
        }
        case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd: {
            // Wait until the pause state equals hasUserEndConfirmation.
            NEO::EncodeSempahore<GfxFamily>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandToPatch.pCommand),
                                                                    csr->getDebugPauseStateGPUAddress(),
                                                                    static_cast<uint32_t>(NEO::DebugPauseState::hasUserEndConfirmation),
                                                                    COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
                                                                    false);
            break;
        }
        case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart: {
            // Publish waitingForUserStartConfirmation through a post-sync immediate write.
            auto &hwInfo = device->getNEODevice()->getHardwareInfo();

            NEO::PipeControlArgs args;
            args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);

            auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
            NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
                command,
                NEO::PostSyncMode::ImmediateData,
                csr->getDebugPauseStateGPUAddress(),
                static_cast<uint64_t>(NEO::DebugPauseState::waitingForUserStartConfirmation),
                hwInfo,
                args);
            break;
        }
        case CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd: {
            // Publish waitingForUserEndConfirmation through a post-sync immediate write.
            auto &hwInfo = device->getNEODevice()->getHardwareInfo();

            NEO::PipeControlArgs args;
            args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);

            auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
            NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
                command,
                NEO::PostSyncMode::ImmediateData,
                csr->getDebugPauseStateGPUAddress(),
                static_cast<uint64_t>(NEO::DebugPauseState::waitingForUserEndConfirmation),
                hwInfo,
                args);
            break;
        }
        default: {
            // Unknown or Invalid entry type.
            UNRECOVERABLE_IF(true);
        }
        }
    }
}
} // namespace L0

View File

@ -147,15 +147,15 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint64_t scratchAddress) {
using CFE_STATE = typename GfxFamily::CFE_STATE;
uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress);
CFE_STATE *cfeStateCmd = nullptr;
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
auto &commandsToPatch = commandList.getCommandsToPatch();
for (auto &commandToPatch : commandsToPatch) {
switch (commandToPatch.type) {
case CommandList::CommandToPatch::FrontEndState:
case CommandList::CommandToPatch::FrontEndState: {
uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress);
CFE_STATE *cfeStateCmd = nullptr;
cfeStateCmd = reinterpret_cast<CFE_STATE *>(commandToPatch.pCommand);
cfeStateCmd->setScratchSpaceBuffer(lowScratchAddress);
@ -163,6 +163,55 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
*reinterpret_cast<CFE_STATE *>(commandToPatch.pDestination) = *cfeStateCmd;
break;
}
case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart: {
NEO::EncodeSempahore<GfxFamily>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandToPatch.pCommand),
csr->getDebugPauseStateGPUAddress(),
static_cast<uint32_t>(NEO::DebugPauseState::hasUserStartConfirmation),
COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
false);
break;
}
case CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd: {
NEO::EncodeSempahore<GfxFamily>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandToPatch.pCommand),
csr->getDebugPauseStateGPUAddress(),
static_cast<uint32_t>(NEO::DebugPauseState::hasUserEndConfirmation),
COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD,
false);
break;
}
case CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart: {
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
command,
NEO::PostSyncMode::ImmediateData,
csr->getDebugPauseStateGPUAddress(),
static_cast<uint64_t>(NEO::DebugPauseState::waitingForUserStartConfirmation),
hwInfo,
args);
break;
}
case CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd: {
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = NEO::MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, hwInfo);
auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
command,
NEO::PostSyncMode::ImmediateData,
csr->getDebugPauseStateGPUAddress(),
static_cast<uint64_t>(NEO::DebugPauseState::waitingForUserEndConfirmation),
hwInfo,
args);
break;
}
default:
UNRECOVERABLE_IF(true);
}

View File

@ -1270,6 +1270,50 @@ HWTEST2_F(CommandListCreate, givenNonEmptyCommandsToPatchWhenClearCommandsToPatc
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_NO_THROW(pCommandList->clearCommandsToPatch());
EXPECT_TRUE(pCommandList->commandsToPatch.empty());
commandToPatch = {};
commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueueSemaphoreStart;
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch());
pCommandList->commandsToPatch.clear();
commandToPatch.pCommand = reinterpret_cast<void *>(0x1234);
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_NO_THROW(pCommandList->clearCommandsToPatch());
EXPECT_TRUE(pCommandList->commandsToPatch.empty());
commandToPatch = {};
commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueueSemaphoreEnd;
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch());
pCommandList->commandsToPatch.clear();
commandToPatch.pCommand = reinterpret_cast<void *>(0x1234);
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_NO_THROW(pCommandList->clearCommandsToPatch());
EXPECT_TRUE(pCommandList->commandsToPatch.empty());
commandToPatch = {};
commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueuePipeControlStart;
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch());
pCommandList->commandsToPatch.clear();
commandToPatch.pCommand = reinterpret_cast<void *>(0x1234);
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_NO_THROW(pCommandList->clearCommandsToPatch());
EXPECT_TRUE(pCommandList->commandsToPatch.empty());
commandToPatch = {};
commandToPatch.type = CommandList::CommandToPatch::PauseOnEnqueuePipeControlEnd;
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_ANY_THROW(pCommandList->clearCommandsToPatch());
pCommandList->commandsToPatch.clear();
commandToPatch.pCommand = reinterpret_cast<void *>(0x1234);
pCommandList->commandsToPatch.push_back(commandToPatch);
EXPECT_NO_THROW(pCommandList->clearCommandsToPatch());
EXPECT_TRUE(pCommandList->commandsToPatch.empty());
}
template <NEO::AllocationType AllocType>

View File

@ -1005,5 +1005,32 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr
}
}
using IsWithinNotSupported = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;

// On cores in the GEN9..GEN12LP range patchCommands must accept an empty patch
// list and abort for a default-constructed, a FrontEndState and an Invalid entry.
HWTEST2_F(CommandQueueScratchTests, givenCommandsToPatchToNotSupportedPlatformWhenPatchCommandsIsCalledThenAbortIsThrown, IsWithinNotSupported) {
    ze_command_queue_desc_t desc = {};
    NEO::CommandStreamReceiver *csr = nullptr;
    device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);

    auto commandQueue = std::make_unique<MockCommandQueueHw<gfxCoreFamily>>(device, csr, &desc);
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();

    // Nothing recorded: patching must not abort.
    EXPECT_NO_THROW(commandQueue->patchCommands(*commandList, 0));

    // Records a single entry, expects patching to abort, then empties the list again.
    const auto expectAbortFor = [&](const CommandList::CommandToPatch &entry) {
        commandList->commandsToPatch.push_back(entry);
        EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0));
        commandList->commandsToPatch.clear();
    };

    expectAbortFor({});

    CommandList::CommandToPatch commandToPatch;
    commandToPatch.type = CommandList::CommandToPatch::FrontEndState;
    expectAbortFor(commandToPatch);

    commandToPatch.type = CommandList::CommandToPatch::Invalid;
    expectAbortFor(commandToPatch);
}
} // namespace ult
} // namespace L0

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/test_macros/hw_test.h"
@ -12,6 +13,7 @@
#include "level_zero/core/source/cmdlist/cmdlist.h"
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
@ -258,5 +260,277 @@ HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingS
commandQueue2->destroy();
}
// Fixture for the PauseOnEnqueue tests: creates a kernel, a command queue and a
// command list, and offers helpers that count the pause-related semaphores and
// pipe controls found in a parsed command buffer.
struct PauseOnGpuTests : public Test<ModuleFixture> {
    void SetUp() override {
        ModuleFixture::setUp();

        // Address the pause commands are expected to read from / write to.
        debugPauseStateAddress = neoDevice->getGpgpuCommandStreamReceiver().getDebugPauseStateGPUAddress();

        createKernel();

        ze_command_queue_desc_t desc = {};
        ze_result_t status;
        auto *defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
        commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, defaultCsr, &desc, false, false, status));
        ASSERT_NE(nullptr, commandQueue->commandStream);

        commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, status);
        commandListHandle = commandList->toHandle();
    }

    void TearDown() override {
        commandList->destroy();
        commandQueue->destroy();
        ModuleFixture::tearDown();
    }

    // Returns true when the semaphore at `iterator` waits on `debugPauseStateAddress`
    // for `requiredDebugPauseState`; in that case its compare operation and wait
    // mode are additionally checked with EXPECT_EQ.
    template <typename MI_SEMAPHORE_WAIT>
    bool verifySemaphore(const GenCmdList::iterator &iterator, uint64_t debugPauseStateAddress, DebugPauseState requiredDebugPauseState) {
        auto cmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*iterator);

        if (static_cast<uint32_t>(requiredDebugPauseState) != cmd->getSemaphoreDataDword()) {
            return false;
        }
        if (debugPauseStateAddress != cmd->getSemaphoreGraphicsAddress()) {
            return false;
        }

        EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION::COMPARE_OPERATION_SAD_EQUAL_SDD, cmd->getCompareOperation());
        EXPECT_EQ(MI_SEMAPHORE_WAIT::WAIT_MODE::WAIT_MODE_POLLING_MODE, cmd->getWaitMode());
        return true;
    }

    // Returns true when the pipe control at `iterator` writes `requiredDebugPauseState`
    // as immediate data to `debugPauseStateAddress`; in that case its stall, DC flush
    // and post-sync settings are additionally checked.
    template <typename FamilyType>
    bool verifyPipeControl(const GenCmdList::iterator &iterator, uint64_t debugPauseStateAddress, DebugPauseState requiredDebugPauseState) {
        using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

        auto cmd = genCmdCast<PIPE_CONTROL *>(*iterator);

        if (static_cast<uint32_t>(requiredDebugPauseState) != cmd->getImmediateData()) {
            return false;
        }
        if (debugPauseStateAddress != NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*cmd)) {
            return false;
        }

        EXPECT_TRUE(cmd->getCommandStreamerStallEnable());
        EXPECT_EQ(MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, *defaultHwInfo), cmd->getDcFlushEnable());
        EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, cmd->getPostSyncOperation());
        return true;
    }

    // Returns true when the MI_LOAD_REGISTER_IMM at `iterator` carries the offset and
    // data configured through the GpuScratchRegWrite* debug flags.
    template <typename FamilyType>
    bool verifyLoadRegImm(const GenCmdList::iterator &iterator) {
        using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

        uint32_t wantedOffset = DebugManager.flags.GpuScratchRegWriteRegisterOffset.get();
        uint32_t wantedData = DebugManager.flags.GpuScratchRegWriteRegisterData.get();

        auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*iterator);
        return (wantedOffset == cmd->getRegisterOffset()) && (wantedData == cmd->getDataDword());
    }

    // Walks every MI_SEMAPHORE_WAIT in `cmdList` and bumps the before/after counters
    // for those matching the start/end user-confirmation states.
    template <typename MI_SEMAPHORE_WAIT>
    void findSemaphores(GenCmdList &cmdList) {
        for (auto it = find<MI_SEMAPHORE_WAIT *>(cmdList.begin(), cmdList.end());
             it != cmdList.end();
             it = find<MI_SEMAPHORE_WAIT *>(++it, cmdList.end())) {
            if (verifySemaphore<MI_SEMAPHORE_WAIT>(it, debugPauseStateAddress, DebugPauseState::hasUserStartConfirmation)) {
                semaphoreBeforeWalkerFound++;
            }
            if (verifySemaphore<MI_SEMAPHORE_WAIT>(it, debugPauseStateAddress, DebugPauseState::hasUserEndConfirmation)) {
                semaphoreAfterWalkerFound++;
            }
        }
    }

    // Walks every PIPE_CONTROL in `cmdList` and bumps the before/after counters
    // for those matching the start/end "waiting for user" states.
    template <typename FamilyType>
    void findPipeControls(GenCmdList &cmdList) {
        using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

        for (auto it = find<PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
             it != cmdList.end();
             it = find<PIPE_CONTROL *>(++it, cmdList.end())) {
            if (verifyPipeControl<FamilyType>(it, debugPauseStateAddress, DebugPauseState::waitingForUserStartConfirmation)) {
                pipeControlBeforeWalkerFound++;
            }
            if (verifyPipeControl<FamilyType>(it, debugPauseStateAddress, DebugPauseState::waitingForUserEndConfirmation)) {
                pipeControlAfterWalkerFound++;
            }
        }
    }

    // Appends the fixture kernel to the command list, closes the list and executes
    // it on the command queue; each step must succeed.
    void enqueueKernel() {
        ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams));
        ASSERT_EQ(ZE_RESULT_SUCCESS, commandList->close());
        ASSERT_EQ(ZE_RESULT_SUCCESS, commandQueue->executeCommandLists(1u, &commandListHandle, nullptr, false));
    }

    DebugManagerStateRestore restore;

    CmdListKernelLaunchParams launchParams = {};
    ze_group_count_t groupCount{1, 1, 1};

    L0::ult::CommandQueue *commandQueue = nullptr;
    L0::CommandList *commandList = nullptr;
    ze_command_list_handle_t commandListHandle = {};

    uint64_t debugPauseStateAddress = 0;

    // Counters filled in by findSemaphores / findPipeControls.
    uint32_t semaphoreBeforeWalkerFound = 0;
    uint32_t semaphoreAfterWalkerFound = 0;
    uint32_t pipeControlBeforeWalkerFound = 0;
    uint32_t pipeControlAfterWalkerFound = 0;
};
// With PauseOnEnqueue set to 1, two enqueues must yield exactly one set of pause
// commands (one pipe control + semaphore before and one after).
HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) {
    DebugManager.flags.PauseOnEnqueue.set(1);

    const auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
    for (int enqueue = 0; enqueue < 2; ++enqueue) {
        enqueueKernel();
    }
    const auto usedAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedAfter, usedBefore);

    GenCmdList parsedCommands;
    auto *streamBase = commandList->commandContainer.getCommandStream()->getCpuBase();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, streamBase, usedAfter));

    findSemaphores<typename FamilyType::MI_SEMAPHORE_WAIT>(parsedCommands);
    findPipeControls<FamilyType>(parsedCommands);

    EXPECT_EQ(1u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(1u, semaphoreAfterWalkerFound);
    EXPECT_EQ(1u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(1u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to -2, each of the two enqueues must be surrounded by
// pause commands, so every counter ends up at two.
HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetToAlwaysWhenDispatchWalkersThenInsertPauseCommandsAroundEachEnqueue) {
    DebugManager.flags.PauseOnEnqueue.set(-2);

    const auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
    for (int enqueue = 0; enqueue < 2; ++enqueue) {
        enqueueKernel();
    }
    const auto usedAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedAfter, usedBefore);

    GenCmdList parsedCommands;
    auto *streamBase = commandList->commandContainer.getCommandStream()->getCpuBase();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, streamBase, usedAfter));

    findSemaphores<typename FamilyType::MI_SEMAPHORE_WAIT>(parsedCommands);
    findPipeControls<FamilyType>(parsedCommands);

    EXPECT_EQ(2u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(2u, semaphoreAfterWalkerFound);
    EXPECT_EQ(2u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(2u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to 0 and PauseOnGpuMode restricted to BeforeWorkload,
// two enqueues must produce a single "before" pause and no "after" pause.
HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeOnlyWhenDispatchingThenInsertPauseOnlyBeforeEnqueue) {
    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeWorkload);

    const auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
    for (int enqueue = 0; enqueue < 2; ++enqueue) {
        enqueueKernel();
    }
    const auto usedAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedAfter, usedBefore);

    GenCmdList parsedCommands;
    auto *streamBase = commandList->commandContainer.getCommandStream()->getCpuBase();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, streamBase, usedAfter));

    findSemaphores<typename FamilyType::MI_SEMAPHORE_WAIT>(parsedCommands);
    findPipeControls<FamilyType>(parsedCommands);

    EXPECT_EQ(1u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(0u, semaphoreAfterWalkerFound);
    EXPECT_EQ(1u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(0u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to 0 and PauseOnGpuMode restricted to AfterWorkload,
// a single enqueue must produce only the "after" pause commands.
HWTEST_F(PauseOnGpuTests, givenPauseModeSetToAfterOnlyWhenDispatchingThenInsertPauseOnlyAfterEnqueue) {
    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::AfterWorkload);

    const auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
    enqueueKernel();
    const auto usedAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedAfter, usedBefore);

    GenCmdList parsedCommands;
    auto *streamBase = commandList->commandContainer.getCommandStream()->getCpuBase();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, streamBase, usedAfter));

    findSemaphores<typename FamilyType::MI_SEMAPHORE_WAIT>(parsedCommands);
    findPipeControls<FamilyType>(parsedCommands);

    EXPECT_EQ(0u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(1u, semaphoreAfterWalkerFound);
    EXPECT_EQ(0u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(1u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to 0 and PauseOnGpuMode set to BeforeAndAfterWorkload,
// a single enqueue must be wrapped by both the "before" and "after" pause commands.
HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeAndAfterWhenDispatchingThenInsertPauseAroundEnqueue) {
    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeAndAfterWorkload);

    const auto usedBefore = commandList->commandContainer.getCommandStream()->getUsed();
    enqueueKernel();
    const auto usedAfter = commandList->commandContainer.getCommandStream()->getUsed();
    ASSERT_GT(usedAfter, usedBefore);

    GenCmdList parsedCommands;
    auto *streamBase = commandList->commandContainer.getCommandStream()->getCpuBase();
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(parsedCommands, streamBase, usedAfter));

    findSemaphores<typename FamilyType::MI_SEMAPHORE_WAIT>(parsedCommands);
    findPipeControls<FamilyType>(parsedCommands);

    EXPECT_EQ(1u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(1u, semaphoreAfterWalkerFound);
    EXPECT_EQ(1u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(1u, pipeControlAfterWalkerFound);
}
} // namespace ult
} // namespace L0

View File

@ -19,6 +19,8 @@
#include "encode_surface_state_args.h"
#include <list>
namespace NEO {
class BindlessHeapsHelper;
@ -48,6 +50,7 @@ struct EncodeDispatchKernelArgs {
bool isKernelUsingSystemAllocation = false;
bool isKernelDispatchedFromImmediateCmdList = false;
bool isRcs = false;
std::list<void *> *additionalCommands = nullptr;
};
struct EncodeWalkerArgs {

View File

@ -13,6 +13,7 @@
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/simd_helper.h"
#include "shared/source/helpers/state_base_address.h"
@ -232,6 +233,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
auto threadGroupCount = cmd.getThreadGroupIdXDimension() * cmd.getThreadGroupIdYDimension() * cmd.getThreadGroupIdZDimension();
EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(idd, hwInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
args.additionalCommands->push_back(semaphoreCommand);
}
PreemptionHelper::applyPreemptionWaCmdsBegin<Family>(listCmdBufferStream, *args.device);
auto buffer = listCmdBufferStream->getSpace(sizeof(cmd));
@ -244,6 +254,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
args.partitionCount = 1;
// Pause-after-workload hook. Taken only when pauseModeAllowed() accepts the PauseOnEnqueue debug
// flag value together with the device's current debugExecutionCounter for AfterWorkload mode.
// Two regions are obtained from the command stream, in this order: space sized for a barrier with
// post-sync operation, then one MI_SEMAPHORE_WAIT. Their addresses are appended to
// args.additionalCommands in the same order; nothing in this block writes into either region.
// NOTE(review): presumably the caller programs/patches these regions later - confirm.
// NOTE(review): args.additionalCommands is dereferenced without a null check - confirm every caller
// provides the list whenever the flag can be enabled.
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
args.additionalCommands->push_back(semaphoreCommand);
}
}
template <typename Family>

View File

@ -20,6 +20,7 @@
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_walk_order.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
#include "shared/source/helpers/pipe_control_args.h"
#include "shared/source/helpers/pipeline_select_helper.h"
#include "shared/source/helpers/ray_tracing_helper.h"
@ -228,6 +229,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.requiresUncachedMocs = false;
}
// Pause-before-workload hook. Taken only when pauseModeAllowed() accepts the PauseOnEnqueue debug
// flag value together with the device's current debugExecutionCounter for BeforeWorkload mode.
// Two regions are obtained from the command stream, in this order: space sized for a barrier with
// post-sync operation, then one MI_SEMAPHORE_WAIT. Their addresses are appended to
// args.additionalCommands in the same order; nothing in this block writes into either region.
// NOTE(review): presumably the caller programs/patches these regions later - confirm.
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
    void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
    args.additionalCommands->push_back(commandBuffer);

    using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
    MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
    // An object pointer converts to void * implicitly; no reinterpret_cast is needed, which also
    // keeps this hook identical in form to the other pause hooks.
    args.additionalCommands->push_back(semaphoreCommand);
}
walkerCmd.setIndirectDataStartAddress(static_cast<uint32_t>(offsetThreadData));
walkerCmd.setIndirectDataLength(sizeThreadData);
@ -301,6 +311,15 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
}
PreemptionHelper::applyPreemptionWaCmdsEnd<Family>(listCmdBufferStream, *args.device);
// Pause-after-workload hook. Taken only when pauseModeAllowed() accepts the PauseOnEnqueue debug
// flag value together with the device's current debugExecutionCounter for AfterWorkload mode.
// Two regions are obtained from the command stream, in this order: space sized for a barrier with
// post-sync operation, then one MI_SEMAPHORE_WAIT. Their addresses are appended to
// args.additionalCommands in the same order; nothing in this block writes into either region.
// NOTE(review): presumably the caller programs/patches these regions later - confirm.
// NOTE(review): args.additionalCommands is dereferenced without a null check - confirm every caller
// provides the list whenever the flag can be enabled.
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::DebugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(hwInfo, false));
args.additionalCommands->push_back(commandBuffer);
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
MI_SEMAPHORE_WAIT *semaphoreCommand = listCmdBufferStream->getSpaceForCmd<MI_SEMAPHORE_WAIT>();
args.additionalCommands->push_back(semaphoreCommand);
}
}
template <typename Family>

View File

@ -150,6 +150,8 @@ class Device : public ReferenceTrackedObject<Device> {
MOCKABLE_VIRTUAL bool verifyAdapterLuid();
void getAdapterMask(uint32_t &nodeMask);
std::atomic<uint32_t> debugExecutionCounter = 0;
protected:
Device() = delete;
Device(ExecutionEnvironment *executionEnvironment, const uint32_t rootDeviceIndex);

View File

@ -698,6 +698,38 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, giveNextIddInBlockZeorWhenD
ASSERT_NE(itorPC, commands.end());
}
// PauseOnEnqueue = -1 (the "never" setting, per the test name): encoding a walker must leave the
// caller-supplied additionalCommands list empty.
HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToNeverWhenEncodingWalkerThenCommandsToPatchAreNotPresent) {
    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.PauseOnEnqueue.set(-1);

    auto kernelEncoder = std::make_unique<MockDispatchKernelEncoder>();
    uint32_t dims[] = {1, 1, 1};
    bool requiresUncachedMocs = false;

    // The encoder appends to this list only through the pointer installed on the args below.
    std::list<void *> recordedCommands;
    EncodeDispatchKernelArgs encodeArgs = createDefaultDispatchKernelArgs(pDevice, kernelEncoder.get(), dims, requiresUncachedMocs);
    encodeArgs.additionalCommands = &recordedCommands;

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, encodeArgs, nullptr);

    EXPECT_TRUE(recordedCommands.empty());
}
// PauseOnEnqueue = -2 (the "always" setting, per the test name): encoding a walker must record four
// entries in the caller-supplied additionalCommands list.
// NOTE(review): four presumably corresponds to two recorded pointers for each of the before- and
// after-workload pause points - confirm against EncodeDispatchKernel::encode.
HWTEST_F(CommandEncodeStatesTest, givenPauseOnEnqueueSetToAlwaysWhenEncodingWalkerThenCommandsToPatchAreFilled) {
    DebugManagerStateRestore flagsRestore;
    DebugManager.flags.PauseOnEnqueue.set(-2);

    auto kernelEncoder = std::make_unique<MockDispatchKernelEncoder>();
    uint32_t dims[] = {1, 1, 1};
    bool requiresUncachedMocs = false;

    // The encoder appends to this list only through the pointer installed on the args below.
    std::list<void *> recordedCommands;
    EncodeDispatchKernelArgs encodeArgs = createDefaultDispatchKernelArgs(pDevice, kernelEncoder.get(), dims, requiresUncachedMocs);
    encodeArgs.additionalCommands = &recordedCommands;

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, encodeArgs, nullptr);

    EXPECT_EQ(4u, recordedCommands.size());
}
using EncodeDispatchKernelTest = Test<CommandEncodeStatesFixture>;
HWTEST2_F(EncodeDispatchKernelTest, givenBindfulKernelWhenDispatchingKernelThenSshFromContainerIsUsed, IsAtLeastSkl) {