fix: PauseOnEnqueue for Immediate command lists on L0

Related-To: NEO-7560
Signed-off-by: Naklicki, Mateusz <mateusz.naklicki@intel.com>
This commit is contained in:
Naklicki, Mateusz
2023-01-03 13:14:36 +00:00
committed by Compute-Runtime-Automation
parent 468d722efb
commit 48247cc42f
5 changed files with 236 additions and 16 deletions

View File

@ -9,6 +9,7 @@
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/helpers/completion_stamp.h"
@ -20,6 +21,7 @@
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/cmdqueue/cmdqueue_hw.h"
#include "level_zero/core/source/device/bcs_split.h"
#include "level_zero/core/source/helpers/error_code_helper_l0.h"
@ -199,6 +201,8 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
this->cmdQImmediate->makeResidentAndMigrate(performMigration, this->commandContainer.getResidencyContainer());
static_cast<CommandQueueHw<gfxCoreFamily> *>(this->cmdQImmediate)->patchCommands(*this, 0u);
if (performMigration) {
this->migrateSharedAllocations();
}
@ -238,6 +242,11 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommandListImm
this->containsAnyKernel = false;
this->handlePostSubmissionState();
if (NEO::DebugManager.flags.PauseOnEnqueue.get() != -1) {
this->device->getNEODevice()->debugExecutionCounter++;
}
return ZE_RESULT_SUCCESS;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2022 Intel Corporation
* Copyright (C) 2020-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -148,7 +148,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = this->csr->getDcFlushSupport();
args.dcFlushEnable = csr->getDcFlushSupport();
auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
@ -164,7 +164,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = this->csr->getDcFlushSupport();
args.dcFlushEnable = csr->getDcFlushSupport();
auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
* Copyright (C) 2021-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -145,6 +145,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
for (auto &commandToPatch : commandsToPatch) {
switch (commandToPatch.type) {
case CommandList::CommandToPatch::FrontEndState: {
UNRECOVERABLE_IF(scratchAddress == 0u);
uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress);
CFE_STATE *cfeStateCmd = nullptr;
cfeStateCmd = reinterpret_cast<CFE_STATE *>(commandToPatch.pCommand);
@ -175,7 +176,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = this->csr->getDcFlushSupport();
args.dcFlushEnable = csr->getDcFlushSupport();
auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
@ -191,7 +192,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
auto &hwInfo = device->getNEODevice()->getHardwareInfo();
NEO::PipeControlArgs args;
args.dcFlushEnable = this->csr->getDcFlushSupport();
args.dcFlushEnable = csr->getDcFlushSupport();
auto command = reinterpret_cast<void *>(commandToPatch.pCommand);
NEO::MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(

View File

@ -1004,6 +1004,29 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr
}
}
// Expects patchCommands() to throw when a FrontEndState patch is requested
// together with a scratch address of 0.
HWTEST2_F(CommandQueueScratchTests, givenInvalidScratchAddressWhenPatchCommandsIsCalledThenAbortIsThrown, IsAtLeastXeHpCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    ze_command_queue_desc_t queueDesc = {};
    NEO::CommandStreamReceiver *receiver = nullptr;
    device->getCsrForOrdinalAndIndex(&receiver, 0u, 0u);

    auto commandQueue = std::make_unique<MockCommandQueueHw<gfxCoreFamily>>(device, receiver, &queueDesc);
    auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();

    CFE_STATE patchedCfeState;
    // NOTE(review): the source command is heap-allocated and not released in this test;
    // presumably the command list takes ownership through commandsToPatch - confirm.
    auto initialCfeState = new CFE_STATE;
    *initialCfeState = FamilyType::cmdInitCfeState;

    // Register a single front-end-state patch entry on the command list.
    CommandList::CommandToPatch frontEndPatch;
    frontEndPatch.pDestination = &patchedCfeState;
    frontEndPatch.pCommand = initialCfeState;
    frontEndPatch.type = CommandList::CommandToPatch::CommandType::FrontEndState;
    commandList->commandsToPatch.push_back(frontEndPatch);

    const uint64_t zeroScratchAddress = 0u;
    EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, zeroScratchAddress));
}
using IsWithinNotSupported = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;
HWTEST2_F(CommandQueueScratchTests, givenCommandsToPatchToNotSupportedPlatformWhenPatchCommandsIsCalledThenAbortIsThrown, IsWithinNotSupported) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -262,21 +262,14 @@ HWTEST2_F(CommandQueueExecuteCommandListsSimpleTest, givenTwoCommandQueuesUsingS
commandQueue2->destroy();
}
struct PauseOnGpuTests : public Test<ModuleFixture> {
void SetUp() override {
struct PauseOnGpuFixture : public Test<ModuleFixture> {
void setUp() {
ModuleFixture::setUp();
auto &csr = neoDevice->getGpgpuCommandStreamReceiver();
debugPauseStateAddress = csr.getDebugPauseStateGPUAddress();
createKernel();
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue;
commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
ASSERT_NE(nullptr, commandQueue);
commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue);
commandListHandle = commandList->toHandle();
}
void TearDown() override {
@ -285,6 +278,16 @@ struct PauseOnGpuTests : public Test<ModuleFixture> {
ModuleFixture::tearDown();
}
void createImmediateCommandList() {
commandList->destroy();
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue;
commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
ASSERT_NE(nullptr, commandList);
commandListHandle = commandList->toHandle();
}
template <typename MI_SEMAPHORE_WAIT>
bool verifySemaphore(const GenCmdList::iterator &iterator, uint64_t debugPauseStateAddress, DebugPauseState requiredDebugPauseState) {
auto semaphoreCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*iterator);
@ -396,6 +399,32 @@ struct PauseOnGpuTests : public Test<ModuleFixture> {
uint32_t pipeControlAfterWalkerFound = 0;
};
struct PauseOnGpuTests : public PauseOnGpuFixture {
void SetUp() override {
PauseOnGpuFixture::setUp();
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue;
commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
ASSERT_NE(nullptr, commandQueue);
commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue);
ASSERT_NE(nullptr, commandList);
commandListHandle = commandList->toHandle();
}
void enqueueKernel() {
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->close();
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
result = commandQueue->executeCommandLists(1u, &commandListHandle, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
};
HWTEST_F(PauseOnGpuTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
@ -534,6 +563,164 @@ HWTEST_F(PauseOnGpuTests, givenPauseModeSetToBeforeAndAfterWhenDispatchingThenIn
EXPECT_EQ(1u, pipeControlAfterWalkerFound);
}
struct PauseOnGpuWithImmediateCommandListTests : public PauseOnGpuFixture {
void SetUp() override {
PauseOnGpuFixture::setUp();
ze_command_queue_desc_t queueDesc = {};
ze_result_t returnValue;
commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
ASSERT_NE(nullptr, commandQueue);
commandList = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
ASSERT_NE(nullptr, commandList);
commandListHandle = commandList->toHandle();
}
void enqueueKernel() {
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
}
};
// With PauseOnEnqueue set to 1, two kernel enqueues on the immediate command
// list are expected to produce exactly one pause before and one after a walker.
HWTEST_F(PauseOnGpuWithImmediateCommandListTests, givenPauseOnEnqueueFlagSetWhenDispatchWalkersThenInsertPauseCommandsAroundSpecifiedEnqueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManager.flags.PauseOnEnqueue.set(1);

    auto usedBeforeEnqueue = commandList->commandContainer.getCommandStream()->getUsed();

    enqueueKernel();
    enqueueKernel();

    auto *const stream = commandList->commandContainer.getCommandStream();
    auto usedAfterEnqueue = stream->getUsed();
    ASSERT_GT(usedAfterEnqueue, usedBeforeEnqueue);

    // Parse everything written to the command stream so far.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(stream->getCpuBase(), 0), usedAfterEnqueue));

    findSemaphores<MI_SEMAPHORE_WAIT>(cmdList);
    findPipeControls<FamilyType>(cmdList);

    EXPECT_EQ(1u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(1u, semaphoreAfterWalkerFound);
    EXPECT_EQ(1u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(1u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to -2 (the "always" setting, per the test name), each
// of the two kernel enqueues is expected to be surrounded by pause commands.
HWTEST_F(PauseOnGpuWithImmediateCommandListTests, givenPauseOnEnqueueFlagSetToAlwaysWhenDispatchWalkersThenInsertPauseCommandsAroundEachEnqueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManager.flags.PauseOnEnqueue.set(-2);

    auto usedBeforeEnqueue = commandList->commandContainer.getCommandStream()->getUsed();

    enqueueKernel();
    enqueueKernel();

    auto *const stream = commandList->commandContainer.getCommandStream();
    auto usedAfterEnqueue = stream->getUsed();
    ASSERT_GT(usedAfterEnqueue, usedBeforeEnqueue);

    // Parse everything written to the command stream so far.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(stream->getCpuBase(), 0), usedAfterEnqueue));

    findSemaphores<MI_SEMAPHORE_WAIT>(cmdList);
    findPipeControls<FamilyType>(cmdList);

    EXPECT_EQ(2u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(2u, semaphoreAfterWalkerFound);
    EXPECT_EQ(2u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(2u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to 0 and the pause mode set to BeforeWorkload, two
// kernel enqueues are expected to yield one pause before a walker and none after.
HWTEST_F(PauseOnGpuWithImmediateCommandListTests, givenPauseModeSetToBeforeOnlyWhenDispatchingThenInsertPauseOnlyBeforeEnqueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeWorkload);

    auto usedBeforeEnqueue = commandList->commandContainer.getCommandStream()->getUsed();

    enqueueKernel();
    enqueueKernel();

    auto *const stream = commandList->commandContainer.getCommandStream();
    auto usedAfterEnqueue = stream->getUsed();
    ASSERT_GT(usedAfterEnqueue, usedBeforeEnqueue);

    // Parse everything written to the command stream so far.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(stream->getCpuBase(), 0), usedAfterEnqueue));

    findSemaphores<MI_SEMAPHORE_WAIT>(cmdList);
    findPipeControls<FamilyType>(cmdList);

    EXPECT_EQ(1u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(0u, semaphoreAfterWalkerFound);
    EXPECT_EQ(1u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(0u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to 0 and the pause mode set to AfterWorkload, a single
// kernel enqueue is expected to yield one pause after the walker and none before.
HWTEST_F(PauseOnGpuWithImmediateCommandListTests, givenPauseModeSetToAfterOnlyWhenDispatchingThenInsertPauseOnlyAfterEnqueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::AfterWorkload);

    auto usedBeforeEnqueue = commandList->commandContainer.getCommandStream()->getUsed();

    enqueueKernel();

    auto *const stream = commandList->commandContainer.getCommandStream();
    auto usedAfterEnqueue = stream->getUsed();
    ASSERT_GT(usedAfterEnqueue, usedBeforeEnqueue);

    // Parse everything written to the command stream so far.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(stream->getCpuBase(), 0), usedAfterEnqueue));

    findSemaphores<MI_SEMAPHORE_WAIT>(cmdList);
    findPipeControls<FamilyType>(cmdList);

    EXPECT_EQ(0u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(1u, semaphoreAfterWalkerFound);
    EXPECT_EQ(0u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(1u, pipeControlAfterWalkerFound);
}
// With PauseOnEnqueue set to 0 and the pause mode set to BeforeAndAfterWorkload,
// a single kernel enqueue is expected to yield one pause on each side of the walker.
HWTEST_F(PauseOnGpuWithImmediateCommandListTests, givenPauseModeSetToBeforeAndAfterWhenDispatchingThenInsertPauseAroundEnqueue) {
    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    DebugManager.flags.PauseOnEnqueue.set(0);
    DebugManager.flags.PauseOnGpuMode.set(PauseOnGpuProperties::PauseMode::BeforeAndAfterWorkload);

    auto usedBeforeEnqueue = commandList->commandContainer.getCommandStream()->getUsed();

    enqueueKernel();

    auto *const stream = commandList->commandContainer.getCommandStream();
    auto usedAfterEnqueue = stream->getUsed();
    ASSERT_GT(usedAfterEnqueue, usedBeforeEnqueue);

    // Parse everything written to the command stream so far.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(stream->getCpuBase(), 0), usedAfterEnqueue));

    findSemaphores<MI_SEMAPHORE_WAIT>(cmdList);
    findPipeControls<FamilyType>(cmdList);

    EXPECT_EQ(1u, semaphoreBeforeWalkerFound);
    EXPECT_EQ(1u, semaphoreAfterWalkerFound);
    EXPECT_EQ(1u, pipeControlBeforeWalkerFound);
    EXPECT_EQ(1u, pipeControlAfterWalkerFound);
}
using CmdListPipelineSelectStateTest = Test<CmdListPipelineSelectStateFixture>;
using SystolicSupport = IsAnyProducts<IGFX_ALDERLAKE_P, IGFX_XE_HP_SDV, IGFX_DG2, IGFX_PVC>;