fix: add pipe control before scratch register write

Signed-off-by: Mateusz Jablonski <mateusz.jablonski@intel.com>
This commit is contained in:
Mateusz Jablonski 2025-01-30 23:18:50 +00:00 committed by Compute-Runtime-Automation
parent 3bc841a8a8
commit 0a068ce96a
2 changed files with 62 additions and 3 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2024 Intel Corporation
* Copyright (C) 2018-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -165,6 +165,16 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
if (PauseOnGpuProperties::gpuScratchRegWriteAllowed(debugManager.flags.GpuScratchRegWriteAfterWalker.get(), commandQueue.getGpgpuCommandStreamReceiver().peekTaskCount())) {
uint32_t registerOffset = debugManager.flags.GpuScratchRegWriteRegisterOffset.get();
uint32_t registerData = debugManager.flags.GpuScratchRegWriteRegisterData.get();
PipeControlArgs args;
args.dcFlushEnable = MemorySynchronizationCommands<GfxFamily>::getDcFlushEnable(true, commandQueue.getDevice().getRootDeviceEnvironment());
MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(
*commandStream,
PostSyncMode::noWrite,
0u,
0u,
commandQueue.getDevice().getRootDeviceEnvironment(),
args);
LriHelper<GfxFamily>::program(commandStream, registerOffset, registerData, EncodeSetMMIO<GfxFamily>::isRemapApplicable(registerOffset), commandQueue.isBcs());
}

View File

@ -1789,6 +1789,25 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
return false;
}
// Checks whether the command under `iterator` is the barrier PIPE_CONTROL that
// carries no post-sync payload.
//
// Returns true when the command has zero immediate data, a zero post-sync
// address and a DC-flush bit equal to what
// MemorySynchronizationCommands::getDcFlushEnable(true, ...) reports for the
// test device; in that case it additionally EXPECTs CS stall to be enabled and
// the post-sync operation to be NO_WRITE. Returns false otherwise.
//
// The result of genCmdCast is dereferenced without a null check, so the caller
// must pass an iterator that points at a PIPE_CONTROL.
template <typename FamilyType>
bool verifyPipeControlNoPostSync(const GenCmdList::iterator &iterator) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;

    auto pipeControl = genCmdCast<PIPE_CONTROL *>(*iterator);
    const auto expectedDcFlush = MemorySynchronizationCommands<FamilyType>::getDcFlushEnable(true, this->pDevice->getRootDeviceEnvironment());

    // Reject anything that does not look like the payload-free barrier.
    if (0u != pipeControl->getImmediateData()) {
        return false;
    }
    if (0u != NEO::UnitTestHelper<FamilyType>::getPipeControlPostSyncAddress(*pipeControl)) {
        return false;
    }
    if (expectedDcFlush != pipeControl->getDcFlushEnable()) {
        return false;
    }

    // Candidate matched: validate the remaining fields of the barrier.
    EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());
    EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE, pipeControl->getPostSyncOperation());
    return true;
}
template <typename FamilyType>
bool verifyLoadRegImm(const GenCmdList::iterator &iterator) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
@ -1854,6 +1873,33 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
}
}
// Counts payload-free PIPE_CONTROLs placed between a walker and the
// MI_LOAD_REGISTER_IMM that follows it.
//
// The command list is split into segments, each starting at a walker and ending
// at the next walker (or at the end of the list). Within a segment only the
// first MI_LOAD_REGISTER_IMM is considered; if it passes verifyLoadRegImm, every
// PIPE_CONTROL between the walker and that command which passes
// verifyPipeControlNoPostSync increments pipeControlsBeforeLoadRegImm.
template <typename FamilyType>
void findPipeControlsBeforeLoadRegImm(GenCmdList &cmdList) {
    using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    auto walkers = NEO::UnitTestHelper<FamilyType>::findAllWalkerTypeCmds(cmdList.begin(), cmdList.end());
    for (auto idx = 0u; idx < walkers.size(); idx++) {
        auto segmentEnd = idx + 1 < walkers.size() ? walkers[idx + 1] : cmdList.end();
        auto segmentBegin = walkers[idx];

        // Skip segments with no register write, or whose first register write
        // is not the one verifyLoadRegImm accepts.
        auto registerWrite = find<MI_LOAD_REGISTER_IMM *>(segmentBegin, segmentEnd);
        if (registerWrite == segmentEnd || !verifyLoadRegImm<FamilyType>(registerWrite)) {
            continue;
        }

        // Visit every PIPE_CONTROL located before the register write.
        for (auto barrier = find<PIPE_CONTROL *>(segmentBegin, registerWrite);
             barrier != registerWrite;
             barrier = find<PIPE_CONTROL *>(++barrier, registerWrite)) {
            if (verifyPipeControlNoPostSync<FamilyType>(barrier)) {
                pipeControlsBeforeLoadRegImm++;
            }
        }
    }
}
DebugManagerStateRestore restore;
const size_t off[3] = {0, 0, 0};
@ -1866,6 +1912,7 @@ struct PauseOnGpuTests : public EnqueueKernelTest {
uint32_t pipeControlBeforeWalkerFound = 0;
uint32_t pipeControlAfterWalkerFound = 0;
uint32_t loadRegImmsFound = 0;
uint32_t pipeControlsBeforeLoadRegImm = 0;
bool heaplessStateInit = false;
};
@ -2037,8 +2084,8 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatchWalkersThenInse
EXPECT_EQ(pCmdQ->getHeaplessStateInitEnabled() ? 2u : 1u, loadRegImmsFound);
}
HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatcMultiplehWalkersThenInsertLoadRegisterImmCommandOnlyOnce) {
debugManager.flags.GpuScratchRegWriteAfterWalker.set(1);
HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatchMultiplehWalkersThenInsertPipeControlAndLoadRegisterImmCommandsOnlyOnce) {
debugManager.flags.GpuScratchRegWriteAfterWalker.set(2);
debugManager.flags.GpuScratchRegWriteRegisterData.set(0x1234);
debugManager.flags.GpuScratchRegWriteRegisterOffset.set(0x5678);
@ -2054,8 +2101,10 @@ HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenDispatcMultiplehWalkers
hwParser.parseCommands<FamilyType>(*pCmdQ);
findLoadRegImms<FamilyType>(hwParser.cmdList);
findPipeControlsBeforeLoadRegImm<FamilyType>(hwParser.cmdList);
EXPECT_EQ(1u, loadRegImmsFound);
EXPECT_EQ(1u, pipeControlsBeforeLoadRegImm);
}
HWTEST_F(PauseOnGpuTests, givenGpuScratchWriteEnabledWhenEstimatingCommandStreamSizeThenMiLoadRegisterImmCommandSizeIsIncluded) {