refactor: remove unneeded patch preamble function

Related-To: NEO-16434

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2025-10-22 16:08:53 +00:00
committed by Compute-Runtime-Automation
parent a84629c8b3
commit 9474d455b1
6 changed files with 35 additions and 35 deletions

View File

@@ -407,9 +407,4 @@ void CommandQueueImp::makeResidentForResidencyContainer(const NEO::ResidencyCont
}
}
// Reports whether a patch-preamble wait is needed for the given command list.
// Returns true only when all three conditions hold:
//   1. this queue's saveWaitForPreamble flag is set,
//   2. the command list's latest tag GPU address is non-zero,
//   3. that address differs from the GPU address of the tag allocation
//      obtained through getCsr().
// commandList is dereferenced unconditionally, so it must not be null.
bool CommandQueueImp::checkNeededPatchPreambleWait(CommandList *commandList) {
// Read the tag address up front; it is used twice in the condition below.
uint64_t tagGpuAddress = commandList->getLatestTagGpuAddress();
// && short-circuits: getCsr()->getTagAllocation() is only evaluated when the flag is set and the address is non-zero.
return this->saveWaitForPreamble && (tagGpuAddress != 0) && (getCsr()->getTagAllocation()->getGpuAddress() != tagGpuAddress);
}
} // namespace L0

View File

@@ -119,6 +119,7 @@ struct CommandQueueHw : public CommandQueueImp {
bool lockScratchController = false;
bool cmdListScratchAddressPatchingEnabled = false;
bool containsParentImmediateStream = false;
bool patchPreambleWaitSyncNeeded = false;
};
inline void processMemAdviseOperations(CommandList *commandList);
@@ -160,7 +161,7 @@ struct CommandQueueHw : public CommandQueueImp {
inline size_t estimateCommandListPatchPreamble(CommandListExecutionContext &ctx, uint32_t numCommandLists);
inline size_t estimateCommandListPatchPreambleFrontEndCmd(CommandListExecutionContext &ctx, CommandList *commandList);
inline void getCommandListPatchPreambleData(CommandListExecutionContext &ctx, CommandList *commandList);
inline size_t estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);
size_t estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);
inline size_t estimateTotalPatchPreambleData(CommandListExecutionContext &ctx);
inline void retrivePatchPreambleSpace(CommandListExecutionContext &ctx, NEO::LinearStream &commandStream);
inline void dispatchPatchPreambleEnding(CommandListExecutionContext &ctx);

View File

@@ -934,9 +934,10 @@ template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList) {
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
size_t waitSize = 0;
if (this->patchingPreamble) {
bool needWait = this->checkNeededPatchPreambleWait(commandList);
if (needWait) {
if (this->patchingPreamble && this->saveWaitForPreamble) {
uint64_t tagGpuAddress = commandList->getLatestTagGpuAddress();
ctx.patchPreambleWaitSyncNeeded = (tagGpuAddress != 0) && (getCsr()->getTagAllocation()->getGpuAddress() != tagGpuAddress);
if (ctx.patchPreambleWaitSyncNeeded) {
waitSize = NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait();
waitSize += (2 * sizeof(MI_LOAD_REGISTER_IMM));
}
@@ -1042,7 +1043,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchPatchPreambleCommandListWaitSync(Com
using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
if (this->patchingPreamble) {
if (this->checkNeededPatchPreambleWait(commandList)) {
if (ctx.patchPreambleWaitSyncNeeded) {
constexpr uint32_t firstRegister = RegisterOffsets::csGprR0;
constexpr uint32_t secondRegister = RegisterOffsets::csGprR0 + 4;

View File

@@ -120,8 +120,6 @@ struct CommandQueueImp : public CommandQueue {
}
void makeResidentForResidencyContainer(const NEO::ResidencyContainer &residencyContainer);
bool checkNeededPatchPreambleWait(CommandList *commandList);
protected:
MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
bool isCooperative);

View File

@@ -92,6 +92,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
using BaseClass::commandStream;
using BaseClass::estimateCommandListPatchPreambleWaitSync;
using BaseClass::estimateStreamSizeForExecuteCommandListsRegularHeapless;
using BaseClass::executeCommandListsRegularHeapless;
using BaseClass::forceBbStartJump;

View File

@@ -20,6 +20,7 @@
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
#include "level_zero/core/test/unit_tests/sources/helper/ze_object_utils.h"
namespace L0 {
namespace ult {
@@ -1295,6 +1296,9 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleWhenTwoCmd
}
HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingWaitDataWhenQueueSavesDataThenCommandListsHaveCorrectData) {
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
ze_result_t returnValue;
ze_command_queue_desc_t queueDesc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
queueDesc.ordinal = 0u;
@@ -1302,14 +1306,11 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingW
queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
WhiteBox<L0::CommandQueue> *commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&queueDesc,
false,
false,
false,
returnValue));
constexpr size_t expectedSize = 2 * sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_SEMAPHORE_WAIT);
typename MockCommandQueueHw<FamilyType::gfxCoreFamily>::CommandListExecutionContext ctx{};
auto mockCmdQHw = makeZeUniquePtr<MockCommandQueueHw<FamilyType::gfxCoreFamily>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);
returnValue = mockCmdQHw->initialize(false, false, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false);
@@ -1318,38 +1319,42 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingW
ze_command_list_handle_t commandListHandle = commandList->toHandle();
commandList->close();
commandQueue->setPatchingPreamble(true, false);
EXPECT_TRUE(commandQueue->getPatchingPreamble());
EXPECT_FALSE(commandQueue->getSaveWaitForPreamble());
mockCmdQHw->setPatchingPreamble(true, false);
EXPECT_TRUE(mockCmdQHw->getPatchingPreamble());
EXPECT_FALSE(mockCmdQHw->getSaveWaitForPreamble());
NEO::GraphicsAllocation *expectedGpuAllocation = commandQueue->getCsr()->getTagAllocation();
NEO::GraphicsAllocation *expectedGpuAllocation = mockCmdQHw->getCsr()->getTagAllocation();
TaskCountType expectedTaskCount = 0x456;
uint64_t expectedGpuAddress = expectedGpuAllocation->getGpuAddress();
commandQueue->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
mockCmdQHw->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
// save and wait is disabled, so nothing to be saved
EXPECT_EQ(0u, commandList->getLatestTagGpuAddress());
EXPECT_EQ(0u, commandList->getLatestTaskCount());
EXPECT_FALSE(commandQueue->checkNeededPatchPreambleWait(commandList));
EXPECT_EQ(0u, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
EXPECT_FALSE(ctx.patchPreambleWaitSyncNeeded);
commandQueue->setPatchingPreamble(true, true);
EXPECT_TRUE(commandQueue->getPatchingPreamble());
EXPECT_TRUE(commandQueue->getSaveWaitForPreamble());
mockCmdQHw->setPatchingPreamble(true, true);
EXPECT_TRUE(mockCmdQHw->getPatchingPreamble());
EXPECT_TRUE(mockCmdQHw->getSaveWaitForPreamble());
EXPECT_FALSE(commandQueue->checkNeededPatchPreambleWait(commandList));
EXPECT_EQ(0u, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
EXPECT_FALSE(ctx.patchPreambleWaitSyncNeeded);
commandQueue->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
mockCmdQHw->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
// save and wait is now enabled
EXPECT_EQ(expectedGpuAddress, commandList->getLatestTagGpuAddress());
EXPECT_EQ(expectedTaskCount, commandList->getLatestTaskCount());
EXPECT_FALSE(commandQueue->checkNeededPatchPreambleWait(commandList));
EXPECT_EQ(0u, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
EXPECT_FALSE(ctx.patchPreambleWaitSyncNeeded);
MockGraphicsAllocation otherTagAllocation(nullptr, expectedGpuAddress + 0x1000, 1);
commandQueue->saveTagAndTaskCountForCommandLists(1, &commandListHandle, &otherTagAllocation, expectedTaskCount);
EXPECT_TRUE(commandQueue->checkNeededPatchPreambleWait(commandList));
mockCmdQHw->saveTagAndTaskCountForCommandLists(1, &commandListHandle, &otherTagAllocation, expectedTaskCount);
EXPECT_EQ(expectedSize, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
EXPECT_TRUE(ctx.patchPreambleWaitSyncNeeded);
commandList->reset();
EXPECT_EQ(0u, commandList->getLatestTagGpuAddress());
@@ -1357,7 +1362,6 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingW
EXPECT_EQ(nullptr, commandList->getLatestTagGpuAllocation());
commandList->destroy();
commandQueue->destroy();
}
HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingWaitDataWhenCmdListExecutedByQueueThenCmdListHaveCorrectData) {