refactor: remove unneeded patch preamble function

Related-To: NEO-16434
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
2026-01-08 14:02:58 +08:00 · 2025-10-22 16:08:53 +00:00
parent a84629c8b3
commit 9474d455b1
6 changed files with 35 additions and 35 deletions
--- a/level_zero/core/source/cmdqueue/cmdqueue.cpp
+++ b/level_zero/core/source/cmdqueue/cmdqueue.cpp
@@ -407,9 +407,4 @@ void CommandQueueImp::makeResidentForResidencyContainer(const NEO::ResidencyCont
    }
 }

-bool CommandQueueImp::checkNeededPatchPreambleWait(CommandList *commandList) {
-    uint64_t tagGpuAddress = commandList->getLatestTagGpuAddress();
-    return this->saveWaitForPreamble && (tagGpuAddress != 0) && (getCsr()->getTagAllocation()->getGpuAddress() != tagGpuAddress);
-}
-
 } // namespace L0
--- a/level_zero/core/source/cmdqueue/cmdqueue_hw.h
+++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.h
@@ -119,6 +119,7 @@ struct CommandQueueHw : public CommandQueueImp {
        bool lockScratchController = false;
        bool cmdListScratchAddressPatchingEnabled = false;
        bool containsParentImmediateStream = false;
+        bool patchPreambleWaitSyncNeeded = false;
    };

    inline void processMemAdviseOperations(CommandList *commandList);
@@ -160,7 +161,7 @@ struct CommandQueueHw : public CommandQueueImp {
    inline size_t estimateCommandListPatchPreamble(CommandListExecutionContext &ctx, uint32_t numCommandLists);
    inline size_t estimateCommandListPatchPreambleFrontEndCmd(CommandListExecutionContext &ctx, CommandList *commandList);
    inline void getCommandListPatchPreambleData(CommandListExecutionContext &ctx, CommandList *commandList);
-    inline size_t estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);
+    size_t estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList);
    inline size_t estimateTotalPatchPreambleData(CommandListExecutionContext &ctx);
    inline void retrivePatchPreambleSpace(CommandListExecutionContext &ctx, NEO::LinearStream &commandStream);
    inline void dispatchPatchPreambleEnding(CommandListExecutionContext &ctx);
--- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
+++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl
@@ -934,9 +934,10 @@ template <GFXCORE_FAMILY gfxCoreFamily>
 size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListPatchPreambleWaitSync(CommandListExecutionContext &ctx, CommandList *commandList) {
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
    size_t waitSize = 0;
-    if (this->patchingPreamble) {
-        bool needWait = this->checkNeededPatchPreambleWait(commandList);
-        if (needWait) {
+    if (this->patchingPreamble && this->saveWaitForPreamble) {
+        uint64_t tagGpuAddress = commandList->getLatestTagGpuAddress();
+        ctx.patchPreambleWaitSyncNeeded = (tagGpuAddress != 0) && (getCsr()->getTagAllocation()->getGpuAddress() != tagGpuAddress);
+        if (ctx.patchPreambleWaitSyncNeeded) {
            waitSize = NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait();
            waitSize += (2 * sizeof(MI_LOAD_REGISTER_IMM));
        }
@@ -1042,7 +1043,7 @@ void CommandQueueHw<gfxCoreFamily>::dispatchPatchPreambleCommandListWaitSync(Com
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;

    if (this->patchingPreamble) {
-        if (this->checkNeededPatchPreambleWait(commandList)) {
+        if (ctx.patchPreambleWaitSyncNeeded) {
            constexpr uint32_t firstRegister = RegisterOffsets::csGprR0;
            constexpr uint32_t secondRegister = RegisterOffsets::csGprR0 + 4;

--- a/level_zero/core/source/cmdqueue/cmdqueue_imp.h
+++ b/level_zero/core/source/cmdqueue/cmdqueue_imp.h
@@ -120,8 +120,6 @@ struct CommandQueueImp : public CommandQueue {
    }
    void makeResidentForResidencyContainer(const NEO::ResidencyContainer &residencyContainer);

-    bool checkNeededPatchPreambleWait(CommandList *commandList);
-
  protected:
    MOCKABLE_VIRTUAL NEO::SubmissionStatus submitBatchBuffer(size_t offset, NEO::ResidencyContainer &residencyContainer, void *endingCmdPtr,
                                                             bool isCooperative);
--- a/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h
+++ b/level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h
@@ -92,6 +92,7 @@ template <GFXCORE_FAMILY gfxCoreFamily>
 struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
    using BaseClass = ::L0::CommandQueueHw<gfxCoreFamily>;
    using BaseClass::commandStream;
+    using BaseClass::estimateCommandListPatchPreambleWaitSync;
    using BaseClass::estimateStreamSizeForExecuteCommandListsRegularHeapless;
    using BaseClass::executeCommandListsRegularHeapless;
    using BaseClass::forceBbStartJump;
--- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp
+++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_enqueue_cmdlist_2.cpp
@@ -20,6 +20,7 @@
 #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
 #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
 #include "level_zero/core/test/unit_tests/mocks/mock_fence.h"
+#include "level_zero/core/test/unit_tests/sources/helper/ze_object_utils.h"

 namespace L0 {
 namespace ult {
@@ -1295,6 +1296,9 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleWhenTwoCmd
 }

 HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingWaitDataWhenQueueSavesDataThenCommandListsHaveCorrectData) {
+    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
+    using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
+
    ze_result_t returnValue;
    ze_command_queue_desc_t queueDesc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
    queueDesc.ordinal = 0u;
@@ -1302,14 +1306,11 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingW
    queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;

-    WhiteBox<L0::CommandQueue> *commandQueue = whiteboxCast(CommandQueue::create(productFamily,
-                                                                                 device,
-                                                                                 neoDevice->getDefaultEngine().commandStreamReceiver,
-                                                                                 &queueDesc,
-                                                                                 false,
-                                                                                 false,
-                                                                                 false,
-                                                                                 returnValue));
+    constexpr size_t expectedSize = 2 * sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_SEMAPHORE_WAIT);
+    typename MockCommandQueueHw<FamilyType::gfxCoreFamily>::CommandListExecutionContext ctx{};
+
+    auto mockCmdQHw = makeZeUniquePtr<MockCommandQueueHw<FamilyType::gfxCoreFamily>>(device, device->getNEODevice()->getDefaultEngine().commandStreamReceiver, &queueDesc);
+    returnValue = mockCmdQHw->initialize(false, false, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false);
@@ -1318,38 +1319,42 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingW
    ze_command_list_handle_t commandListHandle = commandList->toHandle();
    commandList->close();

-    commandQueue->setPatchingPreamble(true, false);
-    EXPECT_TRUE(commandQueue->getPatchingPreamble());
-    EXPECT_FALSE(commandQueue->getSaveWaitForPreamble());
+    mockCmdQHw->setPatchingPreamble(true, false);
+    EXPECT_TRUE(mockCmdQHw->getPatchingPreamble());
+    EXPECT_FALSE(mockCmdQHw->getSaveWaitForPreamble());

-    NEO::GraphicsAllocation *expectedGpuAllocation = commandQueue->getCsr()->getTagAllocation();
+    NEO::GraphicsAllocation *expectedGpuAllocation = mockCmdQHw->getCsr()->getTagAllocation();
    TaskCountType expectedTaskCount = 0x456;
    uint64_t expectedGpuAddress = expectedGpuAllocation->getGpuAddress();

-    commandQueue->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
+    mockCmdQHw->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
    // save and wait is disabled, so nothing to be saved
    EXPECT_EQ(0u, commandList->getLatestTagGpuAddress());
    EXPECT_EQ(0u, commandList->getLatestTaskCount());

-    EXPECT_FALSE(commandQueue->checkNeededPatchPreambleWait(commandList));
+    EXPECT_EQ(0u, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
+    EXPECT_FALSE(ctx.patchPreambleWaitSyncNeeded);

-    commandQueue->setPatchingPreamble(true, true);
-    EXPECT_TRUE(commandQueue->getPatchingPreamble());
-    EXPECT_TRUE(commandQueue->getSaveWaitForPreamble());
+    mockCmdQHw->setPatchingPreamble(true, true);
+    EXPECT_TRUE(mockCmdQHw->getPatchingPreamble());
+    EXPECT_TRUE(mockCmdQHw->getSaveWaitForPreamble());

-    EXPECT_FALSE(commandQueue->checkNeededPatchPreambleWait(commandList));
+    EXPECT_EQ(0u, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
+    EXPECT_FALSE(ctx.patchPreambleWaitSyncNeeded);

-    commandQueue->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
+    mockCmdQHw->saveTagAndTaskCountForCommandLists(1, &commandListHandle, expectedGpuAllocation, expectedTaskCount);
    // save and wait is now enabled
    EXPECT_EQ(expectedGpuAddress, commandList->getLatestTagGpuAddress());
    EXPECT_EQ(expectedTaskCount, commandList->getLatestTaskCount());

-    EXPECT_FALSE(commandQueue->checkNeededPatchPreambleWait(commandList));
+    EXPECT_EQ(0u, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
+    EXPECT_FALSE(ctx.patchPreambleWaitSyncNeeded);

    MockGraphicsAllocation otherTagAllocation(nullptr, expectedGpuAddress + 0x1000, 1);

-    commandQueue->saveTagAndTaskCountForCommandLists(1, &commandListHandle, &otherTagAllocation, expectedTaskCount);
-    EXPECT_TRUE(commandQueue->checkNeededPatchPreambleWait(commandList));
+    mockCmdQHw->saveTagAndTaskCountForCommandLists(1, &commandListHandle, &otherTagAllocation, expectedTaskCount);
+    EXPECT_EQ(expectedSize, mockCmdQHw->estimateCommandListPatchPreambleWaitSync(ctx, commandList));
+    EXPECT_TRUE(ctx.patchPreambleWaitSyncNeeded);

    commandList->reset();
    EXPECT_EQ(0u, commandList->getLatestTagGpuAddress());
@@ -1357,7 +1362,6 @@ HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingW
    EXPECT_EQ(nullptr, commandList->getLatestTagGpuAllocation());

    commandList->destroy();
-    commandQueue->destroy();
 }

 HWTEST_F(CommandQueueExecuteCommandListsSimpleTest, givenPatchPreambleAndSavingWaitDataWhenCmdListExecutedByQueueThenCmdListHaveCorrectData) {