refactor: Pass information about the engine type

Extend the interface so that information about the engine type
(whether the target is a copy/BCS engine) is passed to the affected functions

Related-To: NEO-10678

Signed-off-by: Andrzej Koska <andrzej.koska@intel.com>
Andrzej Koska authored on 2024-04-19 14:20:27 +00:00, committed by Compute-Runtime-Automation
parent 31b2dcfe57
commit ae139aeffd
73 changed files with 296 additions and 251 deletions
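
Every hunk below applies the same pattern: an append/encode entry point gains a boolean saying whether the commands are emitted for a copy (BCS) engine, and callers derive that flag either from CommandList::isCopyOnly() or from the engine type of the OS context. A minimal, self-contained sketch of the idea, with simplified stand-in types rather than the actual NEO classes:

    #include <cstdint>

    // Stand-in for the shared encoders (EncodeMathMMIO, LriHelper, ...).
    struct Encoder {
        // Before: program(reg, value). After: the encoder is also told whether
        // it emits commands for a copy (BCS) engine.
        static void program(uint32_t reg, uint32_t value, bool isBcs) {
            // Any engine-specific decision now lives in the encoder,
            // not in every caller.
            (void)reg; (void)value; (void)isBcs;
        }
    };

    // Stand-in for L0::CommandList.
    struct CommandListSketch {
        bool copyOnly = false;
        bool isCopyOnly() const { return copyOnly; }

        void appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) {
            Encoder::program(reg, value, isBcs);
        }
    };

    int main() {
        CommandListSketch cmdList;
        cmdList.copyOnly = true; // a copy-only (BCS) command list
        cmdList.appendMILoadRegImm(0u, 0u, cmdList.isCopyOnly());
        return 0;
    }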

View File

@@ -154,7 +154,7 @@ struct CommandList : _ze_command_list_handle_t {
const size_t *pOffsets, ze_event_handle_t hSignalEvent,
uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) = 0;
-virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) = 0;
+virtual ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) = 0;
virtual ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) = 0;
virtual ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) = 0;
virtual ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) = 0;
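
Implementations override the three-argument form (see the CommandListCoreFamily hunks below); callers of the interface supply the flag themselves, typically from the command list. An illustrative call, where reg and value are placeholders:

    ze_result_t res = commandList->appendMILoadRegImm(reg, value, commandList->isCopyOnly());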

View File

@@ -1,5 +1,5 @@
/*
-* Copyright (C) 2020-2023 Intel Corporation
+* Copyright (C) 2020-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -14,7 +14,7 @@
namespace L0 {
template <GFXCORE_FAMILY gfxCoreFamily>
-ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegImm(uint32_t reg, uint32_t value) {
+ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -162,7 +162,7 @@ struct CommandListCoreFamily : public CommandListImp {
uint32_t numWaitEvents,
ze_event_handle_t *phWaitEvents, bool relaxedOrderingDispatch) override;
-ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value) override;
+ze_result_t appendMILoadRegImm(uint32_t reg, uint32_t value, bool isBcs) override;
ze_result_t appendMILoadRegReg(uint32_t reg1, uint32_t reg2) override;
ze_result_t appendMILoadRegMem(uint32_t reg1, uint64_t address) override;
ze_result_t appendMIStoreRegMem(uint32_t reg1, uint64_t address) override;

View File

@@ -543,7 +543,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchMultipleKernelsInd
commandContainer.addToResidencyContainer(alloc);
for (uint32_t i = 0; i < numKernels; i++) {
-NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i);
+NEO::EncodeMathMMIO<GfxFamily>::encodeGreaterThanPredicate(commandContainer, alloc->getGpuAddress(), i, isCopyOnly());
ret = appendLaunchKernelWithParams(Kernel::fromHandle(kernelHandles[i]),
pLaunchArgumentsBuffer[i],
@@ -2518,7 +2518,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnInOrderDependency(std::sh
for (uint32_t i = 0; i < inOrderExecInfo->getNumDevicePartitionsToWait(); i++) {
if (relaxedOrderingAllowed) {
-NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter());
+NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddress, waitValue, NEO::CompareOperation::less, true, isQwordInOrderCounter(), isCopyOnly());
} else {
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
@@ -2784,8 +2784,8 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWriteKernelTimestamp(Event *eve
uint64_t contextAddress = ptrOffset(baseAddr, contextOffset);
if (maskLsb) {
-NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
-NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
+NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::globalTimestampLdw, mask, globalAddress, workloadPartition, globalPostSyncCmdBuffer, isCopyOnly());
+NEO::EncodeMathMMIO<GfxFamily>::encodeBitwiseAndVal(commandContainer, RegisterOffsets::gpThreadTimeRegAddressOffsetLow, mask, contextAddress, workloadPartition, contextPostSyncCmdBuffer, isCopyOnly());
} else {
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::globalTimestampLdw, globalAddress, workloadPartition, globalPostSyncCmdBuffer);
NEO::EncodeStoreMMIO<GfxFamily>::encode(*commandContainer.getCommandStream(), RegisterOffsets::gpThreadTimeRegAddressOffsetLow, contextAddress, workloadPartition, contextPostSyncCmdBuffer);
@@ -3593,8 +3593,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendWaitOnMemory(void *desc,
if (isQwordInOrderCounter()) {
indirectMode = true;
-NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true);
-NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true);
+NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0, getLowPart(data), true, isCopyOnly());
+NEO::LriHelper<GfxFamily>::program(commandContainer.getCommandStream(), RegisterOffsets::csGprR0 + 4, getHighPart(data), true, isCopyOnly());
} else {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
@@ -3850,7 +3850,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendWaitOnSingleEvent(Event *event,
for (uint32_t i = 0u; i < packetsToWait; i++) {
if (relaxedOrderingAllowed) {
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(*commandContainer.getCommandStream(), 0, gpuAddr, Event::STATE_CLEARED,
-NEO::CompareOperation::equal, true, false);
+NEO::CompareOperation::equal, true, false, isCopyOnly());
} else {
NEO::EncodeSemaphore<GfxFamily>::addMiSemaphoreWaitCommand(*commandContainer.getCommandStream(),
gpuAddr,
@@ -4045,7 +4045,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
// Patch Primary Tile section skip (to Secondary Tile section)
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(skipPrimaryTileSectionCmdStream, cmdStream->getCurrentGpuAddressPosition(), workPartitionAllocationGpuVa, 0,
-NEO::CompareOperation::notEqual, false, false);
+NEO::CompareOperation::notEqual, false, false, isCopyOnly());
// Secondary Tile section
{
@@ -4059,7 +4059,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendFullSynchronizedDispatchInit()
// Patch Primary Tile section jump to end
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programConditionalDataMemBatchBufferStart(jumpToEndSectionFromPrimaryTile, cmdStream->getCurrentGpuAddressPosition(), syncAllocationGpuVa + sizeof(uint32_t), queueId,
-NEO::CompareOperation::equal, false, false);
+NEO::CompareOperation::equal, false, false, isCopyOnly());
// End section
NEO::EncodeMiPredicate<GfxFamily>::encode(*cmdStream, NEO::MiPredicateType::disable);
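
All of the call sites in this file take the flag from isCopyOnly(), so the value ultimately reflects how the command list was created: a list created on a copy-only queue group should reach the encoders with isBcs == true. A hedged, user-level sketch of such a creation (standard Level Zero API, not part of this commit; hContext, hDevice and copyOrdinal are assumed to exist):

    // copyOrdinal is assumed to be the ordinal of a queue group that reports
    // only ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY.
    ze_command_list_desc_t desc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC};
    desc.commandQueueGroupOrdinal = copyOrdinal;
    ze_command_list_handle_t hCommandList = nullptr;
    ze_result_t res = zeCommandListCreate(hContext, hDevice, &desc, &hCommandList);
    // Inside the driver such a list is copy-only, so isCopyOnly() returns true and
    // the new isBcs argument is true for the encoder calls appended to it.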

View File

@@ -473,13 +473,15 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionPrologue(uint32_t partitionDataSize) {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
-partitionDataSize);
+partitionDataSize,
+isCopyOnly());
}
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::appendMultiPartitionEpilogue() {
NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(),
-NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset());
+NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset(),
+isCopyOnly());
}
template <GFXCORE_FAMILY gfxCoreFamily>
@@ -568,7 +570,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendDispatchOffsetRegister(bool wor
if (workloadPartitionEvent && !device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout()) {
auto offset = beforeProfilingCmds ? NEO::ImplicitScalingDispatch<GfxFamily>::getTimeStampPostSyncOffset() : NEO::ImplicitScalingDispatch<GfxFamily>::getImmediateWritePostSyncOffset();
-NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset);
+NEO::ImplicitScalingDispatch<GfxFamily>::dispatchOffsetRegister(*commandContainer.getCommandStream(), offset, isCopyOnly());
}
}

View File

@@ -23,6 +23,7 @@
#include "shared/source/helpers/api_specific_config.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/definitions/command_encoder_args.h"
#include "shared/source/helpers/engine_node_helper.h"
#include "shared/source/helpers/gfx_core_helper.h"
#include "shared/source/helpers/heap_base_address_model.h"
#include "shared/source/helpers/pause_on_gpu_properties.h"
@@ -1005,7 +1006,8 @@ void CommandQueueHw<gfxCoreFamily>::programCommandQueueDebugCmdsForSourceLevelOr
if (isDebugEnabled && !this->commandQueueDebugCmdsProgrammed) {
if (this->device->getL0Debugger()) {
this->device->getL0Debugger()->programSbaAddressLoad(cmdStream,
-device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress());
+device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress(),
+NEO::EngineHelpers::isBcs(this->csr->getOsContext().getEngineType()));
this->commandQueueDebugCmdsProgrammed = true;
}
}
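
The command queue has no isCopyOnly() helper, so the flag is derived from the engine type of the CSR's OS context; the change above is roughly equivalent to:

    const bool isBcs = NEO::EngineHelpers::isBcs(this->csr->getOsContext().getEngineType());
    this->device->getL0Debugger()->programSbaAddressLoad(cmdStream, sbaTrackingBufferGpuVa, isBcs);

where sbaTrackingBufferGpuVa stands for the getSbaTrackingBuffer(...)->getGpuAddress() expression from the hunk.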

View File

@@ -517,7 +517,8 @@ struct MockCommandList : public CommandList {
ADDMETHOD_NOBASE(appendMILoadRegImm, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t reg,
-uint32_t value));
+uint32_t value,
+bool isBcs));
ADDMETHOD_NOBASE(appendMILoadRegReg, ze_result_t, ZE_RESULT_SUCCESS,
(uint32_t reg1,
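
Mocks generated with ADDMETHOD_NOBASE must mirror the new signature, so tests driving the mock pass the extra argument explicitly. Illustrative only; mockCmdList stands for a MockCommandList instance and the register/value are placeholders:

    // The trailing 'false' is the new isBcs argument (non-copy engine).
    EXPECT_EQ(ZE_RESULT_SUCCESS, mockCmdList.appendMILoadRegImm(0u, 0u, false));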

View File

@@ -1857,7 +1857,7 @@ HWTEST2_F(CommandListCreate, givenInOrderExecutionWhenDispatchingRelaxedOrdering
lrrCmd++;
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, cmdList->inOrderExecInfo->getBaseDeviceAddress(), 2,
-NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter));
+NEO::CompareOperation::less, true, FamilyType::isQwordInOrderCounter, false));
}
TEST_F(CommandListCreate, GivenGpuHangWhenCreatingImmCmdListWithSyncModeAndAppendBarrierThenAppendBarrierReturnsDeviceLost) {

View File

@@ -1969,7 +1969,7 @@ HWTEST2_F(InOrderCmdListTests, givenRelaxedOrderingWhenProgrammingTimestampEvent
auto eventEndGpuVa = events[0]->getCompletionFieldGpuAddress(device);
EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(lrrCmd, 0, eventEndGpuVa, static_cast<uint64_t>(Event::STATE_CLEARED),
-NEO::CompareOperation::equal, true, false));
+NEO::CompareOperation::equal, true, false, false));
auto sdiCmd = genCmdCast<MI_STORE_DATA_IMM *>(ptrOffset(lrrCmd, EncodeBatchBufferStartOrEnd<FamilyType>::getCmdSizeConditionalDataMemBatchBufferStart(false)));
ASSERT_NE(nullptr, sdiCmd);
@@ -6393,7 +6393,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
}
// Primary Tile section skip - patching
-if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(primaryTileSectionSkipVa, castToUint64(miPredicate), workPartitionGpuVa, 0, NEO::CompareOperation::notEqual, false, false)) {
+if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(primaryTileSectionSkipVa, castToUint64(miPredicate), workPartitionGpuVa, 0, NEO::CompareOperation::notEqual, false, false, false)) {
return false;
}
@@ -6409,7 +6409,7 @@ HWTEST2_F(MultiTileSynchronizedDispatchTests, givenFullSyncDispatchWhenAppending
}
// Jump to end from Primary Tile section - patching
-if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(jumpToEndSectionFromPrimaryTile, castToUint64(miPredicate), syncAllocGpuVa + sizeof(uint32_t), queueId, NEO::CompareOperation::equal, false, false)) {
+if (!RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(jumpToEndSectionFromPrimaryTile, castToUint64(miPredicate), syncAllocGpuVa + sizeof(uint32_t), queueId, NEO::CompareOperation::equal, false, false, false)) {
return false;
}
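
In these existing CCS-oriented tests the new trailing argument of verifyConditionalDataMemBbStart is simply false. A copy-engine variant of the same check would presumably pass true instead; hypothetical, not part of this commit:

    EXPECT_TRUE(RelaxedOrderingCommandsHelper::verifyConditionalDataMemBbStart<FamilyType>(
        lrrCmd, 0, gpuAddress, waitValue, NEO::CompareOperation::less, true,
        FamilyType::isQwordInOrderCounter, true /* isBcs: command list on a copy engine */));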