Add LogicalStateHelper getter for CommandQueue.

Refactor Kernel handling

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
2025-09-10 12:53:42 +08:00 · 2022-06-27 17:20:50 +00:00
parent 5e5df0fe87
commit 4fb4a1d77b
10 changed files with 43 additions and 27 deletions
--- a/opencl/source/command_queue/command_queue.cpp
+++ b/opencl/source/command_queue/command_queue.cpp
@@ -12,6 +12,7 @@
 #include "shared/source/helpers/array_count.h"
 #include "shared/source/helpers/engine_node_helper.h"
 #include "shared/source/helpers/get_info.h"
+#include "shared/source/helpers/logical_state_helper.h"
 #include "shared/source/helpers/ptr_math.h"
 #include "shared/source/helpers/string.h"
 #include "shared/source/helpers/timestamp_packet.h"
@@ -1229,4 +1230,8 @@ void CommandQueue::clearLastBcsPackets() {
    }
 }

+LogicalStateHelper *CommandQueue::getLogicalStateHelper() const {
+    return logicalStateHelper.get();
+}
+
 } // namespace NEO
--- a/opencl/source/command_queue/command_queue.h
+++ b/opencl/source/command_queue/command_queue.h
@@ -35,6 +35,7 @@ class IndirectHeap;
 class Kernel;
 class MemObj;
 class PerformanceCounters;
+class LogicalStateHelper;
 struct CompletionStamp;
 struct MultiDispatchInfo;

@@ -358,6 +359,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {

    bool isTextureCacheFlushNeeded(uint32_t commandType) const;

+    LogicalStateHelper *getLogicalStateHelper() const;
+
  protected:
    void *enqueueReadMemObjForMap(TransferProperties &transferProperties, EventsRequest &eventsRequest, cl_int &errcodeRet);
    cl_int enqueueWriteMemObjForUnmap(MemObj *memObj, void *mappedPtr, EventsRequest &eventsRequest);
@@ -425,6 +428,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {

    std::unique_ptr<TimestampPacketContainer> deferredTimestampPackets;
    std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
+    std::unique_ptr<LogicalStateHelper> logicalStateHelper;
+
    struct BcsTimestampPacketContainers {
        TimestampPacketContainer lastBarrierToWaitFor;
        TimestampPacketContainer lastSignalledPacket;
--- a/opencl/source/command_queue/command_queue_hw.h
+++ b/opencl/source/command_queue/command_queue_hw.h
@@ -505,7 +505,5 @@ class CommandQueueHw : public CommandQueue {
    void setupEvent(EventBuilder &eventBuilder, cl_event *outEvent, uint32_t cmdType);

    bool isBlitAuxTranslationRequired(const MultiDispatchInfo &multiDispatchInfo);
-
-    std::unique_ptr<LogicalStateHelper> logicalStateHelper;
 };
 } // namespace NEO
--- a/opencl/source/command_queue/hardware_interface_bdw_and_later.inl
+++ b/opencl/source/command_queue/hardware_interface_bdw_and_later.inl
@@ -88,7 +88,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
        ioh,
        ssh,
        kernel,
-        kernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        kernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        simd,
        localWorkSizes,
        offsetInterfaceDescriptorTable,
--- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl
+++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl
@@ -103,7 +103,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
        ioh,
        ssh,
        kernel,
-        kernel.getKernelStartOffset(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed),
+        kernel.getKernelStartAddress(localIdsGenerationByRuntime, kernelUsesLocalIds, isCcsUsed, false),
        simd,
        localWorkSizes,
        offsetInterfaceDescriptorTable,
--- a/opencl/source/kernel/kernel.cpp
+++ b/opencl/source/kernel/kernel.cpp
@@ -2020,15 +2020,13 @@ void Kernel::addAllocationToCacheFlushVector(uint32_t argIndex, GraphicsAllocati
    }
 }

-uint64_t Kernel::getKernelStartOffset(
-    const bool localIdsGenerationByRuntime,
-    const bool kernelUsesLocalIds,
-    const bool isCssUsed) const {
+uint64_t Kernel::getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const {

    uint64_t kernelStartOffset = 0;

    if (kernelInfo.getGraphicsAllocation()) {
-        kernelStartOffset = kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
+        kernelStartOffset = returnFullAddress ? kernelInfo.getGraphicsAllocation()->getGpuAddress()
+                                              : kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();
        if (localIdsGenerationByRuntime == false && kernelUsesLocalIds == true) {
            kernelStartOffset += kernelInfo.kernelDescriptor.entryPoints.skipPerThreadDataLoad;
        }
--- a/opencl/source/kernel/kernel.h
+++ b/opencl/source/kernel/kernel.h
@@ -360,10 +360,7 @@ class Kernel : public ReferenceTrackedObject<Kernel> {
                                   size_t *localWorkSize);
    uint32_t getMaxWorkGroupCount(const cl_uint workDim, const size_t *localWorkSize, const CommandQueue *commandQueue) const;

-    uint64_t getKernelStartOffset(
-        const bool localIdsGenerationByRuntime,
-        const bool kernelUsesLocalIds,
-        const bool isCssUsed) const;
+    uint64_t getKernelStartAddress(const bool localIdsGenerationByRuntime, const bool kernelUsesLocalIds, const bool isCssUsed, const bool returnFullAddress) const;

    bool isKernelDebugEnabled() const { return debugEnabled; }
    int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue);
--- a/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp
+++ b/opencl/test/unit_test/gen12lp/tgllp/kernel_tests_tgllp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2020-2021 Intel Corporation
+ * Copyright (C) 2020-2022 Intel Corporation
 *
 * SPDX-License-Identifier: MIT
 *
@@ -32,7 +32,7 @@ TGLLPTEST_F(KernelTgllpTests, GivenUseOffsetToSkipSetFFIDGPWorkaroundActiveWhenS
        mockKernelWithInternals.kernelInfo.kernelDescriptor.entryPoints.skipSetFFIDGP = additionalOffsetDueToFfid;

        for (auto isCcsUsed : ::testing::Bool()) {
-            uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartOffset(false, false, isCcsUsed);
+            uint64_t kernelStartOffset = mockKernelWithInternals.mockKernel->getKernelStartAddress(false, false, isCcsUsed, false);

            if (stepping == REVISION_A0 && isCcsUsed) {
                EXPECT_EQ(defaultKernelStartOffset + additionalOffsetDueToFfid, kernelStartOffset);
--- a/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
+++ b/opencl/test/unit_test/helpers/hardware_commands_helper_tests.cpp
@@ -340,7 +340,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenAllocatingIndirectStateRes
        ioh,
        ssh,
        *kernel,
-        kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        kernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        kernel->getKernelInfo().getMaxSimdSize(),
        localWorkSizes,
        idToffset,
@@ -395,7 +395,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWithFourBindingTabl
        ioh,
        ssh,
        *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
        localWorkSizes,
        0,
@@ -441,7 +441,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, givenKernelWith100BindingTable
        ioh,
        ssh,
        *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
        localWorkSizes,
        0,
@@ -520,7 +520,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, whenSendingIndirectStateThenKe
        ioh,
        ssh,
        mockKernel,
-        mockKernel.getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        modifiedKernelInfo.getMaxSimdSize(),
        localWorkSizes,
        idToffset,
@@ -610,7 +610,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenSendingIndirectStateThenBi
        ioh,
        ssh,
        *kernel,
-        kernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        kernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        kernel->getKernelInfo().getMaxSimdSize(),
        localWorkSizes,
        0,
@@ -719,7 +719,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, WhenGettingBindingTableStateTh
            ioh,
            ssh,
            *pKernel,
-            pKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+            pKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
            pKernel->getKernelInfo().getMaxSimdSize(),
            localWorkSizes,
            0,
@@ -860,7 +860,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS
        ioh,
        ssh,
        *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
        localWorkSizes,
        0,
@@ -884,7 +884,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithInvalidSamplerS
        ioh,
        ssh,
        *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        mockKernelWithInternal->mockKernel->getKernelInfo().getMaxSimdSize(),
        localWorkSizes,
        0,
@@ -954,7 +954,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, HardwareCommandsTest, GivenKernelWithSamplersWhenInd
        ioh,
        ssh,
        *mockKernelWithInternal->mockKernel,
-        mockKernelWithInternal->mockKernel->getKernelStartOffset(true, kernelUsesLocalIds, isCcsUsed),
+        mockKernelWithInternal->mockKernel->getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
        8,
        localWorkSizes,
        interfaceDescriptorTableOffset,
--- a/opencl/test/unit_test/kernel/kernel_tests.cpp
+++ b/opencl/test/unit_test/kernel/kernel_tests.cpp
@@ -2845,11 +2845,24 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseWhenGettingStartOffse
    auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();

    mockKernel.mockKernel->setStartOffset(128);
-    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, true, false);
+    auto offset = mockKernel.mockKernel->getKernelStartAddress(false, true, false, false);
    EXPECT_EQ(allocationOffset + 256u, offset);
    device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
 }

+TEST(KernelTest, givenFullAddressRequestWhenAskingForKernelStartAddressThenReturnFullAddress) {
+    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));
+
+    MockKernelWithInternals mockKernel(*device);
+
+    mockKernel.kernelInfo.createKernelAllocation(device->getDevice(), false);
+
+    auto address = mockKernel.mockKernel->getKernelStartAddress(false, true, false, true);
+    EXPECT_EQ(mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddress(), address);
+
+    device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
+}
+
 TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGettingStartOffsetThenOffsetToSkipPerThreadDataLoadIsNotAdded) {
    auto device = clUniquePtr(new MockClDevice(MockDevice::createWithNewExecutionEnvironment<MockDevice>(defaultHwInfo.get())));

@@ -2861,7 +2874,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeTrueAndLocalIdsUsedWhenGet
    auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();

    mockKernel.mockKernel->setStartOffset(128);
-    auto offset = mockKernel.mockKernel->getKernelStartOffset(true, true, false);
+    auto offset = mockKernel.mockKernel->getKernelStartAddress(true, true, false, false);
    EXPECT_EQ(allocationOffset + 128u, offset);
    device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
 }
@@ -2877,7 +2890,7 @@ TEST(KernelTest, givenKernelLocalIdGenerationByRuntimeFalseAndLocalIdsNotUsedWhe
    auto allocationOffset = mockKernel.kernelInfo.getGraphicsAllocation()->getGpuAddressToPatch();

    mockKernel.mockKernel->setStartOffset(128);
-    auto offset = mockKernel.mockKernel->getKernelStartOffset(false, false, false);
+    auto offset = mockKernel.mockKernel->getKernelStartAddress(false, false, false, false);
    EXPECT_EQ(allocationOffset + 128u, offset);
    device->getMemoryManager()->freeGraphicsMemory(mockKernel.kernelInfo.getGraphicsAllocation());
 }