Disable implicit scaling for cooperative kernels

When implicit scaling is disabled, use useSingleSubdeviceValue = true.

Resolves: NEO-5757
Signed-off-by: Filip Hazubski <filip.hazubski@intel.com>
2026-01-11 00:10:58 +08:00 · 2021-06-21 15:24:14 +00:00
parent b5d5784b81
commit 29c64c3dd0
27 changed files with 256 additions and 107 deletions
--- a/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp
+++ b/shared/test/common/gen12lp/test_command_encoder_gen12lp.cpp
@@ -105,9 +105,11 @@ GEN12LPTEST_F(CommandEncoderTest, givenVariousEngineTypesWhenEstimateCommandBuff
    using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
    using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;

-    auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false);
+    auto sizeWA = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0),
+                                                                                         Vec3<size_t>(1, 1, 1), false, false);
    static_cast<MockOsContext *>(pDevice->getDefaultEngine().osContext)->engineType = aub_stream::ENGINE_CCS;
-    auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0), Vec3<size_t>(1, 1, 1), false);
+    auto size = EncodeDispatchKernel<FamilyType>::estimateEncodeDispatchKernelCmdsSize(pDevice, Vec3<size_t>(0, 0, 0),
+                                                                                       Vec3<size_t>(1, 1, 1), false, false);

    auto expectedDiff = 2 * PreambleHelper<FamilyType>::getCmdSizeForPipelineSelect(pDevice->getHardwareInfo());
    auto diff = sizeWA - size;
--- a/shared/test/common/mocks/mock_tbx_csr.h
+++ b/shared/test/common/mocks/mock_tbx_csr.h
@@ -41,8 +41,8 @@ class MockTbxCsr : public TbxCommandStreamReceiverHw<GfxFamily> {
        TbxCommandStreamReceiverHw<GfxFamily>::writeMemory(gpuAddress, cpuAddress, size, memoryBank, entryBits);
        writeMemoryCalled = true;
    }
-    void submitBatchBuffer(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) override {
-        TbxCommandStreamReceiverHw<GfxFamily>::submitBatchBuffer(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, entryBits, overrideRingHead);
+    void submitBatchBufferTbx(uint64_t batchBufferGpuAddress, const void *batchBuffer, size_t batchBufferSize, uint32_t memoryBank, uint64_t entryBits, bool overrideRingHead) override {
+        TbxCommandStreamReceiverHw<GfxFamily>::submitBatchBufferTbx(batchBufferGpuAddress, batchBuffer, batchBufferSize, memoryBank, entryBits, overrideRingHead);
        overrideRingHeadPassed = overrideRingHead;
        submitBatchBufferCalled = true;
    }
--- a/shared/test/common/xe_hp_core/test_encode_xe_hp_core.cpp
+++ b/shared/test/common/xe_hp_core/test_encode_xe_hp_core.cpp
@@ -95,7 +95,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
    bool useGlobalAtomics = true;
    uint32_t partitionCount = 0;
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
-                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
+                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
+                                             partitionCount, false, false);

    EXPECT_TRUE(cmdContainer->lastSentUseGlobalAtomics);

@@ -122,7 +123,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
    cmdContainer->lastSentUseGlobalAtomics = true;
    uint32_t partitionCount = 0;
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
-                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
+                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
+                                             partitionCount, false, false);

    EXPECT_TRUE(cmdContainer->lastSentUseGlobalAtomics);

@@ -146,7 +148,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
    cmdContainer->lastSentUseGlobalAtomics = true;
    uint32_t partitionCount = 0;
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
-                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
+                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
+                                             partitionCount, false, false);

    EXPECT_FALSE(cmdContainer->lastSentUseGlobalAtomics);

@@ -173,7 +176,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
    cmdContainer->lastSentUseGlobalAtomics = true;
    uint32_t partitionCount = 0;
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
-                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
+                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
+                                             partitionCount, false, false);

    EXPECT_FALSE(cmdContainer->lastSentUseGlobalAtomics);

@@ -199,7 +203,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithImplicitScalingTests, givenCl
    bool useGlobalAtomics = false;
    uint32_t partitionCount = 0;
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
-                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
+                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
+                                             partitionCount, false, false);

    GenCmdList commands;
    CmdParse<FamilyType>::parseCommandBuffer(commands, ptrOffset(cmdContainer->getCommandStream()->getCpuBase(), 0), cmdContainer->getCommandStream()->getUsed());
@@ -233,7 +238,8 @@ XE_HP_CORE_TEST_F(EncodeKernelGlobalAtomicsTestWithNoImplicitScalingTests, given
    bool useGlobalAtomics = true;
    uint32_t partitionCount = 0;
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, false, false,
-                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics, partitionCount, false);
+                                             pDevice, NEO::PreemptionMode::Disabled, requiresUncachedMocs, useGlobalAtomics,
+                                             partitionCount, false, false);

    EXPECT_FALSE(cmdContainer->lastSentUseGlobalAtomics);
    GenCmdList commands;