Use cached group sizes in zeKernelSetGroupSize

Optimize zeKernelSetGroupSize by returning success early when the group size values have not changed since the last call. Related-To: NEO-7394 Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
2025-12-21 09:14:47 +08:00 · 2022-12-14 14:56:33 +00:00
parent cf6d706d4b
commit 7ec94c6aaa
2 changed files with 56 additions and 0 deletions
--- a/level_zero/core/source/kernel/kernel_imp.cpp
+++ b/level_zero/core/source/kernel/kernel_imp.cpp
@@ -270,6 +270,12 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
        return ZE_RESULT_ERROR_INVALID_ARGUMENT;
    }

+    if (this->groupSize[0] == groupSizeX &&
+        this->groupSize[1] == groupSizeY &&
+        this->groupSize[2] == groupSizeZ) {
+        return ZE_RESULT_SUCCESS;
+    }
+
    auto numChannels = kernelImmData->getDescriptor().kernelAttributes.numLocalIdChannels;
    Vec3<size_t> groupSize{groupSizeX, groupSizeY, groupSizeZ};
    auto itemsInGroup = Math::computeTotalElementsCount(groupSize);
--- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
+++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
@@ -134,6 +134,18 @@ struct MockKernelWithCallTracking : Mock<::L0::Kernel> {
        return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
    }
    size_t setArgBufferWithAllocCalled = 0u;
+
+    ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override { // test hook: tracks repeated-size calls, then forwards
+        if (this->groupSize[0] == groupSizeX && // compare the request against the currently stored group size
+            this->groupSize[1] == groupSizeY &&
+            this->groupSize[2] == groupSizeZ) {
+            setGroupSizeSkipCount++; // same size as stored: one more consecutive repeated call
+        } else {
+            setGroupSizeSkipCount = 0u; // different size: restart the count of repeated calls
+        }
+        return KernelImp::setGroupSize(groupSizeX, groupSizeY, groupSizeZ); // counter was decided above, before the base implementation runs
+    }
+    size_t setGroupSizeSkipCount = 0u;
 };

 using SetKernelArgCacheTest = Test<ModuleFixture>;
@@ -310,6 +322,44 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret);
 }

+TEST_F(KernelImpSetGroupSizeTest, givenValidGroupSizeWhenSetMultipleTimesThenSetGroupSizeIsOnlyExecutedIfNeeded) {
+    MockKernelWithCallTracking mockKernel; // mock whose setGroupSize counts calls that repeat the stored group size
+    Mock<Module> mockModule(this->device, nullptr);
+    mockKernel.module = &mockModule;
+
+    // First call with {2u, 3u, 5u}: expected not to be a repeat, so the mock's skip counter stays at 0
+    auto ret = mockKernel.setGroupSize(2u, 3u, 5u);
+    EXPECT_EQ(2u, mockKernel.groupSize[0]);
+    EXPECT_EQ(3u, mockKernel.groupSize[1]);
+    EXPECT_EQ(5u, mockKernel.groupSize[2]);
+    EXPECT_EQ(0u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    // Second call with the same {2u, 3u, 5u}: the mock counts one repeated (skippable) call; stored size is unchanged
+    ret = mockKernel.setGroupSize(2u, 3u, 5u);
+    EXPECT_EQ(2u, mockKernel.groupSize[0]);
+    EXPECT_EQ(3u, mockKernel.groupSize[1]);
+    EXPECT_EQ(5u, mockKernel.groupSize[2]);
+    EXPECT_EQ(1u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    // First call with a new size {1u, 2u, 3u}: the mock resets its skip counter to 0 and the stored size is updated
+    ret = mockKernel.setGroupSize(1u, 2u, 3u);
+    EXPECT_EQ(1u, mockKernel.groupSize[0]);
+    EXPECT_EQ(2u, mockKernel.groupSize[1]);
+    EXPECT_EQ(3u, mockKernel.groupSize[2]);
+    EXPECT_EQ(0u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    // Second call with the same {1u, 2u, 3u}: the mock again counts one repeated (skippable) call
+    ret = mockKernel.setGroupSize(1u, 2u, 3u);
+    EXPECT_EQ(1u, mockKernel.groupSize[0]);
+    EXPECT_EQ(2u, mockKernel.groupSize[1]);
+    EXPECT_EQ(3u, mockKernel.groupSize[2]);
+    EXPECT_EQ(1u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+}
+
 using SetKernelArg = Test<ModuleFixture>;
 using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;