diff --git a/level_zero/core/source/kernel/kernel_imp.cpp b/level_zero/core/source/kernel/kernel_imp.cpp
index 63d741a9b5..9a1bc1674c 100644
--- a/level_zero/core/source/kernel/kernel_imp.cpp
+++ b/level_zero/core/source/kernel/kernel_imp.cpp
@@ -270,6 +270,12 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
     }
 
+    if (this->groupSize[0] == groupSizeX && // requested size equals the size already stored on this kernel
+        this->groupSize[1] == groupSizeY &&
+        this->groupSize[2] == groupSizeZ) {
+        return ZE_RESULT_SUCCESS; // nothing to update: return before the group-size computations that follow
+    }
+
     auto numChannels = kernelImmData->getDescriptor().kernelAttributes.numLocalIdChannels;
     Vec3 groupSize{groupSizeX, groupSizeY, groupSizeZ};
     auto itemsInGroup = Math::computeTotalElementsCount(groupSize);
diff --git a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
index 00d024cd46..92dd077975 100644
--- a/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
+++ b/level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp
@@ -134,6 +134,18 @@ struct MockKernelWithCallTracking : Mock<::L0::Kernel> {
         return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
     }
     size_t setArgBufferWithAllocCalled = 0u;
+
+    ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override { // tracks repeated identical requests, then delegates to KernelImp::setGroupSize
+        if (this->groupSize[0] == groupSizeX && // same comparison as the early-return check added to KernelImp::setGroupSize in this patch
+            this->groupSize[1] == groupSizeY &&
+            this->groupSize[2] == groupSizeZ) {
+            setGroupSizeSkipCount++; // request matches the currently stored group size
+        } else {
+            setGroupSizeSkipCount = 0u; // any differing request resets the counter
+        }
+        return KernelImp::setGroupSize(groupSizeX, groupSizeY, groupSizeZ); // counter is decided above, before delegating: it mirrors the check rather than observing what KernelImp does
+    }
+    size_t setGroupSizeSkipCount = 0u; // consecutive setGroupSize calls whose arguments equalled this->groupSize at call time
 };
 
 using SetKernelArgCacheTest = Test;
@@ -310,6 +322,44 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
     EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret);
 }
 
+TEST_F(KernelImpSetGroupSizeTest, givenValidGroupSizeWhenSetMultipleTimesThenSetGroupSizeIsOnlyExecutedIfNeeded) {
+    MockKernelWithCallTracking mockKernel;
+    Mock mockModule(this->device, nullptr); // NOTE(review): `Mock` has no template argument here, unlike Mock<::L0::Kernel> above - possibly lost in extraction, confirm against the original patch
+    mockKernel.module = &mockModule;
+
+    // First request for {2u, 3u, 5u}: expected to differ from the kernel's stored group size, so the size is applied and the mock's skip counter is 0
+    auto ret = mockKernel.setGroupSize(2u, 3u, 5u);
+    EXPECT_EQ(2u, mockKernel.groupSize[0]);
+    EXPECT_EQ(3u, mockKernel.groupSize[1]);
+    EXPECT_EQ(5u, mockKernel.groupSize[2]);
+    EXPECT_EQ(0u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    // Repeated request for {2u, 3u, 5u}: matches the stored size, so the mock counts one skip; stored size and result are unchanged
+    ret = mockKernel.setGroupSize(2u, 3u, 5u);
+    EXPECT_EQ(2u, mockKernel.groupSize[0]);
+    EXPECT_EQ(3u, mockKernel.groupSize[1]);
+    EXPECT_EQ(5u, mockKernel.groupSize[2]);
+    EXPECT_EQ(1u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    // First request for {1u, 2u, 3u}: differs from the stored {2u, 3u, 5u}, so the new size is applied and the skip counter resets to 0
+    ret = mockKernel.setGroupSize(1u, 2u, 3u);
+    EXPECT_EQ(1u, mockKernel.groupSize[0]);
+    EXPECT_EQ(2u, mockKernel.groupSize[1]);
+    EXPECT_EQ(3u, mockKernel.groupSize[2]);
+    EXPECT_EQ(0u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+
+    // Repeated request for {1u, 2u, 3u}: matches the stored size again, so the mock counts one skip
+    ret = mockKernel.setGroupSize(1u, 2u, 3u);
+    EXPECT_EQ(1u, mockKernel.groupSize[0]);
+    EXPECT_EQ(2u, mockKernel.groupSize[1]);
+    EXPECT_EQ(3u, mockKernel.groupSize[2]);
+    EXPECT_EQ(1u, mockKernel.setGroupSizeSkipCount);
+    EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
+}
+
 using SetKernelArg = Test;
 using ImageSupport = IsWithinProducts;
 