Use cached group sizes in zeKernelSetGroupSize

Optimize zeKernelSetGroupSize by returning success early when the group
size values have not changed since the previous call.

Related-To: NEO-7394
Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwolinski
2022-12-14 14:56:33 +00:00
committed by Compute-Runtime-Automation
parent cf6d706d4b
commit 7ec94c6aaa
2 changed files with 56 additions and 0 deletions

View File

@@ -270,6 +270,12 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (this->groupSize[0] == groupSizeX &&
this->groupSize[1] == groupSizeY &&
this->groupSize[2] == groupSizeZ) {
return ZE_RESULT_SUCCESS;
}
auto numChannels = kernelImmData->getDescriptor().kernelAttributes.numLocalIdChannels;
Vec3<size_t> groupSize{groupSizeX, groupSizeY, groupSizeZ};
auto itemsInGroup = Math::computeTotalElementsCount(groupSize);

View File

@@ -134,6 +134,18 @@ struct MockKernelWithCallTracking : Mock<::L0::Kernel> {
return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
}
size_t setArgBufferWithAllocCalled = 0u;
// Call-tracking wrapper around KernelImp::setGroupSize.
// Before forwarding, compares the requested dimensions with the currently stored
// groupSize: a matching request increments setGroupSizeSkipCount, any other
// request resets it to zero. The result of the base implementation is returned unchanged.
ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override {
    const bool matchesStoredSize = (groupSizeX == this->groupSize[0]) &&
                                   (groupSizeY == this->groupSize[1]) &&
                                   (groupSizeZ == this->groupSize[2]);
    setGroupSizeSkipCount = matchesStoredSize ? setGroupSizeSkipCount + 1u : 0u;
    return KernelImp::setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
}
size_t setGroupSizeSkipCount = 0u;
};
using SetKernelArgCacheTest = Test<ModuleFixture>;
@@ -310,6 +322,44 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret);
}
// Drives setGroupSize through a sequence of requests and checks, after every call,
// the stored group size, the mock's skip counter and the returned status.
// A request that repeats the previous dimensions is expected to be counted as a skip;
// a request with new dimensions is expected to reset the counter.
TEST_F(KernelImpSetGroupSizeTest, givenValidGroupSizeWhenSetMultipleTimesThenSetGroupSizeIsOnlyExecutedIfNeeded) {
    MockKernelWithCallTracking mockKernel;
    Mock<Module> mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;

    struct Step {
        uint32_t x;
        uint32_t y;
        uint32_t z;
        size_t expectedSkipCount;
    };

    const Step steps[] = {
        {2u, 3u, 5u, 0u}, // first call with {2u, 3u, 5u} - don't skip setGroupSize execution
        {2u, 3u, 5u, 1u}, // second call with {2u, 3u, 5u} - skip setGroupSize execution
        {1u, 2u, 3u, 0u}, // first call with {1u, 2u, 3u} - don't skip setGroupSize execution
        {1u, 2u, 3u, 1u}, // second call with {1u, 2u, 3u} - skip setGroupSize execution
    };

    for (const auto &step : steps) {
        const auto ret = mockKernel.setGroupSize(step.x, step.y, step.z);

        EXPECT_EQ(step.x, mockKernel.groupSize[0]);
        EXPECT_EQ(step.y, mockKernel.groupSize[1]);
        EXPECT_EQ(step.z, mockKernel.groupSize[2]);
        EXPECT_EQ(step.expectedSkipCount, mockKernel.setGroupSizeSkipCount);
        EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
    }
}
using SetKernelArg = Test<ModuleFixture>;
using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;