Use cached group sizes in zeKernelSetGroupSize

Optimize zeKernelSetGroupSize by returning success early if the group size
values have not changed since the last call.

Move the ImplicitArgs construction above the setGroupSize call
in kernel initialization to prevent pImplicitArgs from being nullptr
in calls where we use the cached group sizes and return early.

Related-To: NEO-7394
Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwolinski 2023-01-11 11:50:28 +00:00 committed by Compute-Runtime-Automation
parent 3a5a418488
commit 9dfed7cd54
2 changed files with 64 additions and 7 deletions

View File

@ -271,6 +271,12 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (this->groupSize[0] == groupSizeX &&
this->groupSize[1] == groupSizeY &&
this->groupSize[2] == groupSizeZ) {
return ZE_RESULT_SUCCESS;
}
auto numChannels = kernelImmData->getDescriptor().kernelAttributes.numLocalIdChannels;
Vec3<size_t> groupSize{groupSizeX, groupSizeY, groupSizeZ};
auto itemsInGroup = Math::computeTotalElementsCount(groupSize);
@ -873,6 +879,14 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
}
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
pImplicitArgs = std::make_unique<NEO::ImplicitArgs>();
*pImplicitArgs = {};
pImplicitArgs->structSize = sizeof(NEO::ImplicitArgs);
pImplicitArgs->structVersion = 0;
pImplicitArgs->simdWidth = kernelDescriptor.kernelAttributes.simdSize;
}
if (kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] > 0) {
auto *reqdSize = kernelDescriptor.kernelAttributes.requiredWorkgroupSize;
UNRECOVERABLE_IF(reqdSize[1] == 0);
@ -896,13 +910,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation);
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
}
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
pImplicitArgs = std::make_unique<NEO::ImplicitArgs>();
*pImplicitArgs = {};
pImplicitArgs->structSize = sizeof(NEO::ImplicitArgs);
pImplicitArgs->structVersion = 0;
pImplicitArgs->simdWidth = kernelDescriptor.kernelAttributes.simdSize;
}
this->createPrintfBuffer();

View File

@ -138,6 +138,18 @@ struct MockKernelWithCallTracking : Mock<::L0::Kernel> {
return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
}
size_t setArgBufferWithAllocCalled = 0u;
// Call-tracking wrapper around KernelImp::setGroupSize.
//
// Before delegating, compares the requested dimensions against the values
// currently stored in groupSize. A match bumps setGroupSizeSkipCount; any
// mismatch resets it to zero. The counter is computed here in the mock and
// does not observe what the delegated call actually does.
//
// Returns whatever KernelImp::setGroupSize returns for the same arguments.
ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override {
    const bool differsFromCached = this->groupSize[0] != groupSizeX ||
                                   this->groupSize[1] != groupSizeY ||
                                   this->groupSize[2] != groupSizeZ;
    setGroupSizeSkipCount = differsFromCached ? 0u : setGroupSizeSkipCount + 1u;
    return KernelImp::setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
}
size_t setGroupSizeSkipCount = 0u;
};
using SetKernelArgCacheTest = Test<ModuleFixture>;
@ -314,6 +326,44 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret);
}
// Sets the group size four times on a call-tracking mock kernel: each distinct
// size twice in a row. After every call the stored groupSize must equal the
// requested size, the call must succeed, and the mock's skip counter must be
// 0 for a new size and 1 for an immediately repeated one.
TEST_F(KernelImpSetGroupSizeTest, givenValidGroupSizeWhenSetMultipleTimesThenSetGroupSizeIsOnlyExecutedIfNeeded) {
    MockKernelWithCallTracking mockKernel;
    Mock<Module> mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;

    struct Step {
        uint32_t x;
        uint32_t y;
        uint32_t z;
        size_t expectedSkipCount;
    };

    const Step steps[] = {
        {2u, 3u, 5u, 0u}, // first call with {2, 3, 5} - not counted as a skip
        {2u, 3u, 5u, 1u}, // same size again - counted as a skip
        {1u, 2u, 3u, 0u}, // new size {1, 2, 3} - counter resets
        {1u, 2u, 3u, 1u}, // same size again - counted as a skip
    };

    for (const auto &step : steps) {
        const auto ret = mockKernel.setGroupSize(step.x, step.y, step.z);
        EXPECT_EQ(step.x, mockKernel.groupSize[0]);
        EXPECT_EQ(step.y, mockKernel.groupSize[1]);
        EXPECT_EQ(step.z, mockKernel.groupSize[2]);
        EXPECT_EQ(step.expectedSkipCount, mockKernel.setGroupSizeSkipCount);
        EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
    }
}
using SetKernelArg = Test<ModuleFixture>;
using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;