Use cached group sizes in zeKernelSetGroupSize
Optimize zeKernelSetGroupSize by returning success early when the group size values have not changed since the last call. Move the ImplicitArgs construction above the setGroupSize call in kernel initialization to prevent pImplicitArgs from being nullptr in calls where the cached group size is used and the function returns early. Related-To: NEO-7394 Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
parent
3a5a418488
commit
9dfed7cd54
|
@ -271,6 +271,12 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
|||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (this->groupSize[0] == groupSizeX &&
|
||||
this->groupSize[1] == groupSizeY &&
|
||||
this->groupSize[2] == groupSizeZ) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
auto numChannels = kernelImmData->getDescriptor().kernelAttributes.numLocalIdChannels;
|
||||
Vec3<size_t> groupSize{groupSizeX, groupSizeY, groupSizeZ};
|
||||
auto itemsInGroup = Math::computeTotalElementsCount(groupSize);
|
||||
|
@ -873,6 +879,14 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
|||
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
|
||||
}
|
||||
|
||||
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
|
||||
pImplicitArgs = std::make_unique<NEO::ImplicitArgs>();
|
||||
*pImplicitArgs = {};
|
||||
pImplicitArgs->structSize = sizeof(NEO::ImplicitArgs);
|
||||
pImplicitArgs->structVersion = 0;
|
||||
pImplicitArgs->simdWidth = kernelDescriptor.kernelAttributes.simdSize;
|
||||
}
|
||||
|
||||
if (kernelDescriptor.kernelAttributes.requiredWorkgroupSize[0] > 0) {
|
||||
auto *reqdSize = kernelDescriptor.kernelAttributes.requiredWorkgroupSize;
|
||||
UNRECOVERABLE_IF(reqdSize[1] == 0);
|
||||
|
@ -896,13 +910,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
|
|||
this->patchCrossthreadDataWithPrivateAllocation(this->privateMemoryGraphicsAllocation);
|
||||
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
|
||||
}
|
||||
if (kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs) {
|
||||
pImplicitArgs = std::make_unique<NEO::ImplicitArgs>();
|
||||
*pImplicitArgs = {};
|
||||
pImplicitArgs->structSize = sizeof(NEO::ImplicitArgs);
|
||||
pImplicitArgs->structVersion = 0;
|
||||
pImplicitArgs->simdWidth = kernelDescriptor.kernelAttributes.simdSize;
|
||||
}
|
||||
|
||||
this->createPrintfBuffer();
|
||||
|
||||
|
|
|
@ -138,6 +138,18 @@ struct MockKernelWithCallTracking : Mock<::L0::Kernel> {
|
|||
return KernelImp::setArgBufferWithAlloc(argIndex, argVal, allocation);
|
||||
}
|
||||
size_t setArgBufferWithAllocCalled = 0u;
|
||||
|
||||
// Tracking override of setGroupSize.
// Before forwarding to KernelImp::setGroupSize, compares the requested
// dimensions with the group size currently stored in this->groupSize:
//  - all three equal  -> setGroupSizeSkipCount is incremented,
//  - any one differs  -> setGroupSizeSkipCount is reset to zero.
// The result of the base-class call is returned unchanged.
ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY, uint32_t groupSizeZ) override {
    const bool matchesStoredSize = (this->groupSize[0] == groupSizeX) &&
                                   (this->groupSize[1] == groupSizeY) &&
                                   (this->groupSize[2] == groupSizeZ);

    // The counter tracks consecutive repeats only; a differing request clears it.
    setGroupSizeSkipCount = matchesStoredSize ? setGroupSizeSkipCount + 1u : 0u;

    return KernelImp::setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
}

// Number of consecutive setGroupSize calls whose arguments matched the
// group size that was already stored when the call was made.
size_t setGroupSizeSkipCount = 0u;
|
||||
};
|
||||
|
||||
using SetKernelArgCacheTest = Test<ModuleFixture>;
|
||||
|
@ -314,6 +326,44 @@ TEST_F(KernelImpSetGroupSizeTest, givenZeroGroupSizeWhenSettingGroupSizeThenInva
|
|||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, ret);
|
||||
}
|
||||
|
||||
// Sets the same group size twice, then a different group size twice, on a
// MockKernelWithCallTracking. After every call it checks the stored group
// size, the mock's setGroupSizeSkipCount (0 when the size changed, 1 when it
// was repeated) and that the call returned ZE_RESULT_SUCCESS.
TEST_F(KernelImpSetGroupSizeTest, givenValidGroupSizeWhenSetMultipleTimesThenSetGroupSizeIsOnlyExecutedIfNeeded) {
    MockKernelWithCallTracking mockKernel;
    Mock<Module> mockModule(this->device, nullptr);
    mockKernel.module = &mockModule;

    struct Step {
        uint32_t x;
        uint32_t y;
        uint32_t z;
        size_t expectedSkipCount;
    };

    const Step steps[] = {
        {2u, 3u, 5u, 0u}, // first call with {2, 3, 5}: new size, not counted as a skip
        {2u, 3u, 5u, 1u}, // second call with {2, 3, 5}: same size, counted as a skip
        {1u, 2u, 3u, 0u}, // first call with {1, 2, 3}: size changed, skip counter resets
        {1u, 2u, 3u, 1u}, // second call with {1, 2, 3}: same size, counted as a skip
    };

    for (const auto &step : steps) {
        const auto ret = mockKernel.setGroupSize(step.x, step.y, step.z);

        EXPECT_EQ(step.x, mockKernel.groupSize[0]);
        EXPECT_EQ(step.y, mockKernel.groupSize[1]);
        EXPECT_EQ(step.z, mockKernel.groupSize[2]);
        EXPECT_EQ(step.expectedSkipCount, mockKernel.setGroupSizeSkipCount);
        EXPECT_EQ(ZE_RESULT_SUCCESS, ret);
    }
}
|
||||
|
||||
using SetKernelArg = Test<ModuleFixture>;
|
||||
using ImageSupport = IsWithinProducts<IGFX_SKYLAKE, IGFX_TIGERLAKE_LP>;
|
||||
|
||||
|
|
Loading…
Reference in New Issue