fix: set kernel LWS prior to obtaining indirect heaps

When calculating the size of the indirect object heap (IOH),
the local work group size (LWS) from kernel implicit args is taken into account.
If the LWS is not set before this calculation,
the resulting IOH allocation can be too small.
The problem shows up when local ids are generated by the runtime
and then written to the IOH: the write fails because the allocation lacks space.

Related-To: IGC-7708
Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
Fabian Zwolinski
2024-03-13 20:17:06 +00:00
committed by Compute-Runtime-Automation
parent 98824fdaf6
commit eb259c1f52
2 changed files with 36 additions and 2 deletions

View File

@@ -86,6 +86,12 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
const auto lws = generateWorkgroupSize(dispatchInfo);
const_cast<DispatchInfo &>(dispatchInfo).setLWS(lws);
}
if (dispatchInfo.getKernel() == mainKernel) {
if (!mainKernel->isLocalWorkSize2Patchable()) {
const auto &lws = dispatchInfo.getLocalWorkgroupSize();
mainKernel->setLocalWorkSizeValues(static_cast<uint32_t>(lws.x), static_cast<uint32_t>(lws.y), static_cast<uint32_t>(lws.z));
}
}
}
// Allocate command stream and indirect heaps

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018-2023 Intel Corporation
* Copyright (C) 2018-2024 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -1386,4 +1386,32 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
size = alignUp(size, MemoryConstants::cacheLineSize);
EXPECT_EQ(size, iohSizeWithImplicitArgs);
}
}
}
// Verifies that the IOH size reported for a kernel with implicit args does not
// shrink once the local work size has been stored in the kernel: the size is
// queried before and after setLocalWorkSizeValues() and the two are compared.
HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsAndLocalWorkSizeIsSetThenIohRequiresMoreSpace) {
    // Turn the EnableHwGenerationLocalIds debug flag off for this test.
    debugManager.flags.EnableHwGenerationLocalIds.set(0);

    cl_uint workDim = 1;
    size_t globalSize[3] = {1, 1, 1};
    size_t localSize[3] = {683, 1, 1};
    size_t offsets[3] = {0, 0, 0};

    // Prepare a SIMD1 kernel whose descriptor is adjusted for implicit args.
    kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1u;
    UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(kernelInfo.kernelDescriptor);

    MockKernel implicitArgsKernel(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, implicitArgsKernel.initialize());

    DispatchInfo implicitArgsDispatch(pClDevice, &implicitArgsKernel, workDim, globalSize, localSize, offsets);
    implicitArgsDispatch.setNumberOfWorkgroups({1, 1, 1});
    implicitArgsDispatch.setTotalNumberOfWorkgroups({1, 1, 1});

    // Baseline: IOH size before the kernel has been given its local work size.
    auto iohSizeBeforeLws = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(implicitArgsKernel, localSize);

    // Store the LWS in the dispatch info, then forward it to the kernel.
    implicitArgsDispatch.setLWS({683, 1, 1});
    const auto storedLws = implicitArgsDispatch.getLocalWorkgroupSize();
    implicitArgsKernel.setLocalWorkSizeValues(static_cast<uint32_t>(storedLws.x),
                                              static_cast<uint32_t>(storedLws.y),
                                              static_cast<uint32_t>(storedLws.z));

    // Same query again, now with the LWS known to the kernel.
    auto iohSizeAfterLws = HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(implicitArgsKernel, localSize);

    EXPECT_LE(iohSizeBeforeLws, iohSizeAfterLws);
}