mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-25 13:33:02 +08:00
fix: set kernel LWS prior to obtaining indirect heaps
When calculating the size of the indirect object heap (IOH), the local work-group size (LWS) from the kernel's implicit args is taken into account. If the LWS is not set before this calculation, the resulting IOH allocation can be too small. The problem shows up when local IDs are generated by the runtime and then written to the IOH: the write fails because the allocation does not have enough space. Related-To: IGC-7708 Signed-off-by: Fabian Zwolinski <fabian.zwolinski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
98824fdaf6
commit
eb259c1f52
@@ -86,6 +86,12 @@ void HardwareInterface<GfxFamily>::dispatchWalker(
|
||||
const auto lws = generateWorkgroupSize(dispatchInfo);
|
||||
const_cast<DispatchInfo &>(dispatchInfo).setLWS(lws);
|
||||
}
|
||||
if (dispatchInfo.getKernel() == mainKernel) {
|
||||
if (!mainKernel->isLocalWorkSize2Patchable()) {
|
||||
const auto &lws = dispatchInfo.getLocalWorkgroupSize();
|
||||
mainKernel->setLocalWorkSizeValues(static_cast<uint32_t>(lws.x), static_cast<uint32_t>(lws.y), static_cast<uint32_t>(lws.z));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate command stream and indirect heaps
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2018-2023 Intel Corporation
|
||||
* Copyright (C) 2018-2024 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -1386,4 +1386,32 @@ HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsThenIohRequiresMoreSp
|
||||
size = alignUp(size, MemoryConstants::cacheLineSize);
|
||||
EXPECT_EQ(size, iohSizeWithImplicitArgs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that the IOH size reported for a kernel with implicit args does not
// shrink once the kernel's local work size values have been set: the size is
// queried before and after setLocalWorkSizeValues() and the two are compared.
HWTEST_F(DispatchWalkerTest, WhenKernelRequiresImplicitArgsAndLocalWorkSizeIsSetThenIohRequiresMoreSpace) {
    // Disable the EnableHwGenerationLocalIds debug flag for this test
    // (presumably so local ids are produced by the runtime - confirm against the flag's definition).
    debugManager.flags.EnableHwGenerationLocalIds.set(0);

    cl_uint dimensions = 1;
    size_t workItems[3] = {1, 1, 1};
    size_t workGroupSize[3] = {683, 1, 1};
    size_t globalOffsets[3] = {0, 0, 0};

    kernelInfo.kernelDescriptor.kernelAttributes.simdSize = 1u;
    UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(kernelInfo.kernelDescriptor);

    MockKernel kernelWithImplicitArgs(program.get(), kernelInfo, *pClDevice);
    ASSERT_EQ(CL_SUCCESS, kernelWithImplicitArgs.initialize());

    DispatchInfo dispatchInfoWithImplicitArgs(pClDevice, &kernelWithImplicitArgs, dimensions, workItems, workGroupSize, globalOffsets);
    dispatchInfoWithImplicitArgs.setNumberOfWorkgroups({1, 1, 1});
    dispatchInfoWithImplicitArgs.setTotalNumberOfWorkgroups({1, 1, 1});

    // Same query is issued twice; only the kernel's local work size state differs between calls.
    auto queryRequiredIoh = [&]() {
        return HardwareCommandsHelper<FamilyType>::getSizeRequiredIOH(kernelWithImplicitArgs, workGroupSize);
    };

    // Baseline: local work size values have not been written to the kernel yet.
    const auto iohSizeBeforeLws = queryRequiredIoh();

    // Set the LWS on the dispatch info and forward it to the kernel.
    dispatchInfoWithImplicitArgs.setLWS({683, 1, 1});
    const auto &localSize = dispatchInfoWithImplicitArgs.getLocalWorkgroupSize();
    kernelWithImplicitArgs.setLocalWorkSizeValues(static_cast<uint32_t>(localSize.x),
                                                  static_cast<uint32_t>(localSize.y),
                                                  static_cast<uint32_t>(localSize.z));

    const auto iohSizeAfterLws = queryRequiredIoh();

    EXPECT_LE(iohSizeBeforeLws, iohSizeAfterLws);
}
|
||||
|
||||
Reference in New Issue
Block a user