refactor: add validateWorkgroupSize() method

Related-To: NEO-14209

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2025-06-05 09:23:11 +00:00
committed by Compute-Runtime-Automation
parent af1747a55f
commit 708b81bab5
3 changed files with 15 additions and 7 deletions

View File

@@ -375,13 +375,6 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
patchWorkgroupSizeInCrossThreadData(groupSizeX, groupSizeY, groupSizeZ);
auto simdSize = kernelDescriptor.kernelAttributes.simdSize;
auto remainderSimdLanes = itemsInGroup & (simdSize - 1u);
threadExecutionMask = static_cast<uint32_t>(maxNBitValue(remainderSimdLanes));
if (!threadExecutionMask) {
threadExecutionMask = static_cast<uint32_t>(maxNBitValue((isSimd1(simdSize)) ? 32 : simdSize));
}
evaluateIfRequiresGenerationOfLocalIdsByRuntime(kernelDescriptor);
auto grfCount = kernelDescriptor.kernelAttributes.numGrfRequired;
auto neoDevice = module->getDevice()->getNEODevice();
auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
@@ -389,6 +382,17 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
this->numThreadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(
simdSize, static_cast<uint32_t>(itemsInGroup), grfCount, rootDeviceEnvironment);
if (auto wgSizeRet = validateWorkgroupSize(); wgSizeRet != ZE_RESULT_SUCCESS) {
return wgSizeRet;
}
auto remainderSimdLanes = itemsInGroup & (simdSize - 1u);
threadExecutionMask = static_cast<uint32_t>(maxNBitValue(remainderSimdLanes));
if (!threadExecutionMask) {
threadExecutionMask = static_cast<uint32_t>(maxNBitValue((isSimd1(simdSize)) ? 32 : simdSize));
}
evaluateIfRequiresGenerationOfLocalIdsByRuntime(kernelDescriptor);
if (kernelRequiresGenerationOfLocalIdsByRuntime) {
auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
uint32_t perThreadDataSizeForWholeThreadGroupNeeded =

View File

@@ -251,6 +251,7 @@ struct KernelImp : Kernel {
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
uint32_t getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const;
ze_result_t validateWorkgroupSize() const;
const KernelImmutableData *kernelImmData = nullptr;
Module *module = nullptr;

View File

@@ -14,4 +14,7 @@ KernelExt *KernelImp::getExtension(uint32_t extensionType) { return nullptr; }
// Region-parameter patching hook. In this implementation the body is empty:
// neither launchParams nor threadGroupDimensions is read, and nothing is modified.
void KernelImp::patchRegionParams(const CmdListKernelLaunchParams &launchParams,
                                  const ze_group_count_t &threadGroupDimensions) {
    // No-op.
}
// Reports whether the kernel's current workgroup size is acceptable.
//
// This implementation performs no checks and always yields ZE_RESULT_SUCCESS.
//
// @return ZE_RESULT_SUCCESS unconditionally.
ze_result_t KernelImp::validateWorkgroupSize() const {
    const ze_result_t validationStatus = ZE_RESULT_SUCCESS;
    return validationStatus;
}
} // namespace L0