mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
refactor: add validateWorkgroupSize() method
Related-To: NEO-14209
Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
af1747a55f
commit
708b81bab5
@@ -375,13 +375,6 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
||||
patchWorkgroupSizeInCrossThreadData(groupSizeX, groupSizeY, groupSizeZ);
|
||||
|
||||
auto simdSize = kernelDescriptor.kernelAttributes.simdSize;
|
||||
auto remainderSimdLanes = itemsInGroup & (simdSize - 1u);
|
||||
threadExecutionMask = static_cast<uint32_t>(maxNBitValue(remainderSimdLanes));
|
||||
if (!threadExecutionMask) {
|
||||
threadExecutionMask = static_cast<uint32_t>(maxNBitValue((isSimd1(simdSize)) ? 32 : simdSize));
|
||||
}
|
||||
evaluateIfRequiresGenerationOfLocalIdsByRuntime(kernelDescriptor);
|
||||
|
||||
auto grfCount = kernelDescriptor.kernelAttributes.numGrfRequired;
|
||||
auto neoDevice = module->getDevice()->getNEODevice();
|
||||
auto &rootDeviceEnvironment = neoDevice->getRootDeviceEnvironment();
|
||||
@@ -389,6 +382,17 @@ ze_result_t KernelImp::setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
||||
this->numThreadsPerThreadGroup = gfxCoreHelper.calculateNumThreadsPerThreadGroup(
|
||||
simdSize, static_cast<uint32_t>(itemsInGroup), grfCount, rootDeviceEnvironment);
|
||||
|
||||
if (auto wgSizeRet = validateWorkgroupSize(); wgSizeRet != ZE_RESULT_SUCCESS) {
|
||||
return wgSizeRet;
|
||||
}
|
||||
|
||||
auto remainderSimdLanes = itemsInGroup & (simdSize - 1u);
|
||||
threadExecutionMask = static_cast<uint32_t>(maxNBitValue(remainderSimdLanes));
|
||||
if (!threadExecutionMask) {
|
||||
threadExecutionMask = static_cast<uint32_t>(maxNBitValue((isSimd1(simdSize)) ? 32 : simdSize));
|
||||
}
|
||||
evaluateIfRequiresGenerationOfLocalIdsByRuntime(kernelDescriptor);
|
||||
|
||||
if (kernelRequiresGenerationOfLocalIdsByRuntime) {
|
||||
auto grfSize = this->module->getDevice()->getHwInfo().capabilityTable.grfSize;
|
||||
uint32_t perThreadDataSizeForWholeThreadGroupNeeded =
|
||||
|
||||
@@ -251,6 +251,7 @@ struct KernelImp : Kernel {
|
||||
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
|
||||
void *patchBindlessSurfaceState(NEO::GraphicsAllocation *alloc, uint32_t bindless);
|
||||
uint32_t getSurfaceStateIndexForBindlessOffset(NEO::CrossThreadDataOffset bindlessOffset) const;
|
||||
ze_result_t validateWorkgroupSize() const;
|
||||
|
||||
const KernelImmutableData *kernelImmData = nullptr;
|
||||
Module *module = nullptr;
|
||||
|
||||
@@ -14,4 +14,7 @@ KernelExt *KernelImp::getExtension(uint32_t extensionType) { return nullptr; }
|
||||
|
||||
// No-op in this file: the body is empty, so neither launchParams nor threadGroupDimensions is read.
void KernelImp::patchRegionParams(const CmdListKernelLaunchParams &launchParams, const ze_group_count_t &threadGroupDimensions) {}
|
||||
|
||||
// Returns ZE_RESULT_SUCCESS unconditionally; this definition inspects no kernel state.
// setGroupSize() calls it and returns the result early when it is not ZE_RESULT_SUCCESS,
// so with this definition that early return is never taken.
// NOTE(review): presumably a default hook that other builds replace with real checks - confirm.
ze_result_t KernelImp::validateWorkgroupSize() const {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
} // namespace L0
|
||||
|
||||
Reference in New Issue
Block a user