mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Apply heuristics when setting TG dispatch size on XE_HPC_CORE
The default TG dispatch size can be changed to a better value based on the number of threads in the TG or the number of threads currently available on the GPU. The decision on what the TG dispatch size should be is based on the implemented heuristics. Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com> Related-To: NEO-6989
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
52133e61ce
commit
ed0c36117e
@@ -65,6 +65,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
size_t globalOffsets[3] = {dispatchInfo.getOffset().x, dispatchInfo.getOffset().y, dispatchInfo.getOffset().z};
|
||||
size_t startWorkGroups[3] = {walkerArgs.startOfWorkgroups->x, walkerArgs.startOfWorkgroups->y, walkerArgs.startOfWorkgroups->z};
|
||||
size_t numWorkGroups[3] = {walkerArgs.numberOfWorkgroups->x, walkerArgs.numberOfWorkgroups->y, walkerArgs.numberOfWorkgroups->z};
|
||||
auto threadGroupCount = static_cast<uint32_t>(walkerArgs.numberOfWorkgroups->x * walkerArgs.numberOfWorkgroups->y * walkerArgs.numberOfWorkgroups->z);
|
||||
|
||||
if (walkerArgs.currentTimestampPacketNodes && commandQueue.getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
|
||||
auto timestampPacketNode = walkerArgs.currentTimestampPacketNodes->peekNodes().at(walkerArgs.currentDispatchIndex);
|
||||
@@ -83,6 +84,7 @@ inline void HardwareInterface<GfxFamily>::programWalker(
|
||||
kernel.getKernelStartAddress(true, kernelUsesLocalIds, isCcsUsed, false),
|
||||
simd,
|
||||
walkerArgs.localWorkSizes,
|
||||
threadGroupCount,
|
||||
walkerArgs.offsetInterfaceDescriptorTable,
|
||||
walkerArgs.interfaceDescriptorIndex,
|
||||
walkerArgs.preemptionMode,
|
||||
|
||||
Reference in New Issue
Block a user