mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-12 17:33:00 +08:00
Apply heuristics when setting TG dispatch size on XE_HPC_CORE
The default TG dispatch size can be changed to a better value based on the number of threads in the TG or the number of threads currently available on the GPU. Decisions on what the TG dispatch size should be are based on the implemented heuristics. Signed-off-by: Rafal Maziejuk <rafal.maziejuk@intel.com> Related-To: NEO-6989
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
52133e61ce
commit
ed0c36117e
@@ -47,6 +47,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
size_t bindingTablePointer,
|
||||
[[maybe_unused]] size_t offsetSamplerState,
|
||||
uint32_t numSamplers,
|
||||
const uint32_t threadGroupCount,
|
||||
uint32_t numThreadsPerThreadGroup,
|
||||
const Kernel &kernel,
|
||||
uint32_t bindingTablePrefetchSize,
|
||||
@@ -79,6 +80,7 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
uint64_t kernelStartOffset,
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint32_t threadGroupCount,
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
uint32_t &interfaceDescriptorIndex,
|
||||
PreemptionMode preemptionMode,
|
||||
|
||||
@@ -113,6 +113,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
size_t bindingTablePointer,
|
||||
[[maybe_unused]] size_t offsetSamplerState,
|
||||
uint32_t numSamplers,
|
||||
const uint32_t threadGroupCount,
|
||||
uint32_t threadsPerThreadGroup,
|
||||
const Kernel &kernel,
|
||||
uint32_t bindingTablePrefetchSize,
|
||||
@@ -169,7 +170,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
hardwareInfo);
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
|
||||
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, hardwareInfo);
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, hardwareInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
|
||||
*pInterfaceDescriptor = interfaceDescriptor;
|
||||
return (size_t)offsetInterfaceDescriptor;
|
||||
@@ -185,6 +187,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
uint64_t kernelStartOffset,
|
||||
uint32_t simd,
|
||||
const size_t localWorkSize[3],
|
||||
const uint32_t threadGroupCount,
|
||||
const uint64_t offsetInterfaceDescriptorTable,
|
||||
uint32_t &interfaceDescriptorIndex,
|
||||
PreemptionMode preemptionMode,
|
||||
@@ -263,6 +266,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
dstBindingTablePointer,
|
||||
samplerStateOffset,
|
||||
samplerCount,
|
||||
threadGroupCount,
|
||||
threadsPerThreadGroup,
|
||||
kernel,
|
||||
bindingTablePrefetchSize,
|
||||
|
||||
Reference in New Issue
Block a user