mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-24 21:18:24 +08:00
performance: adjust thread group dispatch size
Adjust the thread group dispatch size on PVC if the chosen size does not evenly divide the dimension. This is to avoid leftover thread groups.

Related-To: NEO-7927
Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
4f297cf971
commit
c84c7a0c91
@@ -52,7 +52,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
|
||||
uint32_t bindingTablePrefetchSize,
|
||||
PreemptionMode preemptionMode,
|
||||
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
||||
const Device &device);
|
||||
const Device &device,
|
||||
WALKER_TYPE *walkerCmd);
|
||||
|
||||
static void sendMediaStateFlush(
|
||||
LinearStream &commandStream,
|
||||
|
||||
@@ -119,7 +119,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
uint32_t bindingTablePrefetchSize,
|
||||
PreemptionMode preemptionMode,
|
||||
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
|
||||
const Device &device) {
|
||||
const Device &device,
|
||||
WALKER_TYPE *walkerCmd) {
|
||||
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
|
||||
using SHARED_LOCAL_MEMORY_SIZE = typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
|
||||
|
||||
@@ -174,7 +175,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
|
||||
|
||||
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, device, hardwareInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
|
||||
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, device, hardwareInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, *walkerCmd);
|
||||
|
||||
*pInterfaceDescriptor = interfaceDescriptor;
|
||||
return (size_t)offsetInterfaceDescriptor;
|
||||
@@ -284,7 +285,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
|
||||
bindingTablePrefetchSize,
|
||||
preemptionMode,
|
||||
inlineInterfaceDescriptor,
|
||||
device);
|
||||
device,
|
||||
walkerCmd);
|
||||
|
||||
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
|
||||
PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap);
|
||||
|
||||
Reference in New Issue
Block a user