performance: adjust thread group dispatch size

Adjust the thread group dispatch size on PVC if the chosen size does not
evenly divide the dimension.

This avoids leftover thread groups.

Related-To: NEO-7927

Signed-off-by: Dominik Dabek <dominik.dabek@intel.com>
This commit is contained in:
Dominik Dabek
2023-04-27 16:58:48 +00:00
committed by Compute-Runtime-Automation
parent 4f297cf971
commit c84c7a0c91
13 changed files with 125 additions and 36 deletions

View File

@@ -52,7 +52,8 @@ struct HardwareCommandsHelper : public PerThreadDataHelper {
uint32_t bindingTablePrefetchSize,
PreemptionMode preemptionMode,
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
const Device &device);
const Device &device,
WALKER_TYPE *walkerCmd);
static void sendMediaStateFlush(
LinearStream &commandStream,

View File

@@ -119,7 +119,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
uint32_t bindingTablePrefetchSize,
PreemptionMode preemptionMode,
INTERFACE_DESCRIPTOR_DATA *inlineInterfaceDescriptor,
const Device &device) {
const Device &device,
WALKER_TYPE *walkerCmd) {
using SAMPLER_STATE = typename GfxFamily::SAMPLER_STATE;
using SHARED_LOCAL_MEMORY_SIZE = typename INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE;
@@ -174,7 +175,7 @@ size_t HardwareCommandsHelper<GfxFamily>::sendInterfaceDescriptorData(
PreemptionHelper::programInterfaceDescriptorDataPreemption<GfxFamily>(&interfaceDescriptor, preemptionMode);
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, device, hardwareInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired);
EncodeDispatchKernel<GfxFamily>::adjustInterfaceDescriptorData(interfaceDescriptor, device, hardwareInfo, threadGroupCount, kernelDescriptor.kernelAttributes.numGrfRequired, *walkerCmd);
*pInterfaceDescriptor = interfaceDescriptor;
return (size_t)offsetInterfaceDescriptor;
@@ -284,7 +285,8 @@ size_t HardwareCommandsHelper<GfxFamily>::sendIndirectState(
bindingTablePrefetchSize,
preemptionMode,
inlineInterfaceDescriptor,
device);
device,
walkerCmd);
if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) {
PatchInfoData patchInfoData(kernelStartOffset, 0, PatchInfoAllocationType::InstructionHeap, dsh.getGraphicsAllocation()->getGpuAddress(), offsetInterfaceDescriptor, PatchInfoAllocationType::DynamicStateHeap);