mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-20 13:11:34 +08:00
Improve EncodeDispatchKernel
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
baea633bdd
commit
93ba4e646b
@ -66,6 +66,8 @@ struct EncodeDispatchKernel {
|
||||
|
||||
static void adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo);
|
||||
|
||||
static void adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount);
|
||||
|
||||
static void adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo);
|
||||
};
|
||||
|
||||
|
@ -441,6 +441,25 @@ void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &conta
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) {
|
||||
auto enablePrefetch = EncodeSurfaceState<Family>::doBindingTablePrefetch();
|
||||
if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) {
|
||||
enablePrefetch = static_cast<bool>(DebugManager.flags.ForceBtpPrefetchMode.get());
|
||||
}
|
||||
|
||||
if (enablePrefetch) {
|
||||
interfaceDescriptor.setSamplerCount(static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((samplerCount + 3) / 4));
|
||||
interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u));
|
||||
} else {
|
||||
interfaceDescriptor.setSamplerCount(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT::SAMPLER_COUNT_NO_SAMPLERS_USED);
|
||||
interfaceDescriptor.setBindingTableEntryCount(0u);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
|
@ -77,28 +77,21 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
? slmSize
|
||||
: INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K);
|
||||
|
||||
{
|
||||
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
uint32_t bindingTablePointer = 0u;
|
||||
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
uint32_t bindingTablePointer = 0u;
|
||||
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
sshOffset = ssh->getUsed();
|
||||
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
||||
*ssh, bindingTableStateCount,
|
||||
dispatchInterface->getSurfaceStateHeapData(),
|
||||
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
||||
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
||||
}
|
||||
|
||||
idd.setBindingTablePointer(bindingTablePointer);
|
||||
|
||||
uint32_t bindingTableStatePrefetchCount = 0;
|
||||
if (EncodeSurfaceState<Family>::doBindingTablePrefetch()) {
|
||||
bindingTableStatePrefetchCount = std::min(31u, bindingTableStateCount);
|
||||
}
|
||||
idd.setBindingTableEntryCount(bindingTableStatePrefetchCount);
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
sshOffset = ssh->getUsed();
|
||||
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
||||
*ssh, bindingTableStateCount,
|
||||
dispatchInterface->getSurfaceStateHeapData(),
|
||||
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
||||
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
||||
}
|
||||
|
||||
idd.setBindingTablePointer(bindingTablePointer);
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, preemptionMode);
|
||||
|
||||
auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
@ -116,9 +109,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
}
|
||||
|
||||
idd.setSamplerStatePointer(samplerStateOffset);
|
||||
auto samplerCountState =
|
||||
static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((samplerCount + 3) / 4);
|
||||
idd.setSamplerCount(samplerCountState);
|
||||
|
||||
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
|
||||
|
||||
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / sizeof(float[8]));
|
||||
idd.setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
|
||||
@ -310,9 +302,6 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareIn
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize) {}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device) {
|
||||
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
|
||||
|
Reference in New Issue
Block a user