mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-30 01:35:20 +08:00
Improve EncodeDispatchKernel
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
baea633bdd
commit
93ba4e646b
@@ -66,6 +66,8 @@ struct EncodeDispatchKernel {
|
||||
|
||||
static void adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo);
|
||||
|
||||
static void adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount);
|
||||
|
||||
static void adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo);
|
||||
};
|
||||
|
||||
|
||||
@@ -441,6 +441,25 @@ void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &conta
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) {
|
||||
auto enablePrefetch = EncodeSurfaceState<Family>::doBindingTablePrefetch();
|
||||
if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) {
|
||||
enablePrefetch = static_cast<bool>(DebugManager.flags.ForceBtpPrefetchMode.get());
|
||||
}
|
||||
|
||||
if (enablePrefetch) {
|
||||
interfaceDescriptor.setSamplerCount(static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((samplerCount + 3) / 4));
|
||||
interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u));
|
||||
} else {
|
||||
interfaceDescriptor.setSamplerCount(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT::SAMPLER_COUNT_NO_SAMPLERS_USED);
|
||||
interfaceDescriptor.setBindingTableEntryCount(0u);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
|
||||
@@ -77,28 +77,21 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
? slmSize
|
||||
: INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K);
|
||||
|
||||
{
|
||||
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
uint32_t bindingTablePointer = 0u;
|
||||
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
uint32_t bindingTablePointer = 0u;
|
||||
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
sshOffset = ssh->getUsed();
|
||||
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
||||
*ssh, bindingTableStateCount,
|
||||
dispatchInterface->getSurfaceStateHeapData(),
|
||||
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
||||
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
||||
}
|
||||
|
||||
idd.setBindingTablePointer(bindingTablePointer);
|
||||
|
||||
uint32_t bindingTableStatePrefetchCount = 0;
|
||||
if (EncodeSurfaceState<Family>::doBindingTablePrefetch()) {
|
||||
bindingTableStatePrefetchCount = std::min(31u, bindingTableStateCount);
|
||||
}
|
||||
idd.setBindingTableEntryCount(bindingTableStatePrefetchCount);
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
sshOffset = ssh->getUsed();
|
||||
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
|
||||
*ssh, bindingTableStateCount,
|
||||
dispatchInterface->getSurfaceStateHeapData(),
|
||||
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
|
||||
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
|
||||
}
|
||||
|
||||
idd.setBindingTablePointer(bindingTablePointer);
|
||||
|
||||
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, preemptionMode);
|
||||
|
||||
auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
@@ -116,9 +109,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
}
|
||||
|
||||
idd.setSamplerStatePointer(samplerStateOffset);
|
||||
auto samplerCountState =
|
||||
static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((samplerCount + 3) / 4);
|
||||
idd.setSamplerCount(samplerCountState);
|
||||
|
||||
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
|
||||
|
||||
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / sizeof(float[8]));
|
||||
idd.setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
|
||||
@@ -310,9 +302,6 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareIn
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize) {}
|
||||
|
||||
template <typename Family>
|
||||
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
|
||||
|
||||
template <typename Family>
|
||||
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device) {
|
||||
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;
|
||||
|
||||
@@ -83,6 +83,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmCompression, -1, "enable compression su
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostUsmSupport, -1, "-1: default, 0: disable, 1: enable, Enables USM host memory")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MediaVfeStateMaxSubSlices, -1, ">=0: Programs Media Vfe State Maximum Number of Dual-Subslices to given value ")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnableMockSourceLevelDebugger, 0, "Switches driver to mode with active debugger. Active modes: 1: opt-disabled, 2: opt-enabled")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ForceBtpPrefetchMode, -1, "-1: default, 0: disable, 1: enable, Enables Btp prefetching")
|
||||
|
||||
/*LOGGING FLAGS*/
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")
|
||||
|
||||
@@ -258,6 +258,105 @@ HWTEST_F(CommandEncodeStatesTest, givenIndarectOffsetsSizeWhenDispatchingKernelT
|
||||
ASSERT_NE(itor, commands.end());
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDebugFlagWhenDispatchingKernelThenValuesAreSetUpCorrectly) {
|
||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
|
||||
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
|
||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
uint32_t dims[] = {2, 1, 1};
|
||||
uint32_t numBindingTable = 1;
|
||||
uint32_t numSamplers = 1;
|
||||
SAMPLER_STATE samplerState;
|
||||
BINDING_TABLE_STATE bindingTable;
|
||||
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
|
||||
|
||||
dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.numEntries = numBindingTable;
|
||||
dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.tableOffset = 0;
|
||||
dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.numSamplers = numSamplers;
|
||||
dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.tableOffset = 0;
|
||||
dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.borderColor = 0;
|
||||
unsigned char *samplerStateRaw = reinterpret_cast<unsigned char *>(&samplerState);
|
||||
EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeapData()).WillRepeatedly(::testing::Return(samplerStateRaw));
|
||||
unsigned char *bindingTableRaw = reinterpret_cast<unsigned char *>(&bindingTable);
|
||||
EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(bindingTableRaw));
|
||||
|
||||
{
|
||||
DebugManager.flags.ForceBtpPrefetchMode.set(-1);
|
||||
cmdContainer.reset(new MyMockCommandContainer());
|
||||
cmdContainer->initialize(pDevice);
|
||||
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
|
||||
|
||||
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, cmdContainer->getCommandStream()->getCpuBase(), cmdContainer->getCommandStream()->getUsed());
|
||||
|
||||
auto itorMIDL = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(commands.begin(), commands.end());
|
||||
EXPECT_NE(itorMIDL, commands.end());
|
||||
|
||||
auto cmd = genCmdCast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(*itorMIDL);
|
||||
EXPECT_NE(cmd, nullptr);
|
||||
|
||||
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
|
||||
|
||||
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
|
||||
EXPECT_EQ(numBindingTable, idd->getBindingTableEntryCount());
|
||||
EXPECT_EQ(static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((numSamplers + 3) / 4), idd->getSamplerCount());
|
||||
} else {
|
||||
EXPECT_EQ(0u, idd->getBindingTableEntryCount());
|
||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd->getSamplerCount());
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
DebugManager.flags.ForceBtpPrefetchMode.set(0);
|
||||
cmdContainer.reset(new MyMockCommandContainer());
|
||||
cmdContainer->initialize(pDevice);
|
||||
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
|
||||
|
||||
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, cmdContainer->getCommandStream()->getCpuBase(), cmdContainer->getCommandStream()->getUsed());
|
||||
|
||||
auto itorMIDL = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(commands.begin(), commands.end());
|
||||
EXPECT_NE(itorMIDL, commands.end());
|
||||
|
||||
auto cmd = genCmdCast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(*itorMIDL);
|
||||
EXPECT_NE(cmd, nullptr);
|
||||
|
||||
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
|
||||
|
||||
EXPECT_EQ(0u, idd->getBindingTableEntryCount());
|
||||
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd->getSamplerCount());
|
||||
}
|
||||
|
||||
{
|
||||
DebugManager.flags.ForceBtpPrefetchMode.set(1);
|
||||
cmdContainer.reset(new MyMockCommandContainer());
|
||||
cmdContainer->initialize(pDevice);
|
||||
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
|
||||
|
||||
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
|
||||
|
||||
GenCmdList commands;
|
||||
CmdParse<FamilyType>::parseCommandBuffer(commands, cmdContainer->getCommandStream()->getCpuBase(), cmdContainer->getCommandStream()->getUsed());
|
||||
|
||||
auto itorMIDL = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(commands.begin(), commands.end());
|
||||
EXPECT_NE(itorMIDL, commands.end());
|
||||
|
||||
auto cmd = genCmdCast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(*itorMIDL);
|
||||
EXPECT_NE(cmd, nullptr);
|
||||
|
||||
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
|
||||
|
||||
EXPECT_EQ(numBindingTable, idd->getBindingTableEntryCount());
|
||||
EXPECT_EQ(static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((numSamplers + 3) / 4), idd->getSamplerCount());
|
||||
}
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedWhenDispatchKernelThenFlushNotAdded) {
|
||||
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
|
||||
uint32_t dims[] = {2, 1, 1};
|
||||
|
||||
Reference in New Issue
Block a user