Improve EncodeDispatchKernel

Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
Bartosz Dunajski
2020-11-27 09:22:59 +00:00
committed by Compute-Runtime-Automation
parent baea633bdd
commit 93ba4e646b
10 changed files with 155 additions and 35 deletions

View File

@@ -66,6 +66,8 @@ struct EncodeDispatchKernel {
static void adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo);
static void adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount);
static void adjustTimestampPacket(WALKER_TYPE &walkerCmd, const HardwareInfo &hwInfo);
};

View File

@@ -441,6 +441,25 @@ void EncodeIndirectParams<Family>::setGroupCountIndirect(CommandContainer &conta
}
}
template <typename Family>
void EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, uint32_t samplerCount, uint32_t bindingTableEntryCount) {
auto enablePrefetch = EncodeSurfaceState<Family>::doBindingTablePrefetch();
if (DebugManager.flags.ForceBtpPrefetchMode.get() != -1) {
enablePrefetch = static_cast<bool>(DebugManager.flags.ForceBtpPrefetchMode.get());
}
if (enablePrefetch) {
interfaceDescriptor.setSamplerCount(static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((samplerCount + 3) / 4));
interfaceDescriptor.setBindingTableEntryCount(std::min(bindingTableEntryCount, 31u));
} else {
interfaceDescriptor.setSamplerCount(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT::SAMPLER_COUNT_NO_SAMPLERS_USED);
interfaceDescriptor.setBindingTableEntryCount(0u);
}
}
template <typename Family>
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
template <typename Family>
void EncodeIndirectParams<Family>::setGlobalWorkSizeIndirect(CommandContainer &container, const NEO::CrossThreadDataOffset offsets[3], void *crossThreadAddress, const uint32_t *lws) {
for (int i = 0; i < 3; ++i) {

View File

@@ -77,28 +77,21 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
? slmSize
: INTERFACE_DESCRIPTOR_DATA::SHARED_LOCAL_MEMORY_SIZE_ENCODES_0K);
{
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
uint32_t bindingTablePointer = 0u;
uint32_t bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
uint32_t bindingTablePointer = 0u;
if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
sshOffset = ssh->getUsed();
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh, bindingTableStateCount,
dispatchInterface->getSurfaceStateHeapData(),
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
}
idd.setBindingTablePointer(bindingTablePointer);
uint32_t bindingTableStatePrefetchCount = 0;
if (EncodeSurfaceState<Family>::doBindingTablePrefetch()) {
bindingTableStatePrefetchCount = std::min(31u, bindingTableStateCount);
}
idd.setBindingTableEntryCount(bindingTableStatePrefetchCount);
if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
sshOffset = ssh->getUsed();
bindingTablePointer = static_cast<uint32_t>(EncodeSurfaceState<Family>::pushBindingTableAndSurfaceStates(
*ssh, bindingTableStateCount,
dispatchInterface->getSurfaceStateHeapData(),
dispatchInterface->getSurfaceStateHeapDataSize(), bindingTableStateCount,
kernelDescriptor.payloadMappings.bindingTable.tableOffset));
}
idd.setBindingTablePointer(bindingTablePointer);
PreemptionHelper::programInterfaceDescriptorDataPreemption<Family>(&idd, preemptionMode);
auto heap = container.getIndirectHeap(HeapType::DYNAMIC_STATE);
@@ -116,9 +109,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
}
idd.setSamplerStatePointer(samplerStateOffset);
auto samplerCountState =
static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((samplerCount + 3) / 4);
idd.setSamplerCount(samplerCountState);
EncodeDispatchKernel<Family>::adjustBindingTablePrefetch(idd, samplerCount, bindingTableStateCount);
auto numGrfCrossThreadData = static_cast<uint32_t>(sizeCrossThreadData / sizeof(float[8]));
idd.setCrossThreadConstantDataReadLength(numGrfCrossThreadData);
@@ -310,9 +302,6 @@ void EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(const HardwareIn
template <typename Family>
void EncodeDispatchKernel<Family>::appendAdditionalIDDFields(INTERFACE_DESCRIPTOR_DATA *pInterfaceDescriptor, const HardwareInfo &hwInfo, const uint32_t threadsPerThreadGroup, uint32_t slmTotalSize) {}
template <typename Family>
void EncodeDispatchKernel<Family>::adjustInterfaceDescriptorData(INTERFACE_DESCRIPTOR_DATA &interfaceDescriptor, const HardwareInfo &hwInfo) {}
template <typename Family>
size_t EncodeDispatchKernel<Family>::estimateEncodeDispatchKernelCmdsSize(Device *device) {
using MEDIA_STATE_FLUSH = typename Family::MEDIA_STATE_FLUSH;

View File

@@ -83,6 +83,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, EnableUsmCompression, -1, "enable compression su
DECLARE_DEBUG_VARIABLE(int32_t, EnableHostUsmSupport, -1, "-1: default, 0: disable, 1: enable, Enables USM host memory")
DECLARE_DEBUG_VARIABLE(int32_t, MediaVfeStateMaxSubSlices, -1, ">=0: Programs Media Vfe State Maximum Number of Dual-Subslices to given value ")
DECLARE_DEBUG_VARIABLE(int32_t, EnableMockSourceLevelDebugger, 0, "Switches driver to mode with active debugger. Active modes: 1: opt-disabled, 2: opt-enabled")
DECLARE_DEBUG_VARIABLE(int32_t, ForceBtpPrefetchMode, -1, "-1: default, 0: disable, 1: enable, Enables Btp prefetching")
/*LOGGING FLAGS*/
DECLARE_DEBUG_VARIABLE(int32_t, PrintDriverDiagnostics, -1, "prints driver diagnostics messages to standard output, value corresponds to hint level")

View File

@@ -258,6 +258,105 @@ HWTEST_F(CommandEncodeStatesTest, givenIndarectOffsetsSizeWhenDispatchingKernelT
ASSERT_NE(itor, commands.end());
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenForceBtpPrefetchModeDebugFlagWhenDispatchingKernelThenValuesAreSetUpCorrectly) {
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
using MEDIA_INTERFACE_DESCRIPTOR_LOAD = typename FamilyType::MEDIA_INTERFACE_DESCRIPTOR_LOAD;
using SAMPLER_STATE = typename FamilyType::SAMPLER_STATE;
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
DebugManagerStateRestore restorer;
uint32_t dims[] = {2, 1, 1};
uint32_t numBindingTable = 1;
uint32_t numSamplers = 1;
SAMPLER_STATE samplerState;
BINDING_TABLE_STATE bindingTable;
std::unique_ptr<MockDispatchKernelEncoder> dispatchInterface(new MockDispatchKernelEncoder());
dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.numEntries = numBindingTable;
dispatchInterface->kernelDescriptor.payloadMappings.bindingTable.tableOffset = 0;
dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.numSamplers = numSamplers;
dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.tableOffset = 0;
dispatchInterface->kernelDescriptor.payloadMappings.samplerTable.borderColor = 0;
unsigned char *samplerStateRaw = reinterpret_cast<unsigned char *>(&samplerState);
EXPECT_CALL(*dispatchInterface.get(), getDynamicStateHeapData()).WillRepeatedly(::testing::Return(samplerStateRaw));
unsigned char *bindingTableRaw = reinterpret_cast<unsigned char *>(&bindingTable);
EXPECT_CALL(*dispatchInterface.get(), getSurfaceStateHeapData()).WillRepeatedly(::testing::Return(bindingTableRaw));
{
DebugManager.flags.ForceBtpPrefetchMode.set(-1);
cmdContainer.reset(new MyMockCommandContainer());
cmdContainer->initialize(pDevice);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, cmdContainer->getCommandStream()->getCpuBase(), cmdContainer->getCommandStream()->getUsed());
auto itorMIDL = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(commands.begin(), commands.end());
EXPECT_NE(itorMIDL, commands.end());
auto cmd = genCmdCast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(*itorMIDL);
EXPECT_NE(cmd, nullptr);
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
if (EncodeSurfaceState<FamilyType>::doBindingTablePrefetch()) {
EXPECT_EQ(numBindingTable, idd->getBindingTableEntryCount());
EXPECT_EQ(static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((numSamplers + 3) / 4), idd->getSamplerCount());
} else {
EXPECT_EQ(0u, idd->getBindingTableEntryCount());
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd->getSamplerCount());
}
}
{
DebugManager.flags.ForceBtpPrefetchMode.set(0);
cmdContainer.reset(new MyMockCommandContainer());
cmdContainer->initialize(pDevice);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, cmdContainer->getCommandStream()->getCpuBase(), cmdContainer->getCommandStream()->getUsed());
auto itorMIDL = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(commands.begin(), commands.end());
EXPECT_NE(itorMIDL, commands.end());
auto cmd = genCmdCast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(*itorMIDL);
EXPECT_NE(cmd, nullptr);
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
EXPECT_EQ(0u, idd->getBindingTableEntryCount());
EXPECT_EQ(INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT_NO_SAMPLERS_USED, idd->getSamplerCount());
}
{
DebugManager.flags.ForceBtpPrefetchMode.set(1);
cmdContainer.reset(new MyMockCommandContainer());
cmdContainer->initialize(pDevice);
EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dims, false, false, dispatchInterface.get(), 0, pDevice, NEO::PreemptionMode::Disabled);
auto dsh = cmdContainer->getIndirectHeap(HeapType::DYNAMIC_STATE);
GenCmdList commands;
CmdParse<FamilyType>::parseCommandBuffer(commands, cmdContainer->getCommandStream()->getCpuBase(), cmdContainer->getCommandStream()->getUsed());
auto itorMIDL = find<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(commands.begin(), commands.end());
EXPECT_NE(itorMIDL, commands.end());
auto cmd = genCmdCast<MEDIA_INTERFACE_DESCRIPTOR_LOAD *>(*itorMIDL);
EXPECT_NE(cmd, nullptr);
auto idd = static_cast<INTERFACE_DESCRIPTOR_DATA *>(ptrOffset(dsh->getCpuBase(), cmd->getInterfaceDescriptorDataStartAddress()));
EXPECT_EQ(numBindingTable, idd->getBindingTableEntryCount());
EXPECT_EQ(static_cast<typename INTERFACE_DESCRIPTOR_DATA::SAMPLER_COUNT>((numSamplers + 3) / 4), idd->getSamplerCount());
}
}
HWCMDTEST_F(IGFX_GEN8_CORE, CommandEncodeStatesTest, givenCleanHeapsAndSlmNotChangedWhenDispatchKernelThenFlushNotAdded) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
uint32_t dims[] = {2, 1, 1};