Add surface state programming for kernels with images and stateless buffers

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2022-07-04 01:28:25 +00:00
committed by Compute-Runtime-Automation
parent 76e023b941
commit 0b26ee3664
2 changed files with 46 additions and 6 deletions

View File

@@ -110,7 +110,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries; auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
uint32_t bindingTablePointer = 0u; uint32_t bindingTablePointer = 0u;
if (kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) { if ((kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) ||
kernelDescriptor.kernelAttributes.flags.usesImages) {
container.prepareBindfulSsh(); container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) { if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE); auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
@@ -343,7 +344,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
return true; return true;
} }
//check if we need to follow kernel requirements // check if we need to follow kernel requirements
if (requireInputWalkOrder) { if (requireInputWalkOrder) {
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
if (!Math::isPow2<size_t>(lws[walkOrder[dimension]])) { if (!Math::isPow2<size_t>(lws[walkOrder[dimension]])) {
@@ -365,7 +366,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
return false; return false;
} }
//kernel doesn't specify any walk order requirements, check if we have any compatible // kernel doesn't specify any walk order requirements, check if we have any compatible
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) { for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
bool allDimensionsCompatible = true; bool allDimensionsCompatible = true;
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) { for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
@@ -431,9 +432,9 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get()); walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
} }
//1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back // 1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
//so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds // so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
//2) Auto-generation of local ids should be possible, when in fact local ids are used // 2) Auto-generation of local ids should be possible, when in fact local ids are used
if (!localIdsGenerationByRuntime && localIdDimensions > 0) { if (!localIdsGenerationByRuntime && localIdDimensions > 0) {
UNRECOVERABLE_IF(localIdDimensions != 3); UNRECOVERABLE_IF(localIdDimensions != 3);
uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2); uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2);

View File

@@ -138,6 +138,45 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenOverrideSlmTotalSizeD
} }
} }
// Dispatches a kernel whose buffer addressing mode is Stateless but which is flagged as
// using images, and checks that the binding table pointer written into the walker's
// interface descriptor equals the surface state heap usage (sampled before the dispatch)
// aligned up to BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenStatelessBufferAndImageWhenDispatchingKernelThenBindingTableOffsetIsCorrect) {
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;

    // Reserve some surface state heap space before dispatching, then record the offset
    // at which the encoder is expected to place the binding table.
    // NOTE(review): presumably the 0x20 reservation exists to make the expected offset non-zero - confirm.
    auto surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
    surfaceStateHeap->getSpace(0x20);
    const auto usedBeforeDispatch = static_cast<uint32_t>(surfaceStateHeap->getUsed());
    const auto expectedOffset = alignUp(usedBeforeDispatch, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);

    // Mock kernel: one binding table entry, stateless buffers, images in use.
    constexpr uint32_t bindingTableEntryCount = 1;
    auto mockEncoder = std::make_unique<MockDispatchKernelEncoder>();
    auto &kernelDescriptor = mockEncoder->kernelDescriptor;
    kernelDescriptor.payloadMappings.bindingTable.numEntries = bindingTableEntryCount;
    kernelDescriptor.payloadMappings.bindingTable.tableOffset = 0;
    kernelDescriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
    kernelDescriptor.kernelAttributes.flags.usesImages = true;

    // The mock reports a single default-initialized BINDING_TABLE_STATE as its
    // surface state heap data.
    BINDING_TABLE_STATE bindingTableData = FamilyType::cmdInitBindingTableState;
    mockEncoder->getSurfaceStateHeapDataResult = reinterpret_cast<unsigned char *>(&bindingTableData);
    mockEncoder->getSurfaceStateHeapDataSizeResult = static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE));

    uint32_t threadGroupDims[3] = {2, 1, 1};
    bool uncachedMocsRequired = false;
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, mockEncoder.get(), threadGroupDims, uncachedMocsRequired);

    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer.get(), dispatchArgs);

    // Parse everything emitted into the command stream and locate the walker command.
    auto cmdStream = cmdContainer->getCommandStream();
    GenCmdList parsedCmds;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCmds, ptrOffset(cmdStream->getCpuBase(), 0), cmdStream->getUsed());

    auto walkerIt = find<WALKER_TYPE *>(parsedCmds.begin(), parsedCmds.end());
    ASSERT_NE(walkerIt, parsedCmds.end());

    auto walkerCmd = genCmdCast<WALKER_TYPE *>(*walkerIt);
    auto &interfaceDescriptor = walkerCmd->getInterfaceDescriptor();
    EXPECT_EQ(interfaceDescriptor.getBindingTablePointer(), expectedOffset);
}
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhenDispatchingKernelThenBTOffsetIsCorrect) { HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhenDispatchingKernelThenBTOffsetIsCorrect) {
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE; using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA; using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;