mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 23:56:39 +08:00
Add surface state programming for kernels with images and stateless buffers
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
76e023b941
commit
0b26ee3664
@@ -110,7 +110,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
|||||||
|
|
||||||
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||||
uint32_t bindingTablePointer = 0u;
|
uint32_t bindingTablePointer = 0u;
|
||||||
if (kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) {
|
if ((kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) ||
|
||||||
|
kernelDescriptor.kernelAttributes.flags.usesImages) {
|
||||||
container.prepareBindfulSsh();
|
container.prepareBindfulSsh();
|
||||||
if (bindingTableStateCount > 0u) {
|
if (bindingTableStateCount > 0u) {
|
||||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||||
@@ -343,7 +344,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
//check if we need to follow kernel requirements
|
// check if we need to follow kernel requirements
|
||||||
if (requireInputWalkOrder) {
|
if (requireInputWalkOrder) {
|
||||||
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
||||||
if (!Math::isPow2<size_t>(lws[walkOrder[dimension]])) {
|
if (!Math::isPow2<size_t>(lws[walkOrder[dimension]])) {
|
||||||
@@ -365,7 +366,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
//kernel doesn't specify any walk order requirements, check if we have any compatible
|
// kernel doesn't specify any walk order requirements, check if we have any compatible
|
||||||
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
|
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
|
||||||
bool allDimensionsCompatible = true;
|
bool allDimensionsCompatible = true;
|
||||||
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
||||||
@@ -431,9 +432,9 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
|
|||||||
walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
|
walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
//1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
|
// 1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
|
||||||
//so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
|
// so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
|
||||||
//2) Auto-generation of local ids should be possible, when in fact local ids are used
|
// 2) Auto-generation of local ids should be possible, when in fact local ids are used
|
||||||
if (!localIdsGenerationByRuntime && localIdDimensions > 0) {
|
if (!localIdsGenerationByRuntime && localIdDimensions > 0) {
|
||||||
UNRECOVERABLE_IF(localIdDimensions != 3);
|
UNRECOVERABLE_IF(localIdDimensions != 3);
|
||||||
uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2);
|
uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2);
|
||||||
|
|||||||
@@ -138,6 +138,45 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenOverrideSlmTotalSizeD
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dispatches a kernel whose buffers are addressed statelessly but which also uses images,
// then checks that the binding table pointer programmed into the walker's interface
// descriptor equals the aligned offset that was free in the surface state heap before encoding.
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenStatelessBufferAndImageWhenDispatchingKernelThenBindingTableOffsetIsCorrect) {
    using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
    using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;

    uint32_t bindingTableEntryCount = 1;
    BINDING_TABLE_STATE bindingTable = FamilyType::cmdInitBindingTableState;

    // Consume a few bytes of the surface state heap up front so the expected offset is
    // non-zero and the alignment to SURFACESTATEPOINTER_ALIGN_SIZE is actually exercised.
    auto surfaceStateHeap = cmdContainer->getIndirectHeap(HeapType::SURFACE_STATE);
    surfaceStateHeap->getSpace(0x20);
    const auto sshBytesUsed = static_cast<uint32_t>(surfaceStateHeap->getUsed());
    auto expectedOffset = alignUp(sshBytesUsed, BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);

    uint32_t dims[] = {2, 1, 1};

    // Kernel description under test: a single binding table entry, stateless buffers, images in use.
    auto dispatchInterface = std::make_unique<MockDispatchKernelEncoder>();
    auto &descriptor = dispatchInterface->kernelDescriptor;
    descriptor.payloadMappings.bindingTable.numEntries = bindingTableEntryCount;
    descriptor.payloadMappings.bindingTable.tableOffset = 0;
    descriptor.kernelAttributes.bufferAddressingMode = KernelDescriptor::Stateless;
    descriptor.kernelAttributes.flags.usesImages = true;

    // The mock hands out the local binding table state as the kernel's surface state heap data.
    dispatchInterface->getSurfaceStateHeapDataResult = reinterpret_cast<unsigned char *>(&bindingTable);
    dispatchInterface->getSurfaceStateHeapDataSizeResult = static_cast<uint32_t>(sizeof(BINDING_TABLE_STATE));

    bool requiresUncachedMocs = false;
    EncodeDispatchKernelArgs dispatchArgs = createDefaultDispatchKernelArgs(pDevice, dispatchInterface.get(), dims, requiresUncachedMocs);
    EncodeDispatchKernel<FamilyType>::encode(*cmdContainer, dispatchArgs);

    // Parse everything written to the command stream and locate the walker command.
    auto commandStream = cmdContainer->getCommandStream();
    GenCmdList parsedCommands;
    CmdParse<FamilyType>::parseCommandBuffer(parsedCommands, ptrOffset(commandStream->getCpuBase(), 0), commandStream->getUsed());

    auto walkerIt = find<WALKER_TYPE *>(parsedCommands.begin(), parsedCommands.end());
    ASSERT_NE(walkerIt, parsedCommands.end());

    auto walkerCmd = genCmdCast<WALKER_TYPE *>(*walkerIt);
    auto &idd = walkerCmd->getInterfaceDescriptor();

    EXPECT_EQ(idd.getBindingTablePointer(), expectedOffset);
}
|
||||||
|
|
||||||
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhenDispatchingKernelThenBTOffsetIsCorrect) {
|
HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givennumBindingTableOneWhenDispatchingKernelThenBTOffsetIsCorrect) {
|
||||||
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
using BINDING_TABLE_STATE = typename FamilyType::BINDING_TABLE_STATE;
|
||||||
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
using INTERFACE_DESCRIPTOR_DATA = typename FamilyType::INTERFACE_DESCRIPTOR_DATA;
|
||||||
|
|||||||
Reference in New Issue
Block a user