Add surface state programming for kernels with images and stateless buffers

Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
Kamil Kopryk
2022-07-04 01:28:25 +00:00
committed by Compute-Runtime-Automation
parent 76e023b941
commit 0b26ee3664
2 changed files with 46 additions and 6 deletions

View File

@@ -110,7 +110,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
uint32_t bindingTablePointer = 0u;
if (kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) {
if ((kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) ||
kernelDescriptor.kernelAttributes.flags.usesImages) {
container.prepareBindfulSsh();
if (bindingTableStateCount > 0u) {
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
@@ -343,7 +344,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
return true;
}
//check if we need to follow kernel requirements
// check if we need to follow kernel requirements
if (requireInputWalkOrder) {
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
if (!Math::isPow2<size_t>(lws[walkOrder[dimension]])) {
@@ -365,7 +366,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
return false;
}
//kernel doesn't specify any walk order requirements, check if we have any compatible
// kernel doesn't specify any walk order requirements, check if we have any compatible
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
bool allDimensionsCompatible = true;
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
@@ -431,9 +432,9 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
}
//1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
//so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
//2) Auto-generation of local ids should be possible, when in fact local ids are used
// 1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
// so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
// 2) Auto-generation of local ids should be possible, when in fact local ids are used
if (!localIdsGenerationByRuntime && localIdDimensions > 0) {
UNRECOVERABLE_IF(localIdDimensions != 3);
uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2);