mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
Add surface state programming for kernels with images and stateless buffers
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
76e023b941
commit
0b26ee3664
@@ -110,7 +110,8 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
|
||||
|
||||
auto bindingTableStateCount = kernelDescriptor.payloadMappings.bindingTable.numEntries;
|
||||
uint32_t bindingTablePointer = 0u;
|
||||
if (kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) {
|
||||
if ((kernelDescriptor.kernelAttributes.bufferAddressingMode == KernelDescriptor::BindfulAndStateless) ||
|
||||
kernelDescriptor.kernelAttributes.flags.usesImages) {
|
||||
container.prepareBindfulSsh();
|
||||
if (bindingTableStateCount > 0u) {
|
||||
auto ssh = container.getHeapWithRequiredSizeAndAlignment(HeapType::SURFACE_STATE, args.dispatchInterface->getSurfaceStateHeapDataSize(), BINDING_TABLE_STATE::SURFACESTATEPOINTER_ALIGN_SIZE);
|
||||
@@ -343,7 +344,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
|
||||
return true;
|
||||
}
|
||||
|
||||
//check if we need to follow kernel requirements
|
||||
// check if we need to follow kernel requirements
|
||||
if (requireInputWalkOrder) {
|
||||
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
||||
if (!Math::isPow2<size_t>(lws[walkOrder[dimension]])) {
|
||||
@@ -365,7 +366,7 @@ bool EncodeDispatchKernel<Family>::isRuntimeLocalIdsGenerationRequired(uint32_t
|
||||
return false;
|
||||
}
|
||||
|
||||
//kernel doesn't specify any walk order requirements, check if we have any compatible
|
||||
// kernel doesn't specify any walk order requirements, check if we have any compatible
|
||||
for (uint32_t walkOrder = 0; walkOrder < HwWalkOrderHelper::walkOrderPossibilties; walkOrder++) {
|
||||
bool allDimensionsCompatible = true;
|
||||
for (uint32_t dimension = 0; dimension < activeChannels - 1; dimension++) {
|
||||
@@ -431,9 +432,9 @@ void EncodeDispatchKernel<Family>::encodeThreadData(WALKER_TYPE &walkerCmd,
|
||||
walkerCmd.setMessageSimd(DebugManager.flags.ForceSimdMessageSizeInWalker.get());
|
||||
}
|
||||
|
||||
//1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
|
||||
//so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
|
||||
//2) Auto-generation of local ids should be possible, when in fact local ids are used
|
||||
// 1) cross-thread inline data will be put into R1, but if kernel uses local ids, then cross-thread should be put further back
|
||||
// so whenever local ids are driver or hw generated, reserve space by setting right values for emitLocalIds
|
||||
// 2) Auto-generation of local ids should be possible, when in fact local ids are used
|
||||
if (!localIdsGenerationByRuntime && localIdDimensions > 0) {
|
||||
UNRECOVERABLE_IF(localIdDimensions != 3);
|
||||
uint32_t emitLocalIdsForDim = (1 << 0) | (1 << 1) | (1 << 2);
|
||||
|
||||
Reference in New Issue
Block a user