fix: correclty program StateBaseAddress in global bindless mode

- prepare bindful ssh when kernel requires ssh heap and
SurfaceStateBaseAddress
- remove lastAppendedKernelBindlesMode - local ssh heap may be needed
for bindless kernels with scratch or misaligned buffer args
- use ssh heap gpu address to program SurfaceStateBaseAddress, global base is
used for BindlessSurfaceState and DynamicState

Related-To: NEO-7063

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2024-09-19 14:35:03 +00:00
committed by Compute-Runtime-Automation
parent 200acbe743
commit 4a068c8eab
15 changed files with 234 additions and 230 deletions

View File

@@ -58,37 +58,36 @@ struct DispatchFlags {
uint64_t sliceCountP, bool blockingP, bool dcFlushP, bool useSLMP, bool guardCommandBufferWithPipeControlP, bool gsba32BitRequiredP,
bool lowPriorityP, bool implicitFlushP, bool outOfOrderExecutionAllowedP, bool epilogueRequiredP,
bool usePerDSSbackedBufferP, bool areMultipleSubDevicesInContextP, bool memoryMigrationRequiredP, bool textureCacheFlush,
bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool stateCacheInvalidation, bool isStallingCommandsOnNextFlushRequired, bool isDcFlushRequiredOnStallingCommandsOnNextFlush, bool disableGlobalSSH) : barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
pipelineSelectArgs(pipelineSelectArgsP),
flushStampReference(flushStampReferenceP),
throttle(throttleP),
preemptionMode(preemptionModeP),
numGrfRequired(numGrfRequiredP),
l3CacheSettings(l3CacheSettingsP),
threadArbitrationPolicy(threadArbitrationPolicyP),
additionalKernelExecInfo(additionalKernelExecInfoP),
kernelExecutionType(kernelExecutionTypeP),
memoryCompressionState(memoryCompressionStateP),
sliceCount(sliceCountP),
blocking(blockingP),
dcFlush(dcFlushP),
useSLM(useSLMP),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
gsba32BitRequired(gsba32BitRequiredP),
lowPriority(lowPriorityP),
implicitFlush(implicitFlushP),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
epilogueRequired(epilogueRequiredP),
usePerDssBackedBuffer(usePerDSSbackedBufferP),
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
memoryMigrationRequired(memoryMigrationRequiredP),
textureCacheFlush(textureCacheFlush),
hasStallingCmds(hasStallingCmds),
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies),
stateCacheInvalidation(stateCacheInvalidation),
isStallingCommandsOnNextFlushRequired(isStallingCommandsOnNextFlushRequired),
isDcFlushRequiredOnStallingCommandsOnNextFlush(isDcFlushRequiredOnStallingCommandsOnNextFlush),
disableGlobalSSH(disableGlobalSSH){};
bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool stateCacheInvalidation, bool isStallingCommandsOnNextFlushRequired, bool isDcFlushRequiredOnStallingCommandsOnNextFlush) : barrierTimestampPacketNodes(barrierTimestampPacketNodesP),
pipelineSelectArgs(pipelineSelectArgsP),
flushStampReference(flushStampReferenceP),
throttle(throttleP),
preemptionMode(preemptionModeP),
numGrfRequired(numGrfRequiredP),
l3CacheSettings(l3CacheSettingsP),
threadArbitrationPolicy(threadArbitrationPolicyP),
additionalKernelExecInfo(additionalKernelExecInfoP),
kernelExecutionType(kernelExecutionTypeP),
memoryCompressionState(memoryCompressionStateP),
sliceCount(sliceCountP),
blocking(blockingP),
dcFlush(dcFlushP),
useSLM(useSLMP),
guardCommandBufferWithPipeControl(guardCommandBufferWithPipeControlP),
gsba32BitRequired(gsba32BitRequiredP),
lowPriority(lowPriorityP),
implicitFlush(implicitFlushP),
outOfOrderExecutionAllowed(outOfOrderExecutionAllowedP),
epilogueRequired(epilogueRequiredP),
usePerDssBackedBuffer(usePerDSSbackedBufferP),
areMultipleSubDevicesInContext(areMultipleSubDevicesInContextP),
memoryMigrationRequired(memoryMigrationRequiredP),
textureCacheFlush(textureCacheFlush),
hasStallingCmds(hasStallingCmds),
hasRelaxedOrderingDependencies(hasRelaxedOrderingDependencies),
stateCacheInvalidation(stateCacheInvalidation),
isStallingCommandsOnNextFlushRequired(isStallingCommandsOnNextFlushRequired),
isDcFlushRequiredOnStallingCommandsOnNextFlush(isDcFlushRequiredOnStallingCommandsOnNextFlush){};
CsrDependencies csrDependencies{};
TimestampPacketContainer *barrierTimestampPacketNodes = nullptr;
@@ -123,7 +122,6 @@ struct DispatchFlags {
bool stateCacheInvalidation = false;
bool isStallingCommandsOnNextFlushRequired = false;
bool isDcFlushRequiredOnStallingCommandsOnNextFlush = false;
bool disableGlobalSSH = true;
};
struct CsrSizeRequestFlags {