fix: flush cache after accessing img from buffer

Related-To: NEO-15391

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-07-23 11:39:49 +00:00
committed by Compute-Runtime-Automation
parent 7e6436563e
commit bbcca97682
8 changed files with 80 additions and 8 deletions

View File

@@ -833,6 +833,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
auto systolicPipelineSelectMode = false;
Kernel *kernel = nullptr;
bool auxTranslationRequired = false;
bool containsImageFromBuffer = false;
for (auto &dispatchInfo : multiDispatchInfo) {
if (kernel != dispatchInfo.getKernel()) {
kernel = dispatchInfo.getKernel();
@@ -849,6 +850,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
anyUncacheableArgs = true;
}
this->isCacheFlushOnNextBcsWriteRequired |= kernel->usesImages();
containsImageFromBuffer |= kernel->hasImageFromBufferArgs();
}
UNRECOVERABLE_IF(kernel == nullptr);
@@ -875,16 +877,16 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
dsh = &getIndirectHeap(IndirectHeap::Type::dynamicState, 0u);
ioh = &getIndirectHeap(IndirectHeap::Type::indirectObject, 0u);
auto allocNeedsFlushDC = false;
auto dcFlush = shouldFlushDC(commandType, printfHandler) || containsImageFromBuffer;
if (!device->isFullRangeSvm()) {
if (std::any_of(csr.getResidencyAllocations().begin(), csr.getResidencyAllocations().end(), [](const auto allocation) { return allocation->isFlushL3Required(); })) {
allocNeedsFlushDC = true;
dcFlush |= true;
}
}
auto memoryCompressionState = csr.getMemoryCompressionState(auxTranslationRequired);
bool hasStallingCmds = enqueueProperties.hasStallingCmds || (!relaxedOrderingEnabled && (eventsRequest.numEventsInWaitList > 0 || timestampPacketDependencies.previousEnqueueNodes.peekNodes().size() > 0));
auto textureCacheFlush = isTextureCacheFlushNeeded(commandType) || containsImageFromBuffer;
DispatchFlags dispatchFlags(
&timestampPacketDependencies.barrierNodes, // barrierTimestampPacketNodes
{}, // pipelineSelectArgs
@@ -899,7 +901,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
memoryCompressionState, // memoryCompressionState
getSliceCount(), // sliceCount
blocking, // blocking
shouldFlushDC(commandType, printfHandler) || allocNeedsFlushDC, // dcFlush
dcFlush, // dcFlush
multiDispatchInfo.usesSlm(), // useSLM
!csr.isUpdateTagFromWaitEnabled() || commandType == CL_COMMAND_FILL_BUFFER, // guardCommandBufferWithPipeControl
commandType == CL_COMMAND_NDRANGE_KERNEL, // GSBA32BitRequired
@@ -910,7 +912,7 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
false, // usePerDssBackedBuffer
kernel->areMultipleSubDevicesInContext(), // areMultipleSubDevicesInContext
kernel->requiresMemoryMigration(), // memoryMigrationRequired
isTextureCacheFlushNeeded(commandType), // textureCacheFlush
textureCacheFlush, // textureCacheFlush
hasStallingCmds, // hasStallingCmds
relaxedOrderingEnabled, // hasRelaxedOrderingDependencies
false, // stateCacheInvalidation