fix: set stateCacheInvalidationEnable to flush cache

when a blit operation is performed and dcflush is needed

resolves the problem of corruptions visible when switching between ccs and bcs
on platforms without engine coherency

Resolves: NEO-7577
Signed-off-by: Cencelewska, Katarzyna <katarzyna.cencelewska@intel.com>
This commit is contained in:
Cencelewska, Katarzyna
2023-01-12 16:58:18 +00:00
committed by Compute-Runtime-Automation
parent 2ea734491a
commit f94528097a
8 changed files with 91 additions and 42 deletions

View File

@@ -844,7 +844,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueNonBlocked(
kernel->requiresMemoryMigration(), // memoryMigrationRequired
isTextureCacheFlushNeeded(commandType), // textureCacheFlush
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
false, // hasRelaxedOrderingDependencies
false); // stateCacheInvalidation
dispatchFlags.pipelineSelectArgs.mediaSamplerRequired = mediaSamplerRequired;
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode = systolicPipelineSelectMode;
@@ -1060,6 +1061,10 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
for (auto surface : createRange(surfaces, surfaceCount)) {
surface->makeResident(getGpgpuCommandStreamReceiver());
}
bool stateCacheInvalidationNeeded = false;
if (getGpgpuCommandStreamReceiver().getDcFlushSupport() && enqueueProperties.operation == EnqueueProperties::Operation::Blit) {
stateCacheInvalidationNeeded = true;
}
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
DispatchFlags dispatchFlags(
@@ -1092,7 +1097,8 @@ CompletionStamp CommandQueueHw<GfxFamily>::enqueueCommandWithoutKernel(
false, // memoryMigrationRequired
false, // textureCacheFlush
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
false, // hasRelaxedOrderingDependencies
stateCacheInvalidationNeeded); // stateCacheInvalidation
const bool isHandlingBarrier = getGpgpuCommandStreamReceiver().isStallingCommandsOnNextFlushRequired();

View File

@@ -81,7 +81,8 @@ CompletionStamp &CommandMapUnmap::submit(TaskCountType taskLevel, bool terminate
false, // memoryMigrationRequired
false, // textureCacheFlush
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
false, // hasRelaxedOrderingDependencies
false); // stateCacheInvalidation
DEBUG_BREAK_IF(taskLevel >= CompletionStamp::notReady);
@@ -215,7 +216,8 @@ CompletionStamp &CommandComputeKernel::submit(TaskCountType taskLevel, bool term
kernel->requiresMemoryMigration(), // memoryMigrationRequired
commandQueue.isTextureCacheFlushNeeded(this->commandType), // textureCacheFlush
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
false, // hasRelaxedOrderingDependencies
false); // stateCacheInvalidation
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);
@@ -390,7 +392,8 @@ CompletionStamp &CommandWithoutKernel::submit(TaskCountType taskLevel, bool term
false, // memoryMigrationRequired
false, // textureCacheFlush
false, // hasStallingCmds
false); // hasRelaxedOrderingDependencies
false, // hasRelaxedOrderingDependencies
false); // stateCacheInvalidation
if (commandQueue.getContext().getRootDeviceIndices().size() > 1) {
eventsRequest.fillCsrDependenciesForRootDevices(dispatchFlags.csrDependencies, commandStreamReceiver);

View File

@@ -366,6 +366,43 @@ HWTEST_F(DispatchFlagsBlitTests, givenBlitEnqueueWhenDispatchingCommandsWithoutK
EXPECT_EQ(GrfConfig::NotApplicable, mockCsr->passedDispatchFlags.numGrfRequired);
}
// Enqueues a blit through enqueueCommandWithoutKernel() and checks that the
// stateCacheInvalidation dispatch flag seen on the GPGPU CSR mock equals the
// value reported by that CSR's getDcFlushSupport().
HWTEST_F(DispatchFlagsBlitTests, givenBlitOperationWhenEnqueueCommandWithoutKernelThenDispatchFlagStateCacheInvalidationInFlushTaskIsSetCorrectly) {
    using CsrType = MockCsrHw2<FamilyType>;
    setUpImpl<CsrType>();
    REQUIRE_FULL_BLITTER_OR_SKIP(device->getRootDeviceEnvironment());

    auto cmdQueue = std::make_unique<MockCommandQueueHw<FamilyType>>(context.get(), device.get(), nullptr);
    auto &gpgpuCsr = static_cast<CsrType &>(cmdQueue->getGpgpuCommandStreamReceiver());
    gpgpuCsr.skipBlitCalls = true;

    cl_int status = CL_SUCCESS;
    std::unique_ptr<Buffer> srcBuffer{Buffer::create(context.get(), 0, 1, nullptr, status)};
    auto &blitterCsr = *cmdQueue->bcsEngines[0]->commandStreamReceiver;

    // Kept as a named, mutable local: it is handed to enqueueCommandWithoutKernel() below.
    bool blocking = true;
    TimestampPacketDependencies tsPacketDeps;
    EventsRequest eventsReq(0, nullptr, nullptr);
    EventBuilder evBuilder;

    // Describe a buffer -> host pointer transfer for the blit.
    BuiltinOpParams opParams;
    opParams.srcMemObj = srcBuffer.get();
    opParams.dstPtr = reinterpret_cast<void *>(0x1234);

    MultiDispatchInfo dispatchInfo;
    dispatchInfo.setBuiltinOpParams(opParams);
    CsrDependencies dependencies;

    BlitProperties blitProps = cmdQueue->processDispatchForBlitEnqueue(blitterCsr, dispatchInfo, tsPacketDeps,
                                                                       eventsReq, &cmdQueue->getCS(0), CL_COMMAND_READ_BUFFER, false, nullptr);
    BlitPropertiesContainer blitContainer;
    blitContainer.push_back(blitProps);

    // NOTE(review): the leading `true` presumably marks this enqueue as a blit operation;
    // confirm against the EnqueueProperties constructor.
    EnqueueProperties enqueueProps(true, false, false, false, false, &blitContainer);
    cmdQueue->enqueueCommandWithoutKernel(nullptr, 0, &cmdQueue->getCS(0), 0, blocking, enqueueProps, tsPacketDeps,
                                          eventsReq, evBuilder, 0, dependencies, &blitterCsr);

    // The flag is expected to follow the GPGPU CSR's DC flush support.
    auto dcFlushSupport = cmdQueue->getGpgpuCommandStreamReceiver().getDcFlushSupport();
    EXPECT_EQ(dcFlushSupport, gpgpuCsr.passedDispatchFlags.stateCacheInvalidation);
}
HWTEST_F(DispatchFlagsBlitTests, givenN1EnabledWhenDispatchingWithoutKernelThenAllowOutOfOrderExecution) {
using CsrType = MockCsrHw2<FamilyType>;
DebugManager.flags.EnableTimestampPacket.set(1);