fix: flush caches during host sync

Related-To: NEO-14360

Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
Szymon Morek
2025-08-11 14:09:27 +00:00
committed by Compute-Runtime-Automation
parent ab98dad17e
commit ffed307d15
8 changed files with 24 additions and 31 deletions

View File

@@ -4489,16 +4489,10 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdLis
value,
device->getNEODevice()->getRootDeviceEnvironment(),
pipeControlArgs);
if (syncCmdBuffer != nullptr) {
*syncCmdBuffer = pipeControlArgs.postSyncCmd;
}
if (productHelper.isNonCoherentTimestampsModeEnabled()) {
pipeControlArgs = {};
pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope);
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(
*commandContainer.getCommandStream(),
pipeControlArgs);
}
}
if (eventOperations.isTimestmapEvent && !skipPartitionOffsetProgramming) {

View File

@@ -751,7 +751,8 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
auto isHeaplessModeDisabled = !this->device->getCompilerProductHelper().isHeaplessModeEnabled(hwInfo);
auto isDcFlushAllowed = this->device->getProductHelper().isDcFlushAllowed();
auto isFlushForOptimizedBarrierRequired = isDcFlushAllowed && this->isEventOnBarrierOptimized;
if ((((this->isCounterBased() && !this->inOrderTimestampNode.empty()) || this->mitigateHostVisibleSignal) && isDcFlushAllowed && isHeaplessModeDisabled) ||
auto isPostSyncWriteCachedInL2 = this->device->getProductHelper().isNonCoherentTimestampsModeEnabled();
if ((((this->isCounterBased() && !this->inOrderTimestampNode.empty()) || this->mitigateHostVisibleSignal || isPostSyncWriteCachedInL2) && isDcFlushAllowed && isHeaplessModeDisabled) ||
isFlushForOptimizedBarrierRequired) {
auto lock = this->csrs[0]->obtainUniqueOwnership();
this->csrs[0]->flushTagUpdate();

View File

@@ -241,9 +241,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait();
size_t postSyncSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
postSyncSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
}
auto useSizeBefore = cmdListStream->getUsed();
auto result = commandList->appendBarrier(eventHandle, 0, nullptr, false);
auto useSizeAfter = cmdListStream->getUsed();

View File

@@ -389,9 +389,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) +
commandList->estimateBufferSizeMultiTileBarrier(device->getNEODevice()->getRootDeviceEnvironment());
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
expectedSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
}
size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize);

View File

@@ -521,9 +521,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
}
size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize);
@@ -565,9 +562,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false));
size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
}
auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout();
if (!unifiedPostSyncLayout) {
@@ -719,9 +714,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
}
size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize);
@@ -786,9 +778,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
expectedSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
}
size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize);

View File

@@ -561,9 +561,6 @@ void testMultiTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpected
// 1st dc flush after cross-tile sync, 2nd dc flush for signal scope event
expectedDcFlush = NEO::ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired() ? 1 : 0;
expectedDcFlush++;
if (!input.eventPoolFlags && !input.allPackets && input.device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
expectedDcFlush++;
}
}
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());

View File

@@ -4914,7 +4914,15 @@ HWTEST_F(InOrderCmdListTests, givenEventCounterNotReusedFromPreviousAppendWhenHo
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
events[0]->hostSynchronize(std::numeric_limits<uint64_t>::max());
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
auto &hwInfo = device->getHwInfo();
auto isHeaplessModeDisabled = !device->getCompilerProductHelper().isHeaplessModeEnabled(hwInfo);
auto isDcFlushAllowed = device->getProductHelper().isDcFlushAllowed();
auto isPostSyncWriteCachedInL2 = device->getProductHelper().isNonCoherentTimestampsModeEnabled();
if (isPostSyncWriteCachedInL2 && isDcFlushAllowed && isHeaplessModeDisabled) {
EXPECT_TRUE(ultCsr->flushTagUpdateCalled);
} else {
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
}
EXPECT_EQ(2u, events[0]->inOrderExecSignalValue);
}

View File

@@ -8,6 +8,7 @@
#include "shared/source/built_ins/sip.h"
#include "shared/source/gmm_helper/gmm.h"
#include "shared/source/helpers/aligned_memory.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/memory_manager/gfx_partition.h"
#include "shared/source/utilities/buffer_pool_allocator.inl"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
@@ -4930,7 +4931,15 @@ HWTEST2_F(EventMultiTileDynamicPacketUseTest, givenEventUsedCreatedOnSubDeviceBu
EXPECT_EQ(1u, ultCsr1->downloadAllocationsCalledCount);
EXPECT_TRUE(ultCsr1->latestDownloadAllocationsBlocking);
EXPECT_EQ(0u, downloadCounter1);
auto &hwInfo = device->getHwInfo();
auto isHeaplessModeDisabled = !device->getCompilerProductHelper().isHeaplessModeEnabled(hwInfo);
auto isDcFlushAllowed = device->getProductHelper().isDcFlushAllowed();
auto isPostSyncWriteCachedInL2 = device->getProductHelper().isNonCoherentTimestampsModeEnabled();
if (isPostSyncWriteCachedInL2 && isDcFlushAllowed && isHeaplessModeDisabled) {
EXPECT_EQ(1u, downloadCounter1);
} else {
EXPECT_EQ(0u, downloadCounter1);
}
EXPECT_EQ(0u, ultCsr2->downloadAllocationsCalledCount);
EXPECT_EQ(0u, downloadCounter2);