mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-11 08:07:19 +08:00
fix: flush caches during host sync
Related-To: NEO-14360
Signed-off-by: Szymon Morek <szymon.morek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ab98dad17e
commit
ffed307d15
@@ -4489,16 +4489,10 @@ void CommandListCoreFamily<gfxCoreFamily>::dispatchPostSyncCommands(const CmdLis
|
||||
value,
|
||||
device->getNEODevice()->getRootDeviceEnvironment(),
|
||||
pipeControlArgs);
|
||||
|
||||
if (syncCmdBuffer != nullptr) {
|
||||
*syncCmdBuffer = pipeControlArgs.postSyncCmd;
|
||||
}
|
||||
if (productHelper.isNonCoherentTimestampsModeEnabled()) {
|
||||
pipeControlArgs = {};
|
||||
pipeControlArgs.dcFlushEnable = getDcFlushRequired(signalScope);
|
||||
NEO::MemorySynchronizationCommands<GfxFamily>::addSingleBarrier(
|
||||
*commandContainer.getCommandStream(),
|
||||
pipeControlArgs);
|
||||
}
|
||||
}
|
||||
|
||||
if (eventOperations.isTimestmapEvent && !skipPartitionOffsetProgramming) {
|
||||
|
||||
@@ -751,7 +751,8 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
|
||||
auto isHeaplessModeDisabled = !this->device->getCompilerProductHelper().isHeaplessModeEnabled(hwInfo);
|
||||
auto isDcFlushAllowed = this->device->getProductHelper().isDcFlushAllowed();
|
||||
auto isFlushForOptimizedBarrierRequired = isDcFlushAllowed && this->isEventOnBarrierOptimized;
|
||||
if ((((this->isCounterBased() && !this->inOrderTimestampNode.empty()) || this->mitigateHostVisibleSignal) && isDcFlushAllowed && isHeaplessModeDisabled) ||
|
||||
auto isPostSyncWriteCachedInL2 = this->device->getProductHelper().isNonCoherentTimestampsModeEnabled();
|
||||
if ((((this->isCounterBased() && !this->inOrderTimestampNode.empty()) || this->mitigateHostVisibleSignal || isPostSyncWriteCachedInL2) && isDcFlushAllowed && isHeaplessModeDisabled) ||
|
||||
isFlushForOptimizedBarrierRequired) {
|
||||
auto lock = this->csrs[0]->obtainUniqueOwnership();
|
||||
this->csrs[0]->flushTagUpdate();
|
||||
|
||||
@@ -241,9 +241,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
|
||||
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait();
|
||||
|
||||
size_t postSyncSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
|
||||
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
|
||||
postSyncSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
|
||||
}
|
||||
|
||||
auto useSizeBefore = cmdListStream->getUsed();
|
||||
auto result = commandList->appendBarrier(eventHandle, 0, nullptr, false);
|
||||
auto useSizeAfter = cmdListStream->getUsed();
|
||||
|
||||
@@ -389,9 +389,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) +
|
||||
commandList->estimateBufferSizeMultiTileBarrier(device->getNEODevice()->getRootDeviceEnvironment());
|
||||
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
|
||||
expectedSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
}
|
||||
size_t usedSize = cmdStream->getUsed();
|
||||
EXPECT_EQ(expectedSize, usedSize);
|
||||
|
||||
|
||||
@@ -521,9 +521,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();
|
||||
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
|
||||
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
|
||||
expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
|
||||
}
|
||||
size_t usedSize = cmdStream->getUsed();
|
||||
EXPECT_EQ(expectedSize, usedSize);
|
||||
|
||||
@@ -565,9 +562,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false));
|
||||
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
|
||||
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
|
||||
expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
|
||||
}
|
||||
|
||||
auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout();
|
||||
|
||||
if (!unifiedPostSyncLayout) {
|
||||
@@ -719,9 +714,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
||||
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
|
||||
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
|
||||
expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier();
|
||||
}
|
||||
size_t usedSize = cmdStream->getUsed();
|
||||
EXPECT_EQ(expectedSize, usedSize);
|
||||
|
||||
@@ -786,9 +778,6 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
|
||||
auto gpuAddress = event->getCompletionFieldGpuAddress(device);
|
||||
|
||||
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
|
||||
if (device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
|
||||
expectedSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
}
|
||||
size_t usedSize = cmdStream->getUsed();
|
||||
EXPECT_EQ(expectedSize, usedSize);
|
||||
|
||||
|
||||
@@ -561,9 +561,6 @@ void testMultiTileAppendMemoryFillManyKernels(FillTestInput &input, TestExpected
|
||||
// 1st dc flush after cross-tile sync, 2nd dc flush for signal scope event
|
||||
expectedDcFlush = NEO::ImplicitScalingDispatch<FamilyType>::getPipeControlStallRequired() ? 1 : 0;
|
||||
expectedDcFlush++;
|
||||
if (!input.eventPoolFlags && !input.allPackets && input.device->getProductHelper().isNonCoherentTimestampsModeEnabled()) {
|
||||
expectedDcFlush++;
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_EQ(arg.expectedPacketsInUse, event->getPacketsInUse());
|
||||
|
||||
@@ -4914,7 +4914,15 @@ HWTEST_F(InOrderCmdListTests, givenEventCounterNotReusedFromPreviousAppendWhenHo
|
||||
|
||||
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
|
||||
events[0]->hostSynchronize(std::numeric_limits<uint64_t>::max());
|
||||
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
auto isHeaplessModeDisabled = !device->getCompilerProductHelper().isHeaplessModeEnabled(hwInfo);
|
||||
auto isDcFlushAllowed = device->getProductHelper().isDcFlushAllowed();
|
||||
auto isPostSyncWriteCachedInL2 = device->getProductHelper().isNonCoherentTimestampsModeEnabled();
|
||||
if (isPostSyncWriteCachedInL2 && isDcFlushAllowed && isHeaplessModeDisabled) {
|
||||
EXPECT_TRUE(ultCsr->flushTagUpdateCalled);
|
||||
} else {
|
||||
EXPECT_FALSE(ultCsr->flushTagUpdateCalled);
|
||||
}
|
||||
EXPECT_EQ(2u, events[0]->inOrderExecSignalValue);
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include "shared/source/built_ins/sip.h"
|
||||
#include "shared/source/gmm_helper/gmm.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/compiler_product_helper.h"
|
||||
#include "shared/source/memory_manager/gfx_partition.h"
|
||||
#include "shared/source/utilities/buffer_pool_allocator.inl"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
@@ -4930,7 +4931,15 @@ HWTEST2_F(EventMultiTileDynamicPacketUseTest, givenEventUsedCreatedOnSubDeviceBu
|
||||
|
||||
EXPECT_EQ(1u, ultCsr1->downloadAllocationsCalledCount);
|
||||
EXPECT_TRUE(ultCsr1->latestDownloadAllocationsBlocking);
|
||||
EXPECT_EQ(0u, downloadCounter1);
|
||||
auto &hwInfo = device->getHwInfo();
|
||||
auto isHeaplessModeDisabled = !device->getCompilerProductHelper().isHeaplessModeEnabled(hwInfo);
|
||||
auto isDcFlushAllowed = device->getProductHelper().isDcFlushAllowed();
|
||||
auto isPostSyncWriteCachedInL2 = device->getProductHelper().isNonCoherentTimestampsModeEnabled();
|
||||
if (isPostSyncWriteCachedInL2 && isDcFlushAllowed && isHeaplessModeDisabled) {
|
||||
EXPECT_EQ(1u, downloadCounter1);
|
||||
} else {
|
||||
EXPECT_EQ(0u, downloadCounter1);
|
||||
}
|
||||
|
||||
EXPECT_EQ(0u, ultCsr2->downloadAllocationsCalledCount);
|
||||
EXPECT_EQ(0u, downloadCounter2);
|
||||
|
||||
Reference in New Issue
Block a user