fix: add PIPE_CONTROL with stall before barrier with post-sync operation on BMG

Related-To: NEO-14491
Signed-off-by: Maciej Plewka <maciej.plewka@intel.com>
This commit is contained in:
Maciej Plewka
2025-05-26 12:06:15 +00:00
committed by Compute-Runtime-Automation
parent b7681a2e7b
commit e6f3ebce5d
55 changed files with 151 additions and 75 deletions

View File

@@ -262,7 +262,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStreamSizeForExecuteCommandListsRe
} }
if (ctx.isDispatchTaskCountPostSyncRequired) { if (ctx.isDispatchTaskCountPostSyncRequired) {
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getNEODevice()->getRootDeviceEnvironment(), true); linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(this->device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
} }
if (instructionCacheFlushRequired) { if (instructionCacheFlushRequired) {
@@ -327,7 +327,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
linearStreamSizeEstimate += this->computeDebuggerCmdsSize(ctx); linearStreamSizeEstimate += this->computeDebuggerCmdsSize(ctx);
if (ctx.isDispatchTaskCountPostSyncRequired) { if (ctx.isDispatchTaskCountPostSyncRequired) {
linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), true); linearStreamSizeEstimate += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
} }
NEO::LinearStream child(nullptr); NEO::LinearStream child(nullptr);

View File

@@ -240,7 +240,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix
sizeof(MI_STORE_DATA_IMM) + sizeof(MI_STORE_DATA_IMM) +
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait(); sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait();
size_t postSyncSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t postSyncSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
auto useSizeBefore = cmdListStream->getUsed(); auto useSizeBefore = cmdListStream->getUsed();
auto result = commandList->appendBarrier(eventHandle, 0, nullptr, false); auto result = commandList->appendBarrier(eventHandle, 0, nullptr, false);

View File

@@ -390,7 +390,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true) + size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) +
commandList->estimateBufferSizeMultiTileBarrier(device->getNEODevice()->getRootDeviceEnvironment()); commandList->estimateBufferSizeMultiTileBarrier(device->getNEODevice()->getRootDeviceEnvironment());
size_t usedSize = cmdStream->getUsed(); size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize); EXPECT_EQ(expectedSize, usedSize);

View File

@@ -520,7 +520,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset();
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
size_t usedSize = cmdStream->getUsed(); size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize); EXPECT_EQ(expectedSize, usedSize);
@@ -561,7 +561,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy
commandList->partitionCount = 2; commandList->partitionCount = 2;
EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false)); EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false));
size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t expectedSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout(); auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout();
@@ -713,7 +713,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
auto gpuAddress = event->getCompletionFieldGpuAddress(device); auto gpuAddress = event->getCompletionFieldGpuAddress(device);
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
size_t usedSize = cmdStream->getUsed(); size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize); EXPECT_EQ(expectedSize, usedSize);
@@ -777,7 +777,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent,
auto gpuAddress = event->getCompletionFieldGpuAddress(device); auto gpuAddress = event->getCompletionFieldGpuAddress(device);
size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t expectedSize = NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
size_t usedSize = cmdStream->getUsed(); size_t usedSize = cmdStream->getUsed();
EXPECT_EQ(expectedSize, usedSize); EXPECT_EQ(expectedSize, usedSize);

View File

@@ -494,7 +494,7 @@ HWTEST_F(CommandQueueCreate, GivenDispatchTaskCountPostSyncRequiredWhenExecuteCo
commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr, nullptr); commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr, nullptr);
auto estimatedSizeWithtBarrier = commandQueue->requiredSizeCalled; auto estimatedSizeWithtBarrier = commandQueue->requiredSizeCalled;
auto sizeForBarrier = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); auto sizeForBarrier = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
EXPECT_GT(sizeForBarrier, 0u); EXPECT_GT(sizeForBarrier, 0u);
EXPECT_EQ(estimatedSizeWithtBarrier, estimatedSizeWithoutBarrier + sizeForBarrier); EXPECT_EQ(estimatedSizeWithtBarrier, estimatedSizeWithoutBarrier + sizeForBarrier);

View File

@@ -485,7 +485,7 @@ HWTEST_F(CommandQueueSynchronizeTest, givenSynchronousCommandQueueWhenTagUpdateF
} else { } else {
expectedSize += sizeof(MI_BATCH_BUFFER_END); expectedSize += sizeof(MI_BATCH_BUFFER_END);
} }
expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), true); expectedSize += NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
expectedSize = alignUp(expectedSize, 8); expectedSize = alignUp(expectedSize, 8);
const ze_command_queue_desc_t desc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, 0, 0, 0, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; const ze_command_queue_desc_t desc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, 0, 0, 0, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL};

View File

@@ -75,7 +75,7 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
if (blitEnqueue) { if (blitEnqueue) {
size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>(); size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
if (commandQueueHw.isCacheFlushForBcsRequired()) { if (commandQueueHw.isCacheFlushForBcsRequired()) {
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
} }
return expectedSizeCS; return expectedSizeCS;
@@ -129,7 +129,7 @@ size_t EnqueueOperation<GfxFamily>::getTotalSizeRequiredCS(uint32_t eventType, c
if (outEvent) { if (outEvent) {
auto pEvent = castToObjectOrAbort<Event>(*outEvent); auto pEvent = castToObjectOrAbort<Event>(*outEvent);
if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) { if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) {
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
} }
} }
expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(); expectedSizeCS += MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();

View File

@@ -104,7 +104,7 @@ struct AubWalkerPartitionFixture : public KernelAUBFixture<SimpleKernelFixture>
uint8_t buffer[256]; uint8_t buffer[256];
LinearStream stream(buffer, 256); LinearStream stream(buffer, 256);
MemorySynchronizationCommands<FamilyType>::addBarrierWa(stream, 0ull, rootDeviceEnvironment); MemorySynchronizationCommands<FamilyType>::addBarrierWa(stream, 0ull, rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
void *syncPipeControlAddress = reinterpret_cast<void *>(reinterpret_cast<size_t>(startAddress) + stream.getUsed()); void *syncPipeControlAddress = reinterpret_cast<void *>(reinterpret_cast<size_t>(startAddress) + stream.getUsed());
PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(syncPipeControlAddress); PIPE_CONTROL *pipeControl = genCmdCast<PIPE_CONTROL *>(syncPipeControlAddress);
return pipeControl; return pipeControl;

View File

@@ -508,7 +508,7 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredC
size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>(); size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
auto &rootDeviceEnvironment = device->getRootDeviceEnvironment(); auto &rootDeviceEnvironment = device->getRootDeviceEnvironment();
size_t cacheFlushSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size_t cacheFlushSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
setMockKernelArgs(std::array<Buffer *, 3>{{buffer0.get(), buffer1.get(), buffer2.get()}}); setMockKernelArgs(std::array<Buffer *, 3>{{buffer0.get(), buffer1.get(), buffer2.get()}});

View File

@@ -571,7 +571,7 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenE
EXPECT_EQ(CL_SUCCESS, enqueueResult); EXPECT_EQ(CL_SUCCESS, enqueueResult);
auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation( auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(
pDevice->getRootDeviceEnvironment(), true), pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData),
MemoryConstants::cacheLineSize); MemoryConstants::cacheLineSize);
EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), requiredCmdStreamSize); EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), requiredCmdStreamSize);

View File

@@ -996,7 +996,7 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe
auto pipeControls = findAll<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end()); auto pipeControls = findAll<typename FamilyType::PIPE_CONTROL *>(cmdList.begin(), cmdList.end());
auto additionalPcCount = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation( auto additionalPcCount = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(
pDevice->getRootDeviceEnvironment(), true) / pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) /
sizeof(typename FamilyType::PIPE_CONTROL); sizeof(typename FamilyType::PIPE_CONTROL);
// |AuxToNonAux|NDR|NonAuxToAux| // |AuxToNonAux|NDR|NonAuxToAux|

View File

@@ -442,7 +442,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForMultiDeviceContextWhenCalcul
auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, nullptr); auto baseCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, nullptr);
auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, &clEvent); auto extendedCommandStreamSize = EnqueueOperation<FamilyType>::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, &clEvent);
EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getRootDeviceEnvironment(), true), extendedCommandStreamSize); EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData), extendedCommandStreamSize);
} }
HWTEST2_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned, IsHeapfulSupported) { HWTEST2_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned, IsHeapfulSupported) {

View File

@@ -74,7 +74,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAndCommandBufferAreNot
size_t expectedStreamSize = 0; size_t expectedStreamSize = 0;
if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && (!pCmdQ->getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled())) { if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && (!pCmdQ->getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled())) {
expectedStreamSize = alignUp(MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation( expectedStreamSize = alignUp(MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(
pDevice->getRootDeviceEnvironment(), true), pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData),
MemoryConstants::cacheLineSize); MemoryConstants::cacheLineSize);
} }
EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS); EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS);
@@ -100,7 +100,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAndCommandBufferAreNo
size_t expectedStreamSize = 0; size_t expectedStreamSize = 0;
if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) {
auto unalignedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + auto unalignedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) +
EncodeStoreMemory<FamilyType>::getStoreDataImmSize() + EncodeStoreMemory<FamilyType>::getStoreDataImmSize() +
sizeof(typename FamilyType::MI_BATCH_BUFFER_END); sizeof(typename FamilyType::MI_BATCH_BUFFER_END);
expectedStreamSize = alignUp(unalignedSize, MemoryConstants::cacheLineSize); expectedStreamSize = alignUp(unalignedSize, MemoryConstants::cacheLineSize);

View File

@@ -1719,7 +1719,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSet
DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags();
dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies.barrierNodes; dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies.barrierNodes;
size_t expectedCmdSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size_t expectedCmdSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
EXPECT_EQ(expectedCmdSize, estimatedCmdSize); EXPECT_EQ(expectedCmdSize, estimatedCmdSize);

View File

@@ -820,7 +820,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverHwTestXeHPAndLaterWithMockCsrHw, givenS
commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->staticWorkPartitioningEnabled = true;
commandStreamReceiver->activePartitions = 1; commandStreamReceiver->activePartitions = 1;
size_t expectedCmdSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size_t expectedCmdSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
EXPECT_EQ(expectedCmdSize, estimatedCmdSize); EXPECT_EQ(expectedCmdSize, estimatedCmdSize);
@@ -867,7 +867,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverHwTestXeHPAndLaterWithMockCsrHw, givenS
commandStreamReceiver->staticWorkPartitioningEnabled = false; commandStreamReceiver->staticWorkPartitioningEnabled = false;
commandStreamReceiver->activePartitions = 2; commandStreamReceiver->activePartitions = 2;
size_t expectedCmdSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size_t expectedCmdSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags);
EXPECT_EQ(expectedCmdSize, estimatedCmdSize); EXPECT_EQ(expectedCmdSize, estimatedCmdSize);
@@ -917,7 +917,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverHwTestXeHPAndLaterWithMockCsrHw, givenS
commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->staticWorkPartitioningEnabled = true;
commandStreamReceiver->activePartitions = 2; commandStreamReceiver->activePartitions = 2;
size_t expectedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + size_t expectedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) +
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() +
sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_BATCH_BUFFER_START) +
2 * sizeof(uint32_t); 2 * sizeof(uint32_t);

View File

@@ -396,7 +396,7 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstima
flags.isStallingCommandsOnNextFlushRequired = true; flags.isStallingCommandsOnNextFlushRequired = true;
auto sizeWithPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice()); auto sizeWithPcRequest = device->getUltCommandStreamReceiver<FamilyType>().getRequiredCmdStreamSize(flags, device->getDevice());
size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getRootDeviceEnvironment(), true); size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(device->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
EXPECT_EQ(sizeWithPcRequest, extendedSize); EXPECT_EQ(sizeWithPcRequest, extendedSize);
} }

View File

@@ -962,7 +962,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati
auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>(); auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<FamilyType>();
if (cmdQ->isCacheFlushForBcsRequired()) { if (cmdQ->isCacheFlushForBcsRequired()) {
expectedSize += MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(cmdQ->getDevice().getRootDeviceEnvironment(), true); expectedSize += MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(cmdQ->getDevice().getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
} }
EXPECT_EQ(expectedSize, readBufferCmdsSize); EXPECT_EQ(expectedSize, readBufferCmdsSize);

View File

@@ -581,7 +581,7 @@ XE_HPC_CORETEST_F(GfxCoreHelperTestsXeHpcCore, whenPipecontrolWaIsProgrammedThen
LinearStream cmdStream(buffer, sizeof(buffer)); LinearStream cmdStream(buffer, sizeof(buffer));
uint64_t gpuAddress = 0x1234; uint64_t gpuAddress = 0x1234;
MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment()); MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(buffer); auto pipeControl = genCmdCast<PIPE_CONTROL *>(buffer);
ASSERT_NE(nullptr, pipeControl); ASSERT_NE(nullptr, pipeControl);

View File

@@ -338,7 +338,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
if (!args.makeCommandView) { if (!args.makeCommandView) {
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false)); void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer); args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
@@ -475,7 +475,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
if (!args.makeCommandView) { if (!args.makeCommandView) {
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false)); void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer); args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);

View File

@@ -345,7 +345,7 @@ void programPostSyncPipeControlCommand(void *&inputAddress,
rootDeviceEnvironment, rootDeviceEnvironment,
flushArgs); flushArgs);
totalBytesProgrammed += static_cast<uint32_t>(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true)); totalBytesProgrammed += static_cast<uint32_t>(NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData));
} }
template <typename GfxFamily> template <typename GfxFamily>
@@ -828,7 +828,7 @@ uint64_t computeBarrierControlSectionOffset(WalkerPartitionArgs &args,
} }
if (args.usePostSync) { if (args.usePostSync) {
offset += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); offset += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
} else { } else {
offset += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier(); offset += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
} }

View File

@@ -643,7 +643,7 @@ inline bool CommandStreamReceiverHw<GfxFamily>::flushBatchedSubmissions() {
auto lastTaskCount = primaryCmdBuffer->taskCount; auto lastTaskCount = primaryCmdBuffer->taskCount;
auto lastPipeControlArgs = primaryCmdBuffer->epiloguePipeControlArgs; auto lastPipeControlArgs = primaryCmdBuffer->epiloguePipeControlArgs;
auto pipeControlLocationSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); auto pipeControlLocationSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
FlushStampUpdateHelper flushStampUpdateHelper; FlushStampUpdateHelper flushStampUpdateHelper;
flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference()); flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference());
@@ -1235,7 +1235,7 @@ SubmissionStatus CommandStreamReceiverHw<GfxFamily>::flushPipeControl(bool state
args.tlbInvalidation = this->isTlbFlushRequiredForStateCacheFlush(); args.tlbInvalidation = this->isTlbFlushRequiredForStateCacheFlush();
} }
auto dispatchSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true) + this->getCmdSizeForPrologue(); auto dispatchSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + this->getCmdSizeForPrologue();
auto &commandStream = getCS(dispatchSize); auto &commandStream = getCS(dispatchSize);
auto commandStreamStart = commandStream.getUsed(); auto commandStreamStart = commandStream.getUsed();
@@ -2413,7 +2413,7 @@ bool CommandStreamReceiverHw<GfxFamily>::submitDependencyUpdate(TagNodeBase *tag
} }
auto ownership = obtainUniqueOwnership(); auto ownership = obtainUniqueOwnership();
PipeControlArgs args; PipeControlArgs args;
auto expectedSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true) + this->getCmdSizeForPrologue(); auto expectedSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + this->getCmdSizeForPrologue();
auto &commandStream = getCS(expectedSize); auto &commandStream = getCS(expectedSize);
auto commandStreamStart = commandStream.getUsed(); auto commandStreamStart = commandStream.getUsed();
auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tag); auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tag);

View File

@@ -169,7 +169,7 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingPostSyncC
false, false,
true); true);
} else { } else {
return MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); return MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
} }
} }

View File

@@ -49,7 +49,7 @@ inline void RenderDispatcher<GfxFamily>::dispatchMonitorFence(LinearStream &cmdB
template <typename GfxFamily> template <typename GfxFamily>
inline size_t RenderDispatcher<GfxFamily>::getSizeMonitorFence(const RootDeviceEnvironment &rootDeviceEnvironment) { inline size_t RenderDispatcher<GfxFamily>::getSizeMonitorFence(const RootDeviceEnvironment &rootDeviceEnvironment) {
return MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); return MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@@ -309,7 +309,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd)); memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd));
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false)); void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer); args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);
@@ -326,7 +326,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
args.partitionCount = 1; args.partitionCount = 1;
if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) {
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false)); void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands<Family>::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite));
args.additionalCommands->push_back(commandBuffer); args.additionalCommands->push_back(commandBuffer);
EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); EncodeSemaphore<Family>::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands);

View File

@@ -129,7 +129,7 @@ inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingNoPostSyn
// Returns the size of the stalling post-sync command sequence for this CSR:
// the size of a barrier with post-sync operation, queried for the CSR's own
// root device environment (peekRootDeviceEnvironment()).
// Diff note: the boolean `true` is replaced by NEO::PostSyncMode::immediateData.
template <typename GfxFamily> template <typename GfxFamily>
inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingPostSyncCommands() const { inline size_t CommandStreamReceiverHw<GfxFamily>::getCmdSizeForStallingPostSyncCommands() const {
return MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); return MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
} }
template <typename GfxFamily> template <typename GfxFamily>

View File

@@ -498,8 +498,8 @@ struct MemorySynchronizationCommands {
static void setPostSyncExtraProperties(PipeControlArgs &args); static void setPostSyncExtraProperties(PipeControlArgs &args);
static void addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment); static void addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode);
static void setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment); static void setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode);
static void setBarrierWaFlags(void *barrierCmd); static void setBarrierWaFlags(void *barrierCmd);
@@ -519,8 +519,8 @@ struct MemorySynchronizationCommands {
static void addStateCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void addStateCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment);
static void addInstructionCacheFlush(LinearStream &commandStream); static void addInstructionCacheFlush(LinearStream &commandStream);
static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool postSyncWrite); static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode);
static size_t getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode);
static size_t getSizeForSingleBarrier(); static size_t getSizeForSingleBarrier();
static size_t getSizeForSingleAdditionalSynchronizationForDirectSubmission(const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getSizeForSingleAdditionalSynchronizationForDirectSubmission(const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getSizeForSingleAdditionalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getSizeForSingleAdditionalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment);

View File

@@ -25,6 +25,7 @@
#include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/os_interface.h"
#include "shared/source/os_interface/product_helper.h" #include "shared/source/os_interface/product_helper.h"
#include "shared/source/release_helper/release_helper.h"
#include "shared/source/utilities/tag_allocator.h" #include "shared/source/utilities/tag_allocator.h"
#include "encode_surface_state_args.h" #include "encode_surface_state_args.h"
@@ -197,9 +198,7 @@ AuxTranslationMode GfxCoreHelperHw<Family>::getAuxTranslationMode(const Hardware
// Appends a barrier with post-sync operation to commandStream: reserves the
// number of bytes reported by getSizeForBarrierWithPostSyncOperation and hands
// the reserved pointer to setBarrierWithPostSyncOperation, which encodes the
// commands using postSyncMode, gpuAddress, immediateData and args.
template <typename GfxFamily> template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, void MemorySynchronizationCommands<GfxFamily>::addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData,
const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args) { const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args) {
// Diff note: the next two rows are the two sides of one change. The first passes
// postSyncMode straight through (matches the PostSyncMode-typed size helper); the
// second collapses it to a bool via `!= PostSyncMode::noWrite` (matches the older
// bool-typed helper). NOTE(review): which row is "after" is inferred from the
// helper signatures shown elsewhere in this diff.
void *commandBuffer = commandStream.getSpace(MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, postSyncMode));
void *commandBuffer = commandStream.getSpace(MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, postSyncMode != PostSyncMode::noWrite));
MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(commandBuffer, postSyncMode, gpuAddress, immediateData, rootDeviceEnvironment, args); MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(commandBuffer, postSyncMode, gpuAddress, immediateData, rootDeviceEnvironment, args);
} }
@@ -212,7 +211,7 @@ void MemorySynchronizationCommands<GfxFamily>::setBarrierWithPostSyncOperation(
const RootDeviceEnvironment &rootDeviceEnvironment, const RootDeviceEnvironment &rootDeviceEnvironment,
PipeControlArgs &args) { PipeControlArgs &args) {
MemorySynchronizationCommands<GfxFamily>::setBarrierWa(commandsBuffer, gpuAddress, rootDeviceEnvironment); MemorySynchronizationCommands<GfxFamily>::setBarrierWa(commandsBuffer, gpuAddress, rootDeviceEnvironment, postSyncMode);
if (!args.blockSettingPostSyncProperties) { if (!args.blockSettingPostSyncProperties) {
setPostSyncExtraProperties(args); setPostSyncExtraProperties(args);
@@ -313,16 +312,17 @@ void MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(void *commandsBu
} }
// Appends the pre-barrier workaround commands to commandStream: reserves
// getSizeForBarrierWa(...) bytes and fills them through setBarrierWa.
// Diff note: the post-change side adds a postSyncMode parameter and forwards it
// to both the size query and the encoder, so both are evaluated for the same mode.
template <typename GfxFamily> template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) { void MemorySynchronizationCommands<GfxFamily>::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) {
size_t requiredSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWa(rootDeviceEnvironment); size_t requiredSize = MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWa(rootDeviceEnvironment, postSyncMode);
void *commandBuffer = commandStream.getSpace(requiredSize); void *commandBuffer = commandStream.getSpace(requiredSize);
setBarrierWa(commandBuffer, gpuAddress, rootDeviceEnvironment); setBarrierWa(commandBuffer, gpuAddress, rootDeviceEnvironment, postSyncMode);
} }
// Encodes the commands that go in front of the main barrier and advances
// commandsBuffer (passed by reference) past whatever was written.
// Two mutually exclusive cases are visible:
//   1. isBarrierWaRequired(...) is true: a PIPE_CONTROL prepared with
//      setBarrierWaFlags, followed by setAdditionalSynchronization.
//   2. otherwise, added by this commit: when a release helper exists, the mode is
//      PostSyncMode::timestamp and the helper's
//      programmAdditionalStallPriorToBarrierWithTimestamp() returns true, one
//      extra barrier with csStallOnly set.
// NOTE(review): the diff skips at least one line of this body between the two
// hunks below (around the copy of `cmd` into the buffer), so this description
// covers only the rows that are shown.
template <typename GfxFamily> template <typename GfxFamily>
void MemorySynchronizationCommands<GfxFamily>::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) { void MemorySynchronizationCommands<GfxFamily>::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) {
using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL;
// May be null; the else-if below checks the pointer before dereferencing it.
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
if (MemorySynchronizationCommands<GfxFamily>::isBarrierWaRequired(rootDeviceEnvironment)) { if (MemorySynchronizationCommands<GfxFamily>::isBarrierWaRequired(rootDeviceEnvironment)) {
PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl;
MemorySynchronizationCommands<GfxFamily>::setBarrierWaFlags(&cmd); MemorySynchronizationCommands<GfxFamily>::setBarrierWaFlags(&cmd);
@@ -330,6 +330,12 @@ void MemorySynchronizationCommands<GfxFamily>::setBarrierWa(void *&commandsBuffe
commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL));
MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, rootDeviceEnvironment); MemorySynchronizationCommands<GfxFamily>::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, rootDeviceEnvironment);
// New branch: the condition is the same one getSizeForBarrierWa uses to account for
// one extra single barrier, so the bytes written here match the bytes reserved.
} else if (releaseHelper && postSyncMode == PostSyncMode::timestamp && releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()) {
PipeControlArgs additionalArgs = {};
additionalArgs.csStallOnly = true;
MemorySynchronizationCommands<GfxFamily>::setSingleBarrier(commandsBuffer, additionalArgs);
// Step past the barrier just written so the caller continues after it.
commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL));
} }
} }
@@ -360,22 +366,25 @@ size_t MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier() {
} }
// Returns the total size of a barrier with post-sync operation:
//   one single barrier
//   + getSizeForBarrierWa(...) for the commands placed in front of it
//   + one additional synchronization, unless no post-sync write is requested.
// Diff note: the `bool postSyncWrite` parameter becomes a NEO::PostSyncMode, and
// "write requested" is now expressed as `postSyncMode != PostSyncMode::noWrite`.
// The mode is also forwarded to getSizeForBarrierWa.
template <typename GfxFamily> template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool postSyncWrite) { size_t MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) {
size_t size = getSizeForSingleBarrier(); size_t size = getSizeForSingleBarrier();
size += getSizeForBarrierWa(rootDeviceEnvironment); size += getSizeForBarrierWa(rootDeviceEnvironment, postSyncMode);
if (postSyncWrite) { if (postSyncMode != PostSyncMode::noWrite) {
size += getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); size += getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment);
} }
return size; return size;
} }
// Returns the size of the commands setBarrierWa emits, using the same conditions:
//   - barrier WA required: one single barrier plus one additional synchronization;
//   - otherwise, if a release helper exists, the mode is PostSyncMode::timestamp and
//     the helper asks for a stall before a timestamp barrier: one single barrier;
//   - otherwise 0.
// Diff note: the postSyncMode parameter and the else-if branch are new in this commit.
template <typename GfxFamily> template <typename GfxFamily>
size_t MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment) { size_t MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) {
size_t size = 0; size_t size = 0;
// May be null; checked before use in the else-if below.
auto releaseHelper = rootDeviceEnvironment.getReleaseHelper();
if (MemorySynchronizationCommands<GfxFamily>::isBarrierWaRequired(rootDeviceEnvironment)) { if (MemorySynchronizationCommands<GfxFamily>::isBarrierWaRequired(rootDeviceEnvironment)) {
size = getSizeForSingleBarrier() + size = getSizeForSingleBarrier() +
getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment);
} else if (releaseHelper && postSyncMode == PostSyncMode::timestamp && releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()) {
size = getSizeForSingleBarrier();
} }
return size; return size;
} }

View File

@@ -182,7 +182,7 @@ struct TimestampPacketHelper {
size_t size = count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>(); size_t size = count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue<GfxFamily>();
if (auxTranslationDirection == AuxTranslationDirection::nonAuxToAux && cacheFlushForBcsRequired) { if (auxTranslationDirection == AuxTranslationDirection::nonAuxToAux && cacheFlushForBcsRequired) {
size += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size += MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
} }
return size; return size;

View File

@@ -64,6 +64,7 @@ class ReleaseHelper {
virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0; virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0;
virtual bool isNumRtStacksPerDssFixedValue() const = 0; virtual bool isNumRtStacksPerDssFixedValue() const = 0;
virtual bool getFtrXe2Compression() const = 0; virtual bool getFtrXe2Compression() const = 0;
// Tells callers whether an extra stalling barrier should be programmed in front of
// a barrier whose post-sync mode is timestamp. Read by
// MemorySynchronizationCommands<GfxFamily>::setBarrierWa and getSizeForBarrierWa.
virtual bool programmAdditionalStallPriorToBarrierWithTimestamp() const = 0;
virtual uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const = 0; virtual uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const = 0;
virtual bool isBlitImageAllowedForDepthFormat() const = 0; virtual bool isBlitImageAllowedForDepthFormat() const = 0;
@@ -109,6 +110,7 @@ class ReleaseHelperHw : public ReleaseHelper {
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override; const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override;
bool isNumRtStacksPerDssFixedValue() const override; bool isNumRtStacksPerDssFixedValue() const override;
bool getFtrXe2Compression() const override; bool getFtrXe2Compression() const override;
bool programmAdditionalStallPriorToBarrierWithTimestamp() const override;
uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const override; uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const override;
bool isBlitImageAllowedForDepthFormat() const override; bool isBlitImageAllowedForDepthFormat() const override;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2024 Intel Corporation * Copyright (C) 2024-2025 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -44,6 +44,11 @@ const SizeToPreferredSlmValueArray &ReleaseHelperHw<release>::getSizeToPreferred
return sizeToPreferredSlmValue; return sizeToPreferredSlmValue;
} }
// Full specialization for this file's `release` constant (its value is defined
// outside the shown hunk; the commit title mentions BMG — TODO confirm).
// Returning true enables the timestamp-mode branch in
// MemorySynchronizationCommands::setBarrierWa / getSizeForBarrierWa, which adds one
// extra csStallOnly barrier when the regular barrier WA is not required.
template <>
bool ReleaseHelperHw<release>::programmAdditionalStallPriorToBarrierWithTimestamp() const {
return true;
}
} // namespace NEO } // namespace NEO
#include "shared/source/release_helper/release_helper_common_xe2_hpg.inl" #include "shared/source/release_helper/release_helper_common_xe2_hpg.inl"

View File

@@ -171,6 +171,11 @@ bool ReleaseHelperHw<releaseType>::getFtrXe2Compression() const {
return true; return true;
} }
// Generic (primary-template) answer used by every release type that does not
// provide its own specialization: no extra stall before a timestamp barrier.
template <ReleaseType releaseType>
bool ReleaseHelperHw<releaseType>::programmAdditionalStallPriorToBarrierWithTimestamp() const {
return false;
}
template <ReleaseType releaseType> template <ReleaseType releaseType>
uint32_t ReleaseHelperHw<releaseType>::computeSlmValues(uint32_t slmSize, bool isHeapless) const { uint32_t ReleaseHelperHw<releaseType>::computeSlmValues(uint32_t slmSize, bool isHeapless) const {
return 0u; return 0u;

View File

@@ -196,11 +196,11 @@ inline void MemorySynchronizationCommands<Family>::setBarrierExtraProperties(voi
} }
// Specialization for this file's `Family` (defined outside the shown hunk): an
// intentionally empty body, so nothing is appended to commandStream.
// Diff note: the post-change side only adds the postSyncMode parameter to keep the
// signature in line with the primary template; the parameter is unused here.
template <> template <>
void MemorySynchronizationCommands<Family>::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) { void MemorySynchronizationCommands<Family>::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) {
} }
// Specialization for this file's `Family` (defined outside the shown hunk): an
// intentionally empty body, so commandsBuffer is neither written nor advanced.
// Diff note: the post-change side only adds the postSyncMode parameter to keep the
// signature in line with the primary template; the parameter is unused here.
template <> template <>
void MemorySynchronizationCommands<Family>::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) { void MemorySynchronizationCommands<Family>::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) {
} }
template <> template <>

View File

@@ -8,6 +8,7 @@
#pragma once #pragma once
#include "shared/source/execution_environment/root_device_environment.h" #include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/common_types.h"
#include "shared/source/helpers/gfx_core_helper.h" #include "shared/source/helpers/gfx_core_helper.h"
namespace NEO { namespace NEO {
@@ -15,7 +16,7 @@ namespace NEO {
template <typename GfxFamily> template <typename GfxFamily>
struct UltMemorySynchronizationCommands : MemorySynchronizationCommands<GfxFamily> { struct UltMemorySynchronizationCommands : MemorySynchronizationCommands<GfxFamily> {
// Test helper: number of PIPE_CONTROL commands expected in a barrier with
// post-sync operation, computed as
//   (barrier-with-post-sync size - getSizeForAdditonalSynchronization) / sizeof(PIPE_CONTROL).
// The size is queried for PostSyncMode::immediateData, so the extra barrier that
// getSizeForBarrierWa adds only for PostSyncMode::timestamp is not counted here.
static size_t getExpectedPipeControlCount(const RootDeviceEnvironment &rootDeviceEnvironment) { static size_t getExpectedPipeControlCount(const RootDeviceEnvironment &rootDeviceEnvironment) {
return (MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - return (MemorySynchronizationCommands<GfxFamily>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) -
MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(rootDeviceEnvironment)) / MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(rootDeviceEnvironment)) /
sizeof(typename GfxFamily::PIPE_CONTROL); sizeof(typename GfxFamily::PIPE_CONTROL);
} }

View File

@@ -43,6 +43,7 @@ class MockReleaseHelper : public ReleaseHelper {
ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ()); ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ());
ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ()); ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ());
ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless)); ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless));
ADDMETHOD_CONST_NOBASE(programmAdditionalStallPriorToBarrierWithTimestamp, bool, false, ());
ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels)); ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void *rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels));
const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override { const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override {

View File

@@ -5775,7 +5775,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenImplicitScalingEnabledWhenProgrammin
ultCsr.activePartitions = 2; ultCsr.activePartitions = 2;
ultCsr.staticWorkPartitioningEnabled = true; ultCsr.staticWorkPartitioningEnabled = true;
size_t barrierWithPostSyncOperationSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true); size_t barrierWithPostSyncOperationSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
size_t expectedSize = barrierWithPostSyncOperationSize + size_t expectedSize = barrierWithPostSyncOperationSize +
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() +
sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_BATCH_BUFFER_START) +

View File

@@ -53,7 +53,7 @@ HWTEST_F(RenderDispatcherTest, givenRenderWhenAddingPreemptionCmdThenExpectPrope
} }
// Verifies that RenderDispatcher::getSizeMonitorFence reports the same size as
// getSizeForBarrierWithPostSyncOperation for the fixture device's root device
// environment (immediateData mode on the post-change side of the row).
HWTEST_F(RenderDispatcherTest, givenRenderWhenAskingForMonitorFenceCmdSizeThenReturnRequiredPipeControlCmdSize) { HWTEST_F(RenderDispatcherTest, givenRenderWhenAskingForMonitorFenceCmdSizeThenReturnRequiredPipeControlCmdSize) {
size_t expectedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(this->pDevice->getRootDeviceEnvironment(), true); size_t expectedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(this->pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData);
EXPECT_EQ(expectedSize, RenderDispatcher<FamilyType>::getSizeMonitorFence(this->pDevice->getRootDeviceEnvironment())); EXPECT_EQ(expectedSize, RenderDispatcher<FamilyType>::getSizeMonitorFence(this->pDevice->getRootDeviceEnvironment()));
} }

View File

@@ -1193,7 +1193,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
size_t expectedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + size_t expectedSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) +
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() +
sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_BATCH_BUFFER_START) +
sizeof(WalkerPartition::BarrierControlSection); sizeof(WalkerPartition::BarrierControlSection);
@@ -1270,7 +1270,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
size_t expectedSize = sizeof(MI_STORE_DATA_IMM) + size_t expectedSize = sizeof(MI_STORE_DATA_IMM) +
MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) +
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() +
sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_BATCH_BUFFER_START) +
sizeof(WalkerPartition::BarrierControlSection) + sizeof(WalkerPartition::BarrierControlSection) +
@@ -1354,7 +1354,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests,
testHardwareInfo.featureTable.flags.ftrLocalMemory = true; testHardwareInfo.featureTable.flags.ftrLocalMemory = true;
size_t expectedSize = sizeof(MI_ATOMIC) + size_t expectedSize = sizeof(MI_ATOMIC) +
MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) +
sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore<FamilyType>::getSizeMiSemaphoreWait() +
sizeof(MI_BATCH_BUFFER_START) + sizeof(MI_BATCH_BUFFER_START) +
sizeof(WalkerPartition::BarrierControlSection) + sizeof(WalkerPartition::BarrierControlSection) +

View File

@@ -1493,7 +1493,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests,
MockExecutionEnvironment mockExecutionEnvironment{}; MockExecutionEnvironment mockExecutionEnvironment{};
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
@@ -1584,7 +1584,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests,
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>) + auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_STORE_DATA_IMM<FamilyType>) +
NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);
@@ -1727,7 +1727,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests,
auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0];
auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) +
NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + NEO::MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) +
sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) + sizeof(WalkerPartition::MI_ATOMIC<FamilyType>) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT<FamilyType>) +
sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>); sizeof(WalkerPartition::BATCH_BUFFER_START<FamilyType>);

View File

@@ -24,6 +24,7 @@
#include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_execution_environment.h"
#include "shared/test/common/mocks/mock_gmm.h" #include "shared/test/common/mocks/mock_gmm.h"
#include "shared/test/common/mocks/mock_release_helper.h"
#include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/hw_test.h"
#include "shared/test/common/test_macros/test_checks_shared.h" #include "shared/test/common/test_macros/test_checks_shared.h"
@@ -326,7 +327,7 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsed
PipeControlArgs args; PipeControlArgs args;
MemorySynchronizationCommands<FamilyType>::addBarrierWithPostSyncOperation( MemorySynchronizationCommands<FamilyType>::addBarrierWithPostSyncOperation(
stream, PostSyncMode::timestamp, address, immediateData, rootDeviceEnvironment, args); stream, PostSyncMode::timestamp, address, immediateData, rootDeviceEnvironment, args);
auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::timestamp) - sizeof(PIPE_CONTROL);
auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment);
void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize); void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(cpuPipeControlBuffer); auto pipeControl = genCmdCast<PIPE_CONTROL *>(cpuPipeControlBuffer);
@@ -386,7 +387,7 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIs
PipeControlArgs args{}; PipeControlArgs args{};
MemorySynchronizationCommands<FamilyType>::addBarrierWithPostSyncOperation( MemorySynchronizationCommands<FamilyType>::addBarrierWithPostSyncOperation(
stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args); stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args);
auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) - sizeof(PIPE_CONTROL);
auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment);
void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize); void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(cpuPipeControlBuffer); auto pipeControl = genCmdCast<PIPE_CONTROL *>(cpuPipeControlBuffer);
@@ -425,7 +426,7 @@ HWTEST_F(PipeControlHelperTests, givenNotifyEnableArgumentIsTrueWhenHelperIsUsed
args.notifyEnable = true; args.notifyEnable = true;
MemorySynchronizationCommands<FamilyType>::addBarrierWithPostSyncOperation( MemorySynchronizationCommands<FamilyType>::addBarrierWithPostSyncOperation(
stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args); stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args);
auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); auto additionalPcSize = MemorySynchronizationCommands<FamilyType>::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) - sizeof(PIPE_CONTROL);
auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment);
auto pipeControl = genCmdCast<PIPE_CONTROL *>(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); auto pipeControl = genCmdCast<PIPE_CONTROL *>(ptrOffset(stream.getCpuBase(), pipeControlLocationSize));
ASSERT_NE(nullptr, pipeControl); ASSERT_NE(nullptr, pipeControl);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright (C) 2021-2024 Intel Corporation * Copyright (C) 2021-2025 Intel Corporation
* *
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
@@ -99,7 +99,7 @@ HWTEST2_F(PipeControlHelperTestsDg2AndLater, WhenAddingPipeControlWAThenCorrectC
LinearStream stream(buffer, 128); LinearStream stream(buffer, 128);
hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory;
MemorySynchronizationCommands<FamilyType>::addBarrierWa(stream, address, rootDeviceEnvironment); MemorySynchronizationCommands<FamilyType>::addBarrierWa(stream, address, rootDeviceEnvironment, NEO::PostSyncMode::immediateData);
if (MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(rootDeviceEnvironment) == false) { if (MemorySynchronizationCommands<FamilyType>::isBarrierWaRequired(rootDeviceEnvironment) == false) {
EXPECT_EQ(0u, stream.getUsed()); EXPECT_EQ(0u, stream.getUsed());

View File

@@ -77,6 +77,10 @@ TEST_F(ReleaseHelper1255Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper1255Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1255Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1255Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -77,6 +77,10 @@ TEST_F(ReleaseHelper1256Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper1256Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1256Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1256Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -77,6 +77,9 @@ TEST_F(ReleaseHelper1257Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper1257Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1257Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1257Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -66,6 +66,10 @@ TEST_F(ReleaseHelper1260Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper1260Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1260Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1260Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -65,6 +65,10 @@ TEST_F(ReleaseHelper1261Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper1261Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1261Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1261Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -79,6 +79,9 @@ TEST_F(ReleaseHelper1270Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper1270Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1270Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1270Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -79,6 +79,10 @@ TEST_F(ReleaseHelper1271Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper1271Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1271Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1271Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -74,6 +74,10 @@ TEST_F(ReleaseHelper1274Tests, whenIsDummyBlitWaRequiredCalledThenFalseReturned)
whenIsDummyBlitWaRequiredCalledThenFalseReturned(); whenIsDummyBlitWaRequiredCalledThenFalseReturned();
} }
TEST_F(ReleaseHelper1274Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper1274Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper1274Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -78,6 +78,10 @@ TEST_F(ReleaseHelper2004Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper2004Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper2004Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { TEST_F(ReleaseHelper2004Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) {
whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
} }

View File

@@ -84,6 +84,10 @@ TEST_F(ReleaseHelper3000Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper3000Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper3000Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) { TEST_F(ReleaseHelper3000Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) {
for (auto &revision : getRevisions()) { for (auto &revision : getRevisions()) {
ipVersion.revision = revision; ipVersion.revision = revision;

View File

@@ -84,6 +84,10 @@ TEST_F(ReleaseHelper3001Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru
whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
} }
TEST_F(ReleaseHelper3001Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) {
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
}
TEST_F(ReleaseHelper3001Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) { TEST_F(ReleaseHelper3001Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) {
for (auto &revision : getRevisions()) { for (auto &revision : getRevisions()) {
ipVersion.revision = revision; ipVersion.revision = revision;

View File

@@ -193,4 +193,13 @@ void ReleaseHelperTestsBase::whenIsBlitImageAllowedForDepthFormatCalledThenTrueR
ASSERT_NE(nullptr, releaseHelper); ASSERT_NE(nullptr, releaseHelper);
EXPECT_TRUE(releaseHelper->isBlitImageAllowedForDepthFormat()); EXPECT_TRUE(releaseHelper->isBlitImageAllowedForDepthFormat());
} }
}
void ReleaseHelperTestsBase::whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned() {
for (auto &revision : getRevisions()) {
ipVersion.revision = revision;
releaseHelper = ReleaseHelper::create(ipVersion);
ASSERT_NE(nullptr, releaseHelper);
EXPECT_FALSE(releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp());
}
} }

View File

@@ -36,6 +36,7 @@ struct ReleaseHelperTestsBase : public ::testing::Test {
void whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); void whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey();
void whenGettingThreadsPerEuConfigsThenCorrectValueIsReturnedBasedOnNumThreadPerEu(); void whenGettingThreadsPerEuConfigsThenCorrectValueIsReturnedBasedOnNumThreadPerEu();
void whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); void whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned();
void whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned();
virtual std::vector<uint32_t> getRevisions() = 0; virtual std::vector<uint32_t> getRevisions() = 0;
std::unique_ptr<ReleaseHelper> releaseHelper; std::unique_ptr<ReleaseHelper> releaseHelper;

View File

@@ -467,7 +467,7 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, whenPipecontrolWaIsProgrammedTh
LinearStream cmdStream(buffer, sizeof(buffer)); LinearStream cmdStream(buffer, sizeof(buffer));
uint64_t gpuAddress = 0x1234; uint64_t gpuAddress = 0x1234;
MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment()); MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite);
auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(buffer); auto pipeControl = reinterpret_cast<PIPE_CONTROL *>(buffer);
EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable());

View File

@@ -161,7 +161,7 @@ XE_HPG_CORETEST_F(GfxCoreHelperTestXeHpgCore, givenDisablePipeControlFlagIsEnabl
constexpr size_t bufferSize = 128u; constexpr size_t bufferSize = 128u;
uint8_t buffer[bufferSize]; uint8_t buffer[bufferSize];
LinearStream cmdStream(buffer, bufferSize); LinearStream cmdStream(buffer, bufferSize);
MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment); MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment, NEO::PostSyncMode::noWrite);
EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed());
} }
@@ -179,7 +179,7 @@ XE_HPG_CORETEST_F(GfxCoreHelperTestXeHpgCore, givenDisablePipeControlFlagIsEnabl
constexpr size_t bufferSize = 128u; constexpr size_t bufferSize = 128u;
uint8_t buffer[bufferSize]; uint8_t buffer[bufferSize];
LinearStream cmdStream(buffer, bufferSize); LinearStream cmdStream(buffer, bufferSize);
MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment); MemorySynchronizationCommands<FamilyType>::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment, NEO::PostSyncMode::noWrite);
EXPECT_EQ(0u, cmdStream.getUsed()); EXPECT_EQ(0u, cmdStream.getUsed());
} }