diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index deec48530a..638f7feee9 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -262,7 +262,7 @@ size_t CommandQueueHw::estimateStreamSizeForExecuteCommandListsRe } if (ctx.isDispatchTaskCountPostSyncRequired) { - linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->device->getNEODevice()->getRootDeviceEnvironment()); + linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->device->getNEODevice()->getRootDeviceEnvironment(), true); } if (instructionCacheFlushRequired) { @@ -327,7 +327,7 @@ ze_result_t CommandQueueHw::executeCommandListsRegular( linearStreamSizeEstimate += this->computeDebuggerCmdsSize(ctx); if (ctx.isDispatchTaskCountPostSyncRequired) { - linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment()); + linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), true); } NEO::LinearStream child(nullptr); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index ad52582c9e..fe1df66f73 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -240,7 +240,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix sizeof(MI_STORE_DATA_IMM) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); - size_t postSyncSize = 
NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment()); + size_t postSyncSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); auto useSizeBefore = cmdListStream->getUsed(); auto result = commandList->appendBarrier(eventHandle, 0, nullptr, false); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp index 069c2345f1..6efc41a319 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp @@ -390,7 +390,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment()) + + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true) + commandList->estimateBufferSizeMultiTileBarrier(device->getNEODevice()->getRootDeviceEnvironment()); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index d865a24d8b..c85789050e 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -520,7 +520,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); - 
size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment()); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); @@ -561,7 +561,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy commandList->partitionCount = 2; EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false)); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment()); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout(); @@ -713,7 +713,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress = event->getCompletionFieldGpuAddress(device); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment()); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); @@ -777,7 +777,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress = event->getCompletionFieldGpuAddress(device); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment()); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); size_t usedSize = 
cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index d5565fb1e2..fffac6b8c4 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -494,7 +494,7 @@ HWTEST_F(CommandQueueCreate, GivenDispatchTaskCountPostSyncRequiredWhenExecuteCo commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr, nullptr); auto estimatedSizeWithtBarrier = commandQueue->requiredSizeCalled; - auto sizeForBarrier = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment()); + auto sizeForBarrier = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); EXPECT_GT(sizeForBarrier, 0u); EXPECT_EQ(estimatedSizeWithtBarrier, estimatedSizeWithoutBarrier + sizeForBarrier); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index 77e0b45670..2673e0497c 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -485,7 +485,7 @@ HWTEST_F(CommandQueueSynchronizeTest, givenSynchronousCommandQueueWhenTagUpdateF } else { expectedSize += sizeof(MI_BATCH_BUFFER_END); } - expectedSize += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment()); + expectedSize += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), true); expectedSize = alignUp(expectedSize, 8); const ze_command_queue_desc_t desc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, 0, 0, 0, 
ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index de4e5b0659..6bb5b3c16d 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -75,7 +75,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c if (blitEnqueue) { size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (commandQueueHw.isCacheFlushForBcsRequired()) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); } return expectedSizeCS; @@ -129,7 +129,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c if (outEvent) { auto pEvent = castToObjectOrAbort(*outEvent); if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); } } expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier(); diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index e02279fb40..826071c1ad 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -508,7 +508,7 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredC size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); auto &rootDeviceEnvironment = 
device->getRootDeviceEnvironment(); - size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 4708a39078..550b77d1dd 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -571,7 +571,7 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenE EXPECT_EQ(CL_SUCCESS, enqueueResult); auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( - pDevice->getRootDeviceEnvironment()), + pDevice->getRootDeviceEnvironment(), true), MemoryConstants::cacheLineSize); EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), requiredCmdStreamSize); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 83466f194a..1fdadb5f15 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -996,7 +996,7 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe auto pipeControls = findAll(cmdList.begin(), cmdList.end()); auto additionalPcCount = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( - pDevice->getRootDeviceEnvironment()) / + pDevice->getRootDeviceEnvironment(), true) / sizeof(typename FamilyType::PIPE_CONTROL); // |AuxToNonAux|NDR|NonAuxToAux| diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp 
b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index 9b27b6223a..9dc3f1fad6 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -442,7 +442,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForMultiDeviceContextWhenCalcul auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, nullptr); auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, &clEvent); - EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getRootDeviceEnvironment()), extendedCommandStreamSize); + EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getRootDeviceEnvironment(), true), extendedCommandStreamSize); } HWTEST2_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned, IsHeapfulSupported) { diff --git a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp index 855be21554..fffaf13d87 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp @@ -74,7 +74,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAndCommandBufferAreNot size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && (!pCmdQ->getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled())) { expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( - pDevice->getRootDeviceEnvironment()), + 
pDevice->getRootDeviceEnvironment(), true), MemoryConstants::cacheLineSize); } EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS); @@ -100,7 +100,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAndCommandBufferAreNo size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - auto unalignedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment()) + + auto unalignedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + EncodeStoreMemory::getStoreDataImmSize() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedStreamSize = alignUp(unalignedSize, MemoryConstants::cacheLineSize); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index e8769d2b36..78bf1f3af6 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -1719,7 +1719,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSet DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies.barrierNodes; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp
b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp index f1ff77b925..5873f4699b 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp @@ -801,7 +801,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->activePartitions = 1; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); @@ -848,7 +848,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionDisabledW commandStreamReceiver->staticWorkPartitioningEnabled = false; commandStreamReceiver->activePartitions = 2; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); @@ -898,7 +898,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->activePartitions = 2; - size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) + + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + sizeof(MI_ATOMIC) + 
NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + 2 * sizeof(uint32_t); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index 7d772667da..fd8650c937 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -400,7 +400,7 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstima flags.isStallingCommandsOnNextFlushRequired = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); - size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getRootDeviceEnvironment()); + size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getRootDeviceEnvironment(), true); EXPECT_EQ(sizeWithPcRequest, extendedSize); } diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index f3e7bd1d57..12abf03111 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -962,7 +962,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (cmdQ->isCacheFlushForBcsRequired()) { - expectedSize += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(cmdQ->getDevice().getRootDeviceEnvironment()); + expectedSize += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(cmdQ->getDevice().getRootDeviceEnvironment(), true); } EXPECT_EQ(expectedSize, readBufferCmdsSize); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl 
b/shared/source/command_container/command_encoder_xehp_and_later.inl index 81a6d6cd85..e9118c2d0e 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -338,7 +338,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (!args.makeCommandView) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { - void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment)); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); @@ -475,7 +475,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (!args.makeCommandView) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { - void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment)); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index fae0b8d163..7f89af9938 100644 --- 
a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -345,7 +345,7 @@ void programPostSyncPipeControlCommand(void *&inputAddress, rootDeviceEnvironment, flushArgs); - totalBytesProgrammed += static_cast(NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment)); + totalBytesProgrammed += static_cast(NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true)); } template @@ -828,7 +828,7 @@ uint64_t computeBarrierControlSectionOffset(WalkerPartitionArgs &args, } if (args.usePostSync) { - offset += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + offset += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); } else { offset += NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 8522fa6b76..470cdff176 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -643,7 +643,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { auto lastTaskCount = primaryCmdBuffer->taskCount; auto lastPipeControlArgs = primaryCmdBuffer->epiloguePipeControlArgs; - auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment()); + auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); FlushStampUpdateHelper flushStampUpdateHelper; flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference()); @@ -1235,7 +1235,7 @@ SubmissionStatus CommandStreamReceiverHw::flushPipeControl(bool state 
args.tlbInvalidation = this->isTlbFlushRequiredForStateCacheFlush(); } - auto dispatchSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment()) + this->getCmdSizeForPrologue(); + auto dispatchSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true) + this->getCmdSizeForPrologue(); auto &commandStream = getCS(dispatchSize); auto commandStreamStart = commandStream.getUsed(); @@ -2413,7 +2413,7 @@ bool CommandStreamReceiverHw::submitDependencyUpdate(TagNodeBase *tag } auto ownership = obtainUniqueOwnership(); PipeControlArgs args; - auto expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment()) + this->getCmdSizeForPrologue(); + auto expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true) + this->getCmdSizeForPrologue(); auto &commandStream = getCS(expectedSize); auto commandStreamStart = commandStream.getUsed(); auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*tag); diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl index 6f9068fca4..b08039a1f2 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl @@ -169,7 +169,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncC false, true); } else { - return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment()); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); } } diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.inl b/shared/source/direct_submission/dispatchers/render_dispatcher.inl index 
460ed79ce5..da3ae54e6f 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.inl +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.inl @@ -49,7 +49,7 @@ inline void RenderDispatcher::dispatchMonitorFence(LinearStream &cmdB template inline size_t RenderDispatcher::getSizeMonitorFence(const RootDeviceEnvironment &rootDeviceEnvironment) { - return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); } template diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 308262cbdd..0da828ffb0 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -309,7 +309,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd)); if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { - void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment())); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); @@ -326,7 +326,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.partitionCount = 1; if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { - 
void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment())); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); diff --git a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp index f39aa33dbc..67e200dcec 100644 --- a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp +++ b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp @@ -129,7 +129,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingNoPostSyn template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncCommands() const { - return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment()); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); } template diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index 430ff47acb..f3237cc3a8 100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -522,7 +522,7 @@ struct MemorySynchronizationCommands { static void addStateCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void addInstructionCacheFlush(LinearStream &commandStream); - static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment); + static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool postSyncWrite); static size_t getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment); 
static size_t getSizeForSingleBarrier(); static size_t getSizeForSingleAdditionalSynchronizationForDirectSubmission(const RootDeviceEnvironment &rootDeviceEnvironment); diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index bf3c70cb6b..0913cc24b9 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -198,7 +198,7 @@ template void MemorySynchronizationCommands::addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args) { - void *commandBuffer = commandStream.getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment)); + void *commandBuffer = commandStream.getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, postSyncMode != PostSyncMode::noWrite)); MemorySynchronizationCommands::setBarrierWithPostSyncOperation(commandBuffer, postSyncMode, gpuAddress, immediateData, rootDeviceEnvironment, args); } @@ -220,7 +220,9 @@ void MemorySynchronizationCommands::setBarrierWithPostSyncOperation( MemorySynchronizationCommands::setSingleBarrier(commandsBuffer, postSyncMode, gpuAddress, immediateData, args); commandsBuffer = ptrOffset(commandsBuffer, getSizeForSingleBarrier()); - MemorySynchronizationCommands::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, rootDeviceEnvironment); + if (postSyncMode != PostSyncMode::noWrite) { + MemorySynchronizationCommands::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, rootDeviceEnvironment); + } } template @@ -358,11 +360,13 @@ size_t MemorySynchronizationCommands::getSizeForSingleBarrier() { } template -size_t MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment) { +size_t 
MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool postSyncWrite) { size_t size = getSizeForSingleBarrier(); size += getSizeForBarrierWa(rootDeviceEnvironment); - size += getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); + if (postSyncWrite) { + size += getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); + } return size; } diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index 8a443b8bb5..b1df789f5b 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -182,7 +182,7 @@ struct TimestampPacketHelper { size_t size = count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (auxTranslationDirection == AuxTranslationDirection::nonAuxToAux && cacheFlushForBcsRequired) { - size += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment); + size += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); } return size; diff --git a/shared/test/common/helpers/ult_gfx_core_helper.h b/shared/test/common/helpers/ult_gfx_core_helper.h index 51f4a187c5..8bec21ae52 100644 --- a/shared/test/common/helpers/ult_gfx_core_helper.h +++ b/shared/test/common/helpers/ult_gfx_core_helper.h @@ -15,7 +15,7 @@ namespace NEO { template struct UltMemorySynchronizationCommands : MemorySynchronizationCommands { static size_t getExpectedPipeControlCount(const RootDeviceEnvironment &rootDeviceEnvironment) { - return (MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) - + return (MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - MemorySynchronizationCommands::getSizeForAdditonalSynchronization(rootDeviceEnvironment)) / sizeof(typename GfxFamily::PIPE_CONTROL); } diff --git 
a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 152dc0c4ab..41d2090d97 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -5775,7 +5775,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenImplicitScalingEnabledWhenProgrammin ultCsr.activePartitions = 2; ultCsr.staticWorkPartitioningEnabled = true; - size_t barrierWithPostSyncOperationSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment()); + size_t barrierWithPostSyncOperationSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true); size_t expectedSize = barrierWithPostSyncOperationSize + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + diff --git a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp index 03e820462f..c2e8ba0dba 100644 --- a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp +++ b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp @@ -53,7 +53,7 @@ HWTEST_F(RenderDispatcherTest, givenRenderWhenAddingPreemptionCmdThenExpectPrope } HWTEST_F(RenderDispatcherTest, givenRenderWhenAskingForMonitorFenceCmdSizeThenReturnRequiredPipeControlCmdSize) { - size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->pDevice->getRootDeviceEnvironment()); + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->pDevice->getRootDeviceEnvironment(), true); EXPECT_EQ(expectedSize, RenderDispatcher::getSizeMonitorFence(this->pDevice->getRootDeviceEnvironment())); } diff --git 
a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index f22464b336..2bb2fdd78d 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -1193,7 +1193,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, MockExecutionEnvironment mockExecutionEnvironment{}; - size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment()) + + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection); @@ -1270,7 +1270,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; size_t expectedSize = sizeof(MI_STORE_DATA_IMM) + - MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment()) + + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection) + @@ -1354,7 +1354,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, testHardwareInfo.featureTable.flags.ftrLocalMemory = true; size_t expectedSize = sizeof(MI_ATOMIC) + - MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment()) + + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + 
sizeof(WalkerPartition::BarrierControlSection) + diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index 002c9ea610..a77eefd370 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -1493,7 +1493,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, MockExecutionEnvironment mockExecutionEnvironment{}; auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; - auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) + + auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); @@ -1584,7 +1584,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_STORE_DATA_IMM) + - NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) + + NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); @@ -1727,7 +1727,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_ATOMIC) + - NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) + + 
NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index 821e61dba1..c7f8cbf563 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -326,7 +326,7 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsed PipeControlArgs args; MemorySynchronizationCommands::addBarrierWithPostSyncOperation( stream, PostSyncMode::timestamp, address, immediateData, rootDeviceEnvironment, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) - sizeof(PIPE_CONTROL); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize); auto pipeControl = genCmdCast(cpuPipeControlBuffer); @@ -386,7 +386,7 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIs PipeControlArgs args{}; MemorySynchronizationCommands::addBarrierWithPostSyncOperation( stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) - sizeof(PIPE_CONTROL); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - 
MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize); auto pipeControl = genCmdCast(cpuPipeControlBuffer); @@ -425,7 +425,7 @@ HWTEST_F(PipeControlHelperTests, givenNotifyEnableArgumentIsTrueWhenHelperIsUsed args.notifyEnable = true; MemorySynchronizationCommands::addBarrierWithPostSyncOperation( stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment) - sizeof(PIPE_CONTROL); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); ASSERT_NE(nullptr, pipeControl);