diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 638f7feee9..90422640d1 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -262,7 +262,7 @@ size_t CommandQueueHw::estimateStreamSizeForExecuteCommandListsRe } if (ctx.isDispatchTaskCountPostSyncRequired) { - linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->device->getNEODevice()->getRootDeviceEnvironment(), true); + linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); } if (instructionCacheFlushRequired) { @@ -327,7 +327,7 @@ ze_result_t CommandQueueHw::executeCommandListsRegular( linearStreamSizeEstimate += this->computeDebuggerCmdsSize(ctx); if (ctx.isDispatchTaskCountPostSyncRequired) { - linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), true); + linearStreamSizeEstimate += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); } NEO::LinearStream child(nullptr); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index fe1df66f73..b74dd84fdb 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -240,7 +240,7 @@ struct MultiTileCommandListAppendBarrierFixture : public MultiTileCommandListFix sizeof(MI_STORE_DATA_IMM) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait(); - size_t postSyncSize = 
NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); + size_t postSyncSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); auto useSizeBefore = cmdListStream->getUsed(); auto result = commandList->appendBarrier(eventHandle, 0, nullptr, false); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp index 6efc41a319..038c7cd70b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp @@ -390,7 +390,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true) + + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + commandList->estimateBufferSizeMultiTileBarrier(device->getNEODevice()->getRootDeviceEnvironment()); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index c85789050e..84aba60be0 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -520,7 +520,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress 
= event->getGpuAddress(device) + event->getContextEndOffset(); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); @@ -561,7 +561,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, givenMultiTileAndDynamicPostSy commandList->partitionCount = 2; EXPECT_EQ(ZE_RESULT_SUCCESS, commandList->appendSignalEvent(event->toHandle(), false)); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); auto unifiedPostSyncLayout = device->getL0GfxCoreHelper().hasUnifiedPostSyncAllocationLayout(); @@ -713,7 +713,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress = event->getCompletionFieldGpuAddress(device); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); @@ -777,7 +777,7 @@ HWTEST2_F(CommandListAppendUsedPacketSignalEvent, auto gpuAddress = event->getCompletionFieldGpuAddress(device); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); + size_t 
expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp index fffac6b8c4..cb8e643567 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_1.cpp @@ -494,7 +494,7 @@ HWTEST_F(CommandQueueCreate, GivenDispatchTaskCountPostSyncRequiredWhenExecuteCo commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr, nullptr); auto estimatedSizeWithtBarrier = commandQueue->requiredSizeCalled; - auto sizeForBarrier = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), true); + auto sizeForBarrier = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getNEODevice()->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); EXPECT_GT(sizeForBarrier, 0u); EXPECT_EQ(estimatedSizeWithtBarrier, estimatedSizeWithoutBarrier + sizeForBarrier); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index 2673e0497c..dd2eab32ad 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -485,7 +485,7 @@ HWTEST_F(CommandQueueSynchronizeTest, givenSynchronousCommandQueueWhenTagUpdateF } else { expectedSize += sizeof(MI_BATCH_BUFFER_END); } - expectedSize += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), true); + expectedSize += 
NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); expectedSize = alignUp(expectedSize, 8); const ze_command_queue_desc_t desc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, 0, 0, 0, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 6bb5b3c16d..831ab5afaf 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -75,7 +75,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c if (blitEnqueue) { size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (commandQueueHw.isCacheFlushForBcsRequired()) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); } return expectedSizeCS; @@ -129,7 +129,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c if (outEvent) { auto pEvent = castToObjectOrAbort(*outEvent); if ((pEvent->getContext()->getRootDeviceIndices().size() > 1) && (!pEvent->isUserEvent())) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); } } expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier(); diff --git a/opencl/test/unit_test/aub_tests/fixtures/aub_walker_partition_fixture.h b/opencl/test/unit_test/aub_tests/fixtures/aub_walker_partition_fixture.h index 873206ca57..797f535c31 100644 --- 
a/opencl/test/unit_test/aub_tests/fixtures/aub_walker_partition_fixture.h +++ b/opencl/test/unit_test/aub_tests/fixtures/aub_walker_partition_fixture.h @@ -104,7 +104,7 @@ struct AubWalkerPartitionFixture : public KernelAUBFixture uint8_t buffer[256]; LinearStream stream(buffer, 256); - MemorySynchronizationCommands::addBarrierWa(stream, 0ull, rootDeviceEnvironment); + MemorySynchronizationCommands::addBarrierWa(stream, 0ull, rootDeviceEnvironment, NEO::PostSyncMode::immediateData); void *syncPipeControlAddress = reinterpret_cast(reinterpret_cast(startAddress) + stream.getUsed()); PIPE_CONTROL *pipeControl = genCmdCast(syncPipeControlAddress); return pipeControl; diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index 826071c1ad..0a26ab1704 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -508,7 +508,7 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredC size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); auto &rootDeviceEnvironment = device->getRootDeviceEnvironment(); - size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 7a51fd0846..ba4831c2a2 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ 
b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -571,7 +571,7 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenE EXPECT_EQ(CL_SUCCESS, enqueueResult); auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( - pDevice->getRootDeviceEnvironment(), true), + pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData), MemoryConstants::cacheLineSize); EXPECT_EQ(mockCmdQ->getCS(0).getUsed(), requiredCmdStreamSize); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 1fdadb5f15..b968137d9e 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -996,7 +996,7 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe auto pipeControls = findAll(cmdList.begin(), cmdList.end()); auto additionalPcCount = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( - pDevice->getRootDeviceEnvironment(), true) / + pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) / sizeof(typename FamilyType::PIPE_CONTROL); // |AuxToNonAux|NDR|NonAuxToAux| diff --git a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp index 9dc3f1fad6..dc6f3365d4 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_buffer_tests.cpp @@ -442,7 +442,7 @@ HWTEST_F(GetSizeRequiredBufferTest, GivenOutEventForMultiDeviceContextWhenCalcul auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, nullptr); auto extendedCommandStreamSize = 
EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false, false, &clEvent); - EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getRootDeviceEnvironment(), true), extendedCommandStreamSize); + EXPECT_EQ(baseCommandStreamSize + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pContext.getDevices()[0]->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData), extendedCommandStreamSize); } HWTEST2_F(GetSizeRequiredBufferTest, givenMultipleKernelRequiringSshWhenTotalSizeIsComputedThenItIsProperlyAligned, IsHeapfulSupported) { diff --git a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp index fffaf13d87..a2a14fc03b 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp @@ -74,7 +74,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAndCommandBufferAreNot size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled() && (!pCmdQ->getGpgpuCommandStreamReceiver().isUpdateTagFromWaitEnabled())) { expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( - pDevice->getRootDeviceEnvironment(), true), + pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData), MemoryConstants::cacheLineSize); } EXPECT_EQ(expectedStreamSize, commandStream.getUsed() - usedBeforeCS); @@ -100,7 +100,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAndCommandBufferAreNo size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - auto unalignedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + + auto 
unalignedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + EncodeStoreMemory::getStoreDataImmSize() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); expectedStreamSize = alignUp(unalignedSize, MemoryConstants::cacheLineSize); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index 78bf1f3af6..7c5a03c451 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -1719,7 +1719,7 @@ HWCMDTEST_F(IGFX_GEN12LP_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSet DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.barrierTimestampPacketNodes = &timestampPacketDependencies.barrierNodes; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp index 65f194714e..c0d56b6a0a 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp @@ -820,7 +820,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverHwTestXeHPAndLaterWithMockCsrHw, givenS commandStreamReceiver->staticWorkPartitioningEnabled = true; 
commandStreamReceiver->activePartitions = 1; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); @@ -867,7 +867,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverHwTestXeHPAndLaterWithMockCsrHw, givenS commandStreamReceiver->staticWorkPartitioningEnabled = false; commandStreamReceiver->activePartitions = 2; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); @@ -917,7 +917,7 @@ HWTEST2_TEMPLATED_F(CommandStreamReceiverHwTestXeHPAndLaterWithMockCsrHw, givenS commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->activePartitions = 2; - size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + 2 * sizeof(uint32_t); diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index 998f4fa089..c07564718e 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ 
b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -396,7 +396,7 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstima flags.isStallingCommandsOnNextFlushRequired = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); - size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getRootDeviceEnvironment(), true); + size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); EXPECT_EQ(sizeWithPcRequest, extendedSize); } diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index ab37a7d052..40b5994cb9 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -962,7 +962,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (cmdQ->isCacheFlushForBcsRequired()) { - expectedSize += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(cmdQ->getDevice().getRootDeviceEnvironment(), true); + expectedSize += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(cmdQ->getDevice().getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); } EXPECT_EQ(expectedSize, readBufferCmdsSize); diff --git a/opencl/test/unit_test/xe_hpc_core/gfx_core_helper_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/gfx_core_helper_tests_xe_hpc_core.cpp index 22aed4a3f3..e5530956a0 100644 --- a/opencl/test/unit_test/xe_hpc_core/gfx_core_helper_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/gfx_core_helper_tests_xe_hpc_core.cpp @@ -581,7 +581,7 @@ 
XE_HPC_CORETEST_F(GfxCoreHelperTestsXeHpcCore, whenPipecontrolWaIsProgrammedThen LinearStream cmdStream(buffer, sizeof(buffer)); uint64_t gpuAddress = 0x1234; - MemorySynchronizationCommands::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment()); + MemorySynchronizationCommands::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite); auto pipeControl = genCmdCast(buffer); ASSERT_NE(nullptr, pipeControl); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index e9118c2d0e..72ecebaf09 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -338,7 +338,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (!args.makeCommandView) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { - void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false)); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::noWrite)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); @@ -475,7 +475,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis if (!args.makeCommandView) { if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { - void *commandBuffer = 
listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, false)); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::noWrite)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index 7f89af9938..bf7d5bc90c 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -345,7 +345,7 @@ void programPostSyncPipeControlCommand(void *&inputAddress, rootDeviceEnvironment, flushArgs); - totalBytesProgrammed += static_cast(NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true)); + totalBytesProgrammed += static_cast(NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData)); } template @@ -828,7 +828,7 @@ uint64_t computeBarrierControlSectionOffset(WalkerPartitionArgs &args, } if (args.usePostSync) { - offset += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + offset += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); } else { offset += NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 470cdff176..5b0aec7149 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ 
b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -643,7 +643,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { auto lastTaskCount = primaryCmdBuffer->taskCount; auto lastPipeControlArgs = primaryCmdBuffer->epiloguePipeControlArgs; - auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); + auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); FlushStampUpdateHelper flushStampUpdateHelper; flushStampUpdateHelper.insert(primaryCmdBuffer->flushStamp->getStampReference()); @@ -1235,7 +1235,7 @@ SubmissionStatus CommandStreamReceiverHw::flushPipeControl(bool state args.tlbInvalidation = this->isTlbFlushRequiredForStateCacheFlush(); } - auto dispatchSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true) + this->getCmdSizeForPrologue(); + auto dispatchSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + this->getCmdSizeForPrologue(); auto &commandStream = getCS(dispatchSize); auto commandStreamStart = commandStream.getUsed(); @@ -2413,7 +2413,7 @@ bool CommandStreamReceiverHw::submitDependencyUpdate(TagNodeBase *tag } auto ownership = obtainUniqueOwnership(); PipeControlArgs args; - auto expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true) + this->getCmdSizeForPrologue(); + auto expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + this->getCmdSizeForPrologue(); auto &commandStream = getCS(expectedSize); auto commandStreamStart = commandStream.getUsed(); auto cacheFlushTimestampPacketGpuAddress = 
TimestampPacketHelper::getContextEndGpuAddress(*tag); diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl index b08039a1f2..1461ac9418 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl @@ -169,7 +169,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncC false, true); } else { - return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); } } diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.inl b/shared/source/direct_submission/dispatchers/render_dispatcher.inl index da3ae54e6f..b5fb9b7504 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.inl +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.inl @@ -49,7 +49,7 @@ inline void RenderDispatcher::dispatchMonitorFence(LinearStream &cmdB template inline size_t RenderDispatcher::getSizeMonitorFence(const RootDeviceEnvironment &rootDeviceEnvironment) { - return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); } template diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 0da828ffb0..809a868490 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -309,7 +309,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis memcpy_s(iddPtr, sizeof(idd), &idd, sizeof(idd)); if 
(NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::BeforeWorkload)) { - void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false)); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); @@ -326,7 +326,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis args.partitionCount = 1; if (NEO::PauseOnGpuProperties::pauseModeAllowed(NEO::debugManager.flags.PauseOnEnqueue.get(), args.device->debugExecutionCounter.load(), NEO::PauseOnGpuProperties::PauseMode::AfterWorkload)) { - void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), false)); + void *commandBuffer = listCmdBufferStream->getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(args.device->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite)); args.additionalCommands->push_back(commandBuffer); EncodeSemaphore::applyMiSemaphoreWaitCommand(*listCmdBufferStream, *args.additionalCommands); diff --git a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp index 67e200dcec..9c01220d76 100644 --- a/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp +++ b/shared/source/gen12lp/command_stream_receiver_hw_gen12lp.cpp @@ -129,7 +129,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingNoPostSyn template inline size_t 
CommandStreamReceiverHw::getCmdSizeForStallingPostSyncCommands() const { - return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), true); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); } template diff --git a/shared/source/helpers/gfx_core_helper.h b/shared/source/helpers/gfx_core_helper.h index d0f8c63d7f..0d4d9abbb7 100644 --- a/shared/source/helpers/gfx_core_helper.h +++ b/shared/source/helpers/gfx_core_helper.h @@ -498,8 +498,8 @@ struct MemorySynchronizationCommands { static void setPostSyncExtraProperties(PipeControlArgs &args); - static void addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment); - static void setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment); + static void addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode); + static void setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode); static void setBarrierWaFlags(void *barrierCmd); @@ -519,8 +519,8 @@ struct MemorySynchronizationCommands { static void addStateCacheFlush(LinearStream &commandStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void addInstructionCacheFlush(LinearStream &commandStream); - static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool postSyncWrite); - static size_t getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment); + static size_t getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode); + static size_t getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment, 
NEO::PostSyncMode postSyncMode); static size_t getSizeForSingleBarrier(); static size_t getSizeForSingleAdditionalSynchronizationForDirectSubmission(const RootDeviceEnvironment &rootDeviceEnvironment); static size_t getSizeForSingleAdditionalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment); diff --git a/shared/source/helpers/gfx_core_helper_base.inl b/shared/source/helpers/gfx_core_helper_base.inl index 994041f5fa..e41ef11103 100644 --- a/shared/source/helpers/gfx_core_helper_base.inl +++ b/shared/source/helpers/gfx_core_helper_base.inl @@ -25,6 +25,7 @@ #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/os_interface.h" #include "shared/source/os_interface/product_helper.h" +#include "shared/source/release_helper/release_helper.h" #include "shared/source/utilities/tag_allocator.h" #include "encode_surface_state_args.h" @@ -197,9 +198,7 @@ AuxTranslationMode GfxCoreHelperHw::getAuxTranslationMode(const Hardware template void MemorySynchronizationCommands::addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args) { - - void *commandBuffer = commandStream.getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, postSyncMode != PostSyncMode::noWrite)); - + void *commandBuffer = commandStream.getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, postSyncMode)); MemorySynchronizationCommands::setBarrierWithPostSyncOperation(commandBuffer, postSyncMode, gpuAddress, immediateData, rootDeviceEnvironment, args); } @@ -212,7 +211,7 @@ void MemorySynchronizationCommands::setBarrierWithPostSyncOperation( const RootDeviceEnvironment &rootDeviceEnvironment, PipeControlArgs &args) { - MemorySynchronizationCommands::setBarrierWa(commandsBuffer, gpuAddress, 
rootDeviceEnvironment); + MemorySynchronizationCommands::setBarrierWa(commandsBuffer, gpuAddress, rootDeviceEnvironment, postSyncMode); if (!args.blockSettingPostSyncProperties) { setPostSyncExtraProperties(args); @@ -313,16 +312,17 @@ void MemorySynchronizationCommands::setSingleBarrier(void *commandsBu } template -void MemorySynchronizationCommands::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) { - size_t requiredSize = MemorySynchronizationCommands::getSizeForBarrierWa(rootDeviceEnvironment); +void MemorySynchronizationCommands::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) { + size_t requiredSize = MemorySynchronizationCommands::getSizeForBarrierWa(rootDeviceEnvironment, postSyncMode); void *commandBuffer = commandStream.getSpace(requiredSize); - setBarrierWa(commandBuffer, gpuAddress, rootDeviceEnvironment); + setBarrierWa(commandBuffer, gpuAddress, rootDeviceEnvironment, postSyncMode); } template -void MemorySynchronizationCommands::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) { +void MemorySynchronizationCommands::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + auto releaseHelper = rootDeviceEnvironment.getReleaseHelper(); if (MemorySynchronizationCommands::isBarrierWaRequired(rootDeviceEnvironment)) { PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; MemorySynchronizationCommands::setBarrierWaFlags(&cmd); @@ -330,6 +330,12 @@ void MemorySynchronizationCommands::setBarrierWa(void *&commandsBuffe commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); MemorySynchronizationCommands::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, rootDeviceEnvironment); + 
} else if (releaseHelper && postSyncMode == PostSyncMode::timestamp && releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()) { + PipeControlArgs additionalArgs = {}; + additionalArgs.csStallOnly = true; + + MemorySynchronizationCommands::setSingleBarrier(commandsBuffer, additionalArgs); + commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); } } @@ -360,22 +366,25 @@ size_t MemorySynchronizationCommands::getSizeForSingleBarrier() { } template -size_t MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, bool postSyncWrite) { +size_t MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) { size_t size = getSizeForSingleBarrier(); - size += getSizeForBarrierWa(rootDeviceEnvironment); - if (postSyncWrite) { + size += getSizeForBarrierWa(rootDeviceEnvironment, postSyncMode); + if (postSyncMode != PostSyncMode::noWrite) { size += getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); } return size; } template -size_t MemorySynchronizationCommands::getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment) { +size_t MemorySynchronizationCommands::getSizeForBarrierWa(const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) { size_t size = 0; + auto releaseHelper = rootDeviceEnvironment.getReleaseHelper(); if (MemorySynchronizationCommands::isBarrierWaRequired(rootDeviceEnvironment)) { size = getSizeForSingleBarrier() + getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); + } else if (releaseHelper && postSyncMode == PostSyncMode::timestamp && releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()) { + size = getSizeForSingleBarrier(); } return size; } diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index b1df789f5b..6ed336dd78 100644 --- 
a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -182,7 +182,7 @@ struct TimestampPacketHelper { size_t size = count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (auxTranslationDirection == AuxTranslationDirection::nonAuxToAux && cacheFlushForBcsRequired) { - size += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true); + size += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData); } return size; diff --git a/shared/source/release_helper/release_helper.h b/shared/source/release_helper/release_helper.h index 6002c3d697..9acbf09014 100644 --- a/shared/source/release_helper/release_helper.h +++ b/shared/source/release_helper/release_helper.h @@ -64,6 +64,7 @@ class ReleaseHelper { virtual const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const = 0; virtual bool isNumRtStacksPerDssFixedValue() const = 0; virtual bool getFtrXe2Compression() const = 0; + virtual bool programmAdditionalStallPriorToBarrierWithTimestamp() const = 0; virtual uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const = 0; virtual bool isBlitImageAllowedForDepthFormat() const = 0; @@ -109,6 +110,7 @@ class ReleaseHelperHw : public ReleaseHelper { const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override; bool isNumRtStacksPerDssFixedValue() const override; bool getFtrXe2Compression() const override; + bool programmAdditionalStallPriorToBarrierWithTimestamp() const override; uint32_t computeSlmValues(uint32_t slmSize, bool isHeapless) const override; bool isBlitImageAllowedForDepthFormat() const override; diff --git a/shared/source/release_helper/release_helper_2001.cpp b/shared/source/release_helper/release_helper_2001.cpp index a5c77b19f1..fd2dd2eba3 100644 --- a/shared/source/release_helper/release_helper_2001.cpp 
+++ b/shared/source/release_helper/release_helper_2001.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -44,6 +44,11 @@ const SizeToPreferredSlmValueArray &ReleaseHelperHw::getSizeToPreferred return sizeToPreferredSlmValue; } +template <> +bool ReleaseHelperHw::programmAdditionalStallPriorToBarrierWithTimestamp() const { + return true; +} + } // namespace NEO #include "shared/source/release_helper/release_helper_common_xe2_hpg.inl" diff --git a/shared/source/release_helper/release_helper_base.inl b/shared/source/release_helper/release_helper_base.inl index 5bfbceecaa..b66aa7940a 100644 --- a/shared/source/release_helper/release_helper_base.inl +++ b/shared/source/release_helper/release_helper_base.inl @@ -171,6 +171,11 @@ bool ReleaseHelperHw::getFtrXe2Compression() const { return true; } +template +bool ReleaseHelperHw::programmAdditionalStallPriorToBarrierWithTimestamp() const { + return false; +} + template uint32_t ReleaseHelperHw::computeSlmValues(uint32_t slmSize, bool isHeapless) const { return 0u; diff --git a/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp b/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp index 29fdf80490..57e7725e7f 100644 --- a/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp +++ b/shared/source/xe3_core/gfx_core_helper_xe3_core.cpp @@ -196,11 +196,11 @@ inline void MemorySynchronizationCommands::setBarrierExtraProperties(voi } template <> -void MemorySynchronizationCommands::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) { +void MemorySynchronizationCommands::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) { } template <> -void MemorySynchronizationCommands::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment) 
{ +void MemorySynchronizationCommands::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const RootDeviceEnvironment &rootDeviceEnvironment, NEO::PostSyncMode postSyncMode) { } template <> diff --git a/shared/test/common/helpers/ult_gfx_core_helper.h b/shared/test/common/helpers/ult_gfx_core_helper.h index 8bec21ae52..e8b68fcc18 100644 --- a/shared/test/common/helpers/ult_gfx_core_helper.h +++ b/shared/test/common/helpers/ult_gfx_core_helper.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/execution_environment/root_device_environment.h" +#include "shared/source/helpers/common_types.h" #include "shared/source/helpers/gfx_core_helper.h" namespace NEO { @@ -15,7 +16,7 @@ namespace NEO { template struct UltMemorySynchronizationCommands : MemorySynchronizationCommands { static size_t getExpectedPipeControlCount(const RootDeviceEnvironment &rootDeviceEnvironment) { - return (MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - + return (MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) - MemorySynchronizationCommands::getSizeForAdditonalSynchronization(rootDeviceEnvironment)) / sizeof(typename GfxFamily::PIPE_CONTROL); } diff --git a/shared/test/common/mocks/mock_release_helper.h b/shared/test/common/mocks/mock_release_helper.h index 6e02d24d3c..958a04a78c 100644 --- a/shared/test/common/mocks/mock_release_helper.h +++ b/shared/test/common/mocks/mock_release_helper.h @@ -43,6 +43,7 @@ class MockReleaseHelper : public ReleaseHelper { ADDMETHOD_CONST_NOBASE(getFtrXe2Compression, bool, false, ()); ADDMETHOD_CONST_NOBASE(isDirectSubmissionLightSupported, bool, false, ()); ADDMETHOD_CONST_NOBASE(computeSlmValues, uint32_t, {}, (uint32_t slmSize, bool isHeapless)); + ADDMETHOD_CONST_NOBASE(programmAdditionalStallPriorToBarrierWithTimestamp, bool, false, ()); ADDMETHOD_CONST_NOBASE_VOIDRETURN(adjustRTDispatchGlobals, (void 
*rtDispatchGlobals, uint32_t rtStacksPerDss, bool heaplessEnabled, uint32_t maxBvhLevels)); const SizeToPreferredSlmValueArray &getSizeToPreferredSlmValue(bool isHeapless) const override { diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index c231d08068..51a6506095 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -5775,7 +5775,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenImplicitScalingEnabledWhenProgrammin ultCsr.activePartitions = 2; ultCsr.staticWorkPartitioningEnabled = true; - size_t barrierWithPostSyncOperationSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true); + size_t barrierWithPostSyncOperationSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); size_t expectedSize = barrierWithPostSyncOperationSize + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + diff --git a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp index c2e8ba0dba..2443fcc025 100644 --- a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp +++ b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp @@ -53,7 +53,7 @@ HWTEST_F(RenderDispatcherTest, givenRenderWhenAddingPreemptionCmdThenExpectPrope } HWTEST_F(RenderDispatcherTest, givenRenderWhenAskingForMonitorFenceCmdSizeThenReturnRequiredPipeControlCmdSize) { - size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->pDevice->getRootDeviceEnvironment(), true); + size_t expectedSize = 
MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(this->pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData); EXPECT_EQ(expectedSize, RenderDispatcher::getSizeMonitorFence(this->pDevice->getRootDeviceEnvironment())); } diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index 2bb2fdd78d..49cd92b1f8 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -1193,7 +1193,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, MockExecutionEnvironment mockExecutionEnvironment{}; - size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection); @@ -1270,7 +1270,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; size_t expectedSize = sizeof(MI_STORE_DATA_IMM) + - MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection) + @@ -1354,7 +1354,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, testHardwareInfo.featureTable.flags.ftrLocalMemory = true; size_t expectedSize = sizeof(MI_ATOMIC) + - 
MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), true) + + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::immediateData) + sizeof(MI_ATOMIC) + NEO::EncodeSemaphore::getSizeMiSemaphoreWait() + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection) + diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index a77eefd370..f3183cbab7 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -1493,7 +1493,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, MockExecutionEnvironment mockExecutionEnvironment{}; auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; - auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + + auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); @@ -1584,7 +1584,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_STORE_DATA_IMM) + - NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + + NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + 
sizeof(WalkerPartition::BATCH_BUFFER_START); @@ -1727,7 +1727,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, auto &rootDeviceEnvironment = *mockExecutionEnvironment.rootDeviceEnvironments[0]; auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_ATOMIC) + - NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) + + NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp index c7f8cbf563..fce20049c4 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests.cpp @@ -24,6 +24,7 @@ #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/common/mocks/mock_execution_environment.h" #include "shared/test/common/mocks/mock_gmm.h" +#include "shared/test/common/mocks/mock_release_helper.h" #include "shared/test/common/test_macros/hw_test.h" #include "shared/test/common/test_macros/test_checks_shared.h" @@ -326,7 +327,7 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsed PipeControlArgs args; MemorySynchronizationCommands::addBarrierWithPostSyncOperation( stream, PostSyncMode::timestamp, address, immediateData, rootDeviceEnvironment, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::timestamp) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - 
MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize); auto pipeControl = genCmdCast(cpuPipeControlBuffer); @@ -386,7 +387,7 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIs PipeControlArgs args{}; MemorySynchronizationCommands::addBarrierWithPostSyncOperation( stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); void *cpuPipeControlBuffer = ptrOffset(stream.getCpuBase(), pipeControlLocationSize); auto pipeControl = genCmdCast(cpuPipeControlBuffer); @@ -425,7 +426,7 @@ HWTEST_F(PipeControlHelperTests, givenNotifyEnableArgumentIsTrueWhenHelperIsUsed args.notifyEnable = true; MemorySynchronizationCommands::addBarrierWithPostSyncOperation( stream, PostSyncMode::immediateData, address, immediateData, rootDeviceEnvironment, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, true) - sizeof(PIPE_CONTROL); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(rootDeviceEnvironment, NEO::PostSyncMode::immediateData) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(rootDeviceEnvironment); auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); ASSERT_NE(nullptr, 
pipeControl); diff --git a/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp b/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp index f5c3e37854..7e32199760 100644 --- a/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp +++ b/shared/test/unit_test/helpers/gfx_core_helper_tests_dg2_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021-2024 Intel Corporation + * Copyright (C) 2021-2025 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -99,7 +99,7 @@ HWTEST2_F(PipeControlHelperTestsDg2AndLater, WhenAddingPipeControlWAThenCorrectC LinearStream stream(buffer, 128); hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; - MemorySynchronizationCommands::addBarrierWa(stream, address, rootDeviceEnvironment); + MemorySynchronizationCommands::addBarrierWa(stream, address, rootDeviceEnvironment, NEO::PostSyncMode::immediateData); if (MemorySynchronizationCommands::isBarrierWaRequired(rootDeviceEnvironment) == false) { EXPECT_EQ(0u, stream.getUsed()); diff --git a/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp index 262aa88205..c08fad167c 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_55_tests.cpp @@ -77,6 +77,10 @@ TEST_F(ReleaseHelper1255Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper1255Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper1255Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git 
a/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp index 19bc38484a..65fdf337d6 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_56_tests.cpp @@ -77,6 +77,10 @@ TEST_F(ReleaseHelper1256Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper1256Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper1256Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp index d71e922f7a..b62c36b3d4 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_57_tests.cpp @@ -77,6 +77,9 @@ TEST_F(ReleaseHelper1257Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper1257Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} TEST_F(ReleaseHelper1257Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp index 
2f626c046c..2a6c0ef759 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_60_tests.cpp @@ -66,6 +66,10 @@ TEST_F(ReleaseHelper1260Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper1260Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper1260Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp index 9ec1cb0728..58c930582c 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_61_tests.cpp @@ -65,6 +65,10 @@ TEST_F(ReleaseHelper1261Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper1261Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper1261Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp index 5cff429390..f40ab33d92 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp +++ 
b/shared/test/unit_test/release_helper/release_helper_12_70_tests.cpp @@ -79,6 +79,9 @@ TEST_F(ReleaseHelper1270Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper1270Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} TEST_F(ReleaseHelper1270Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp index 25afbf9e62..00fc9e43c2 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_71_tests.cpp @@ -79,6 +79,10 @@ TEST_F(ReleaseHelper1271Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper1271Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper1271Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp b/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp index 17fd940ff1..e8277bdfbc 100644 --- a/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_12_74_tests.cpp @@ -74,6 +74,10 @@ TEST_F(ReleaseHelper1274Tests, 
whenIsDummyBlitWaRequiredCalledThenFalseReturned) whenIsDummyBlitWaRequiredCalledThenFalseReturned(); } +TEST_F(ReleaseHelper1274Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper1274Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp b/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp index 27d3b8e5f7..b6037987ef 100644 --- a/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_20_04_tests.cpp @@ -78,6 +78,10 @@ TEST_F(ReleaseHelper2004Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper2004Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper2004Tests, whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey) { whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); } diff --git a/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp b/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp index e457b30769..e73a5397b3 100644 --- a/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_30_00_tests.cpp @@ -84,6 +84,10 @@ TEST_F(ReleaseHelper3000Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper3000Tests, 
whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper3000Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) { for (auto &revision : getRevisions()) { ipVersion.revision = revision; diff --git a/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp b/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp index e162d463b5..ebd0cc1a56 100644 --- a/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp +++ b/shared/test/unit_test/release_helper/release_helper_30_01_tests.cpp @@ -84,6 +84,10 @@ TEST_F(ReleaseHelper3001Tests, whenIsBlitImageAllowedForDepthFormatCalledThenTru whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); } +TEST_F(ReleaseHelper3001Tests, whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned) { + whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); +} + TEST_F(ReleaseHelper3001Tests, whenGettingPreferredSlmSizeThenAllEntriesHaveCorrectValues) { for (auto &revision : getRevisions()) { ipVersion.revision = revision; diff --git a/shared/test/unit_test/release_helper/release_helper_tests_base.cpp b/shared/test/unit_test/release_helper/release_helper_tests_base.cpp index 681b53819c..82ba574da2 100644 --- a/shared/test/unit_test/release_helper/release_helper_tests_base.cpp +++ b/shared/test/unit_test/release_helper/release_helper_tests_base.cpp @@ -193,4 +193,13 @@ void ReleaseHelperTestsBase::whenIsBlitImageAllowedForDepthFormatCalledThenTrueR ASSERT_NE(nullptr, releaseHelper); EXPECT_TRUE(releaseHelper->isBlitImageAllowedForDepthFormat()); } +} + +void ReleaseHelperTestsBase::whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned() { + for (auto &revision : getRevisions()) { + ipVersion.revision = revision; + releaseHelper = ReleaseHelper::create(ipVersion); + ASSERT_NE(nullptr, 
releaseHelper); + EXPECT_FALSE(releaseHelper->programmAdditionalStallPriorToBarrierWithTimestamp()); + } } \ No newline at end of file diff --git a/shared/test/unit_test/release_helper/release_helper_tests_base.h b/shared/test/unit_test/release_helper/release_helper_tests_base.h index 7e8b7cbefb..ff81ac22f0 100644 --- a/shared/test/unit_test/release_helper/release_helper_tests_base.h +++ b/shared/test/unit_test/release_helper/release_helper_tests_base.h @@ -36,6 +36,7 @@ struct ReleaseHelperTestsBase : public ::testing::Test { void whenGettingNumThreadsPerEuThenCorrectValueIsReturnedBasedOnOverrideNumThreadsPerEuDebugKey(); void whenGettingThreadsPerEuConfigsThenCorrectValueIsReturnedBasedOnNumThreadPerEu(); void whenIsBlitImageAllowedForDepthFormatCalledThenTrueReturned(); + void whenProgrammAdditionalStallPriorToBarrierWithTimestampCalledThenFalseReturned(); virtual std::vector getRevisions() = 0; std::unique_ptr releaseHelper; diff --git a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp index d3f24f15b8..2d1196e8c8 100644 --- a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp +++ b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp @@ -467,7 +467,7 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, whenPipecontrolWaIsProgrammedTh LinearStream cmdStream(buffer, sizeof(buffer)); uint64_t gpuAddress = 0x1234; - MemorySynchronizationCommands::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment()); + MemorySynchronizationCommands::addBarrierWa(cmdStream, gpuAddress, this->pDevice->getRootDeviceEnvironment(), NEO::PostSyncMode::noWrite); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); diff --git a/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp 
index 554e89917b..930f0792a2 100644 --- a/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp +++ b/shared/test/unit_test/xe_hpg_core/gfx_core_helper_tests_xe_hpg_core.cpp @@ -161,7 +161,7 @@ XE_HPG_CORETEST_F(GfxCoreHelperTestXeHpgCore, givenDisablePipeControlFlagIsEnabl constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment, NEO::PostSyncMode::noWrite); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); } @@ -179,7 +179,7 @@ XE_HPG_CORETEST_F(GfxCoreHelperTestXeHpgCore, givenDisablePipeControlFlagIsEnabl constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, rootDeviceEnvironment, NEO::PostSyncMode::noWrite); EXPECT_EQ(0u, cmdStream.getUsed()); }