From a3903c385e727cd81203e3ec3febd41a00c91a6a Mon Sep 17 00:00:00 2001 From: "Dunajski, Bartosz" Date: Thu, 21 Jul 2022 14:28:10 +0000 Subject: [PATCH] Remove HW types from synchronization interface Signed-off-by: Dunajski, Bartosz --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 34 ++- .../core/source/cmdlist/cmdlist_hw_base.inl | 2 +- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 9 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 13 +- .../core/source/gen11/cmdlist_gen11.cpp | 3 +- .../definitions/cache_flush_gen12lp.inl | 3 +- level_zero/core/source/gen9/cmdlist_gen9.cpp | 3 +- .../xe_hpc_core/cmdlist_xe_hpc_core.cpp | 2 +- .../cmdlist/test_cmdlist_append_barrier.cpp | 4 +- .../test_cmdlist_append_event_reset.cpp | 2 +- .../test_cmdlist_append_signal_event.cpp | 4 +- .../sources/cmdqueue/test_cmdqueue_2.cpp | 2 +- .../built_ins/aux_translation_builtin.h | 6 +- opencl/source/command_queue/enqueue_common.h | 4 +- .../command_queue/gpgpu_walker_base.inl | 6 +- .../gpgpu_walker_bdw_and_later.inl | 14 +- .../gpgpu_walker_xehp_and_later.inl | 2 +- .../command_queue/hardware_interface_base.inl | 7 +- ...hardware_commands_helper_bdw_and_later.inl | 2 +- ...ardware_commands_helper_xehp_and_later.inl | 2 +- ...range_based_flush_tests_xehp_and_later.cpp | 4 +- ..._walker_partition_tests_xehp_and_later.cpp | 60 +++--- .../command_queue/blit_enqueue_1_tests.cpp | 2 +- .../command_queue/dispatch_walker_tests.cpp | 4 +- .../dispatch_walker_tests_xehp_and_later.cpp | 12 +- .../enqueue_command_without_kernel_tests.cpp | 2 +- .../command_queue/enqueue_kernel_2_tests.cpp | 4 +- .../enqueue_kernel_two_walker_ioq_tests.cpp | 2 +- .../command_queue/get_size_required_tests.cpp | 4 +- ...and_stream_receiver_flush_task_1_tests.cpp | 2 +- ...and_stream_receiver_flush_task_2_tests.cpp | 4 +- ...and_stream_receiver_flush_task_3_tests.cpp | 10 +- ...and_stream_receiver_flush_task_4_tests.cpp | 4 +- ...ceiver_flush_task_tests_xehp_and_later.cpp | 4 +- .../command_stream_receiver_hw_1_tests.cpp | 4 +- ...tream_receiver_hw_tests_xehp_and_later.cpp | 12 +- .../experimental_command_buffer_tests.cpp | 8 +- .../gen12lp/coherency_tests_gen12lp.inl | 2 +- ...mmand_stream_receiver_hw_tests_gen12lp.inl | 4 +- .../unit_test/helpers/hw_helper_tests.cpp | 28 +-- .../helpers/hw_helper_tests_dg2_and_later.cpp | 10 +- .../hw_helper_tests_xehp_and_later.cpp | 16 +- .../helpers/test_preamble_xehp_and_later.cpp | 2 +- .../helpers/timestamp_packet_1_tests.cpp | 4 +- .../helpers/timestamp_packet_2_tests.cpp | 2 +- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 2 +- .../xe_hp_core/hw_helper_tests_xe_hp_core.cpp | 42 ++-- .../xe_hpc_core/enqueue_tests_xe_hpc_core.cpp | 2 +- .../hw_helper_tests_xe_hpc_core.cpp | 2 +- .../command_encoder_bdw_and_later.inl | 4 +- .../command_encoder_xehp_and_later.inl | 4 +- .../encode_compute_mode_tgllp_and_later.inl | 4 +- .../walker_partition_xehp_and_later.h | 44 ++-- .../command_stream_receiver_hw_base.inl | 41 ++-- ...mmand_stream_receiver_hw_bdw_and_later.inl | 10 +- ...mand_stream_receiver_hw_xehp_and_later.inl | 8 +- .../experimental_command_buffer.inl | 10 +- .../preemption_xehp_and_later.inl | 2 +- .../dispatchers/render_dispatcher.inl | 14 +- shared/source/gen11/command_encoder_gen11.cpp | 4 +- .../command_stream_receiver_hw_gen11.cpp | 10 +- shared/source/gen11/preamble_gen11.cpp | 2 +- shared/source/gen12lp/hw_helper_gen12lp.cpp | 10 +- shared/source/gen12lp/preamble_gen12lp.cpp | 6 +- shared/source/gen8/hw_helper_gen8.cpp | 22 +- shared/source/gen8/preamble_gen8.cpp | 2 +- shared/source/gen9/command_encoder_gen9.cpp | 4 +- shared/source/gen9/hw_helper_gen9.cpp | 2 +- shared/source/gen9/preamble_gen9.cpp | 2 +- shared/source/helpers/common_types.h | 6 + shared/source/helpers/hw_helper.h | 55 ++--- shared/source/helpers/hw_helper_base.inl | 200 ++++++++---------- .../helpers/hw_helper_bdw_and_later.inl | 8 +- .../helpers/hw_helper_dg2_and_later.inl | 8 +- .../helpers/hw_helper_xehp_and_later.inl | 2 +- shared/source/helpers/pipe_control_args.h | 1 + shared/source/helpers/preamble_base.inl | 2 +- .../helpers/preamble_xehp_and_later.inl | 4 +- shared/source/helpers/timestamp_packet.h | 6 +- .../xe_hp_core/hw_helper_xe_hp_core.cpp | 8 +- .../xe_hpc_core/hw_helper_xe_hpc_core.cpp | 2 +- shared/test/common/helpers/ult_hw_helper.h | 4 +- .../dispatchers/render_dispatcher_tests.cpp | 2 +- .../test_implicit_scaling_xehp_and_later.cpp | 14 +- ...alker_partition_tests_xehp_and_later_1.cpp | 14 +- .../fixtures/templated_fixture_tests.cpp | 4 +- .../hw_helper_tests_xe_hpg_core.cpp | 34 ++- 87 files changed, 459 insertions(+), 502 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 736f1d9296..b7ffafb3b7 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -329,7 +329,6 @@ ze_result_t CommandListCoreFamily::appendLaunchMultipleKernelsInd template ze_result_t CommandListCoreFamily::appendEventReset(ze_event_handle_t hEvent) { - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; auto event = Event::fromHandle(hEvent); uint64_t baseAddr = event->getGpuAddress(this->device); @@ -386,9 +385,9 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand if (appendPipeControlWithPostSync) { NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(!!event->signalScope, hwInfo); - NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + NEO::PostSyncMode::ImmediateData, baseAddr, Event::STATE_CLEARED, hwInfo, @@ -1080,7 +1079,7 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::Graph if (flushHost) { NEO::PipeControlArgs args; args.dcFlushEnable = true; - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } } } @@ -1730,8 +1729,6 @@ void CommandListCoreFamily::appendSignalEventPostWalker(Event *ev if (event->isEventTimestampFlagSet()) { appendEventForProfiling(event, false, workloadPartition); } else { - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; - commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); uint64_t baseAddr = event->getGpuAddress(this->device); if (event->isUsingContextEndOffset()) { @@ -1751,9 +1748,9 @@ void CommandListCoreFamily::appendSignalEventPostWalker(Event *ev args.workloadPartitionOffset = true; event->setPacketsInUse(this->partitionCount); } - NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + NEO::PostSyncMode::ImmediateData, baseAddr, Event::STATE_SIGNALED, hwInfo, @@ -1818,9 +1815,9 @@ inline AlignedAllocationData CommandListCoreFamily::getAlignedAll alloc = device->getDriverHandle()->findHostPointerAllocation(ptr, static_cast(bufferSize), device->getRootDeviceIndex()); if (alloc != nullptr) { alignedPtr = static_cast(alignDown(alloc->getGpuAddress(), NEO::EncodeSurfaceState::getSurfaceBaseAddressAlignment())); - //get offset from GPUVA of allocation to align down GPU address + // get offset from GPUVA of allocation to align down GPU address offset = static_cast(alloc->getGpuAddress()) - alignedPtr; - //get offset from base of allocation to arg address + // get offset from base of allocation to arg address offset += reinterpret_cast(ptr) - reinterpret_cast(alloc->getUnderlyingBuffer()); } else { alloc = getHostPtrAlloc(buffer, bufferSize, hostCopyAllowed); @@ -1874,7 +1871,6 @@ inline ze_result_t CommandListCoreFamily::addEventsToCmdList(uint template ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_handle_t hEvent) { - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; auto event = Event::fromHandle(hEvent); commandContainer.addToResidencyContainer(&event->getAllocation(this->device)); @@ -1910,9 +1906,9 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han args.workloadPartitionOffset = true; } if (applyScope || event->isEventTimestampFlagSet()) { - NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + NEO::PostSyncMode::ImmediateData, ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, hwInfo, @@ -1975,7 +1971,7 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu } else { NEO::PipeControlArgs args; args.dcFlushEnable = true; - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } } @@ -2073,7 +2069,7 @@ void CommandListCoreFamily::appendEventForProfiling(Event *event, NEO::MemorySynchronizationCommands::setPostSyncExtraProperties(args, hwInfo); - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); uint64_t baseAddr = event->getGpuAddress(this->device); NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, false, hwInfo); @@ -2087,8 +2083,6 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( uint64_t *dstptr, ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) { - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; - if (numWaitEvents > 0) { if (phWaitEvents) { CommandListCoreFamily::appendWaitOnEvents(numWaitEvents, phWaitEvents); @@ -2118,9 +2112,9 @@ ze_result_t CommandListCoreFamily::appendWriteGlobalTimestamp( } else { NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandContainer.getCommandStream(), - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, + NEO::PostSyncMode::Timestamp, reinterpret_cast(dstptr), 0, hwInfo, @@ -2400,7 +2394,7 @@ void CommandListCoreFamily::addFlushRequiredCommand(bool flushOpe if (NEO::MemorySynchronizationCommands::getDcFlushEnable(flushOperationRequired, hwInfo)) { NEO::PipeControlArgs args; args.dcFlushEnable = true; - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index f295f24924..593d4b921b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -182,7 +182,7 @@ void CommandListCoreFamily::appendMultiPartitionEpilogue() {} template void CommandListCoreFamily::appendComputeBarrierCommand() { NEO::PipeControlArgs args = createBarrierFlags(); - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } template diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index 39ff11cb15..9f268c140e 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -89,7 +89,6 @@ void programEventL3Flush(Event *event, uint32_t partitionCount, NEO::CommandContainer &commandContainer) { using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; auto eventPartitionOffset = (partitionCount > 1) ? (partitionCount * event->getSinglePacketSize()) : event->getSinglePacketSize(); @@ -111,9 +110,9 @@ void programEventL3Flush(Event *event, args.dcFlushEnable = true; args.workloadPartitionOffset = partitionCount > 1; - NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( cmdListStream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + NEO::PostSyncMode::ImmediateData, eventAddress, Event::STATE_SIGNALED, commandContainer.getDevice()->getHardwareInfo(), @@ -129,7 +128,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K const auto &hwInfo = this->device->getHwInfo(); if (NEO::DebugManager.flags.ForcePipeControlPriorToWalker.get()) { NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } NEO::Device *neoDevice = device->getNEODevice(); @@ -327,7 +326,7 @@ void CommandListCoreFamily::appendComputeBarrierCommand() { appendMultiTileBarrier(*neoDevice); } else { NEO::PipeControlArgs args = createBarrierFlags(); - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index f98661ca4f..3dc8ef3bf8 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -187,7 +187,7 @@ ze_result_t CommandQueueHw::executeCommandLists( auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { if (preemptionCmdSyncProgramming) { - preemptionSize += NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl(); + preemptionSize += NEO::MemorySynchronizationCommands::getSizeForSingleBarrier(); } preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize(commandListPreemption, statePreemption); statePreemption = commandListPreemption; @@ -272,7 +272,8 @@ ze_result_t CommandQueueHw::executeCommandLists( bool dispatchPostSync = isDispatchTaskCountPostSyncRequired(hFence, containsAnyRegularCmdList); if (dispatchPostSync) { - linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + : NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); } size_t alignedSize = alignUp(linearStreamSizeEstimate, minCmdBufferPtrAlign); @@ -397,7 +398,7 @@ ze_result_t CommandQueueHw::executeCommandLists( if (preemptionCmdSyncProgramming) { NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addPipeControl(child, args); + NEO::MemorySynchronizationCommands::addSingleBarrier(child, args); } NEO::PreemptionHelper::programCmdStream(child, commandListPreemption, @@ -594,8 +595,6 @@ bool CommandQueueHw::isDispatchTaskCountPostSyncRequired(ze_fence template void CommandQueueHw::dispatchTaskCountPostSync(NEO::LinearStream &commandStream, const NEO::HardwareInfo &hwInfo) { - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; - uint64_t postSyncAddress = csr->getTagAllocation()->getGpuAddress(); uint32_t postSyncData = csr->peekTaskCount() + 1; @@ -609,9 +608,9 @@ void CommandQueueHw::dispatchTaskCountPostSync(NEO::LinearStream args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); args.workloadPartitionOffset = partitionCount > 1; args.notifyEnable = csr->isUsedNotifyEnableForPostSync(); - NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + NEO::MemorySynchronizationCommands::addBarrierWithPostSyncOperation( commandStream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + NEO::PostSyncMode::ImmediateData, postSyncAddress, postSyncData, hwInfo, diff --git a/level_zero/core/source/gen11/cmdlist_gen11.cpp b/level_zero/core/source/gen11/cmdlist_gen11.cpp index f3fb7fcece..00d2cfeeb8 100644 --- a/level_zero/core/source/gen11/cmdlist_gen11.cpp +++ b/level_zero/core/source/gen11/cmdlist_gen11.cpp @@ -27,8 +27,7 @@ void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t n const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), - args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } template struct CommandListCoreFamily; diff --git a/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl b/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl index 86ee6d53bd..3dac0ff0d7 100644 --- a/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl +++ b/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl @@ -22,8 +22,7 @@ void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t num if (!supportL3Control) { NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), - args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } else { NEO::LinearStream *commandStream = commandContainer.getCommandStream(); NEO::SVMAllocsManager *svmAllocsManager = diff --git a/level_zero/core/source/gen9/cmdlist_gen9.cpp b/level_zero/core/source/gen9/cmdlist_gen9.cpp index 067ce48e6f..174327e5bb 100644 --- a/level_zero/core/source/gen9/cmdlist_gen9.cpp +++ b/level_zero/core/source/gen9/cmdlist_gen9.cpp @@ -28,8 +28,7 @@ void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t nu const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; args.dcFlushEnable = NEO::MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), - args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } template <> diff --git a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp index 4dc1f274c4..34026cd1c4 100644 --- a/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp +++ b/level_zero/core/source/xe_hpc_core/cmdlist_xe_hpc_core.cpp @@ -61,7 +61,7 @@ void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t NEO::PipeControlArgs args; args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); + NEO::MemorySynchronizationCommands::addSingleBarrier(*commandContainer.getCommandStream(), args); } template struct CommandListCoreFamily; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp index ee83474661..c172dca78f 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_barrier.cpp @@ -341,7 +341,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, sizeof(MI_STORE_DATA_IMM) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT); - size_t postSyncSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo()); + size_t postSyncSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getHwInfo()); auto useSizeBefore = cmdListStream->getUsed(); auto result = commandList->appendBarrier(eventHandle, 0, nullptr); @@ -450,7 +450,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, size_t timestampRegisters = 2 * (sizeof(MI_LOAD_REGISTER_REG) + sizeof(MI_LOAD_REGISTER_IMM) + NEO::EncodeMath::streamCommandSize + sizeof(MI_STORE_REGISTER_MEM)); - size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl() + + size_t postBarrierSynchronization = NEO::MemorySynchronizationCommands::getSizeForSingleBarrier() + NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(device->getHwInfo()); size_t stopRegisters = timestampRegisters + postBarrierSynchronization; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp index 9f07b3b4c6..b67efed19b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp @@ -327,7 +327,7 @@ HWTEST2_F(CommandListAppendEventReset, auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto &hwInfo = device->getNEODevice()->getHardwareInfo(); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) + + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo) + ((packets - 1) * sizeof(MI_STORE_DATA_IMM)) + commandList->estimateBufferSizeMultiTileBarrier(hwInfo); size_t usedSize = cmdStream->getUsed(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index fab6b27928..25cf6d1bab 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -214,7 +214,7 @@ HWTEST2_F(CommandListAppendSignalEvent, auto gpuAddress = event->getGpuAddress(device) + event->getContextEndOffset(); auto &hwInfo = device->getNEODevice()->getHardwareInfo(); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); @@ -327,7 +327,7 @@ HWTEST2_F(CommandListAppendSignalEvent, } auto &hwInfo = device->getNEODevice()->getHardwareInfo(); - size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + size_t expectedSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); size_t usedSize = cmdStream->getUsed(); EXPECT_EQ(expectedSize, usedSize); diff --git a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp index 605de6d1db..94ab0340de 100644 --- a/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdqueue/test_cmdqueue_2.cpp @@ -446,7 +446,7 @@ HWTEST_F(CommandQueueSynchronizeTest, givenSynchronousCommandQueueWhenTagUpdateF } else { expectedSize += sizeof(MI_BATCH_BUFFER_END); } - expectedSize += NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(neoDevice->getHardwareInfo()); + expectedSize += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(neoDevice->getHardwareInfo()); expectedSize = alignUp(expectedSize, 8); const ze_command_queue_desc_t desc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, 0, 0, 0, ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS, ZE_COMMAND_QUEUE_PRIORITY_NORMAL}; diff --git a/opencl/source/built_ins/aux_translation_builtin.h b/opencl/source/built_ins/aux_translation_builtin.h index a2e7f04ea4..7016a02576 100644 --- a/opencl/source/built_ins/aux_translation_builtin.h +++ b/opencl/source/built_ins/aux_translation_builtin.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -82,12 +82,12 @@ class BuiltInOp : public BuiltinDispatchInfoBuilder static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &hwInfo) { PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(dcFlush, hwInfo); - MemorySynchronizationCommands::addPipeControl(linearStream, args); + MemorySynchronizationCommands::addSingleBarrier(linearStream, args); } template static size_t getSizeForSinglePipeControl(size_t, const HardwareInfo &, bool) { - return MemorySynchronizationCommands::getSizeForSinglePipeControl(); + return MemorySynchronizationCommands::getSizeForSingleBarrier(); } template diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index ef1b6d2ff2..a8f4a851f3 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -490,9 +490,9 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandS const auto &hwInfo = device->getHardwareInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandStream, - GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, cacheFlushTimestampPacketGpuAddress, 0, hwInfo, diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 7f10810f3a..7cf8f69460 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -173,7 +173,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c if (blitEnqueue) { size_t expectedSizeCS = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (commandQueueHw.isCacheFlushForBcsRequired()) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + expectedSizeCS += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); } return expectedSizeCS; @@ -195,7 +195,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c expectedSizeCS += 4 * EncodeStoreMMIO::size; } } else if (isMarkerWithProfiling) { - expectedSizeCS += 2 * MemorySynchronizationCommands::getSizeForSinglePipeControl(); + expectedSizeCS += 2 * MemorySynchronizationCommands::getSizeForSingleBarrier(); if (!HwHelper::get(hwInfo.platform.eRenderCoreFamily).useOnlyGlobalTimestamps()) { expectedSizeCS += 2 * EncodeStoreMMIO::size; } @@ -205,7 +205,7 @@ size_t EnqueueOperation::getTotalSizeRequiredCS(uint32_t eventType, c } if (DebugManager.flags.PauseOnEnqueue.get() != -1) { - expectedSizeCS += MemorySynchronizationCommands::getSizeForSinglePipeControl() * 2; + expectedSizeCS += MemorySynchronizationCommands::getSizeForSingleBarrier() * 2; expectedSizeCS += sizeof(typename GfxFamily::MI_SEMAPHORE_WAIT) * 2; } diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl b/opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl index 3d40eed10d..50b9be8b82 100644 --- a/opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl +++ b/opencl/source/command_queue/gpgpu_walker_bdw_and_later.inl @@ -66,9 +66,9 @@ void GpgpuWalkerHelper::setupTimestampPacket( uint64_t address = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketNode); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *cmdStream, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, address, 0, *rootDeviceEnvironment.getHardwareInfo(), @@ -80,7 +80,7 @@ void GpgpuWalkerHelper::setupTimestampPacket( template size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) { size_t size = sizeof(typename GfxFamily::GPGPU_WALKER) + HardwareCommandsHelper::getSizeRequiredCS() + - sizeof(PIPE_CONTROL) * (MemorySynchronizationCommands::isPipeControlWArequired(commandQueue.getDevice().getHardwareInfo()) ? 2 : 1); + sizeof(PIPE_CONTROL) * (MemorySynchronizationCommands::isBarrierWaRequired(commandQueue.getDevice().getHardwareInfo()) ? 2 : 1); size += HardwareCommandsHelper::getSizeRequiredForCacheFlush(commandQueue, pKernel, 0U); size += PreemptionHelper::getPreemptionWaCsSize(commandQueue.getDevice()); if (reserveProfilingCmdsSpace) { @@ -112,9 +112,9 @@ void GpgpuWalkerHelper::dispatchProfilingCommandsStart( // PIPE_CONTROL for global timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandStream, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, + PostSyncMode::Timestamp, timeStampAddress, 0llu, hwInfo, @@ -144,9 +144,9 @@ void GpgpuWalkerHelper::dispatchProfilingCommandsEnd( // PIPE_CONTROL for global timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, GlobalEndTS); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandStream, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, + PostSyncMode::Timestamp, timeStampAddress, 0llu, hwInfo, diff --git a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl index e225a152c8..84d9bd7f5b 100644 --- a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl +++ b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl @@ -130,7 +130,7 @@ void GpgpuWalkerHelper::adjustMiStoreRegMemMode(MI_STORE_REG_MEM size_t EnqueueOperation::getSizeRequiredCSKernel(bool reserveProfilingCmdsSpace, bool reservePerfCounters, CommandQueue &commandQueue, const Kernel *pKernel, const DispatchInfo &dispatchInfo) { - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(commandQueue.getDevice().getHardwareInfo()) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(commandQueue.getDevice().getHardwareInfo()) ? 2 : 1; size_t size = sizeof(typename GfxFamily::COMPUTE_WALKER) + (sizeof(typename GfxFamily::PIPE_CONTROL) * numPipeControls) + diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 12b72be42d..bdf60c1f2d 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -283,15 +283,12 @@ inline void HardwareInterface::dispatchDebugPauseCommands( if (!commandQueue.isSpecial()) { auto address = commandQueue.getGpgpuCommandStreamReceiver().getDebugPauseStateGPUAddress(); { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; - const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *commandStream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, address, static_cast(confirmationTrigger), hwInfo, diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl index 3d2850a49c..6f30a5725c 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl @@ -151,7 +151,7 @@ void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(Line const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addPipeControl(*commandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*commandStream, args); } } // namespace NEO diff --git a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl index 8df2f446f1..66a6b45609 100644 --- a/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_xehp_and_later.inl @@ -167,7 +167,7 @@ void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(Line auto &hardwareInfo = commandQueue.getDevice().getHardwareInfo(); args.unTypedDataPortCacheFlush = HwHelper::get(hardwareInfo.platform.eRenderCoreFamily).unTypedDataPortCacheFlushRequired(); - MemorySynchronizationCommands::addPipeControl(*commandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*commandStream, args); // 2. flush all affected L3 lines if constexpr (GfxFamily::isUsingL3Control) { diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_range_based_flush_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_stream/aub_range_based_flush_tests_xehp_and_later.cpp index 06458bc76b..e172a2281d 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/aub_range_based_flush_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_range_based_flush_tests_xehp_and_later.cpp @@ -121,7 +121,7 @@ HWTEST2_F(RangeBasedFlushTest, givenNoDcFlushInPipeControlWhenL3ControlFlushesCa new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getCommandStreamerStallEnable, true), EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)}), new MatchHwCmd(1, Expects{EXPECT_MEMBER(L3_CONTROL, getPostSyncOperation, L3_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_NO_WRITE)}), }; - if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo())) { expectedCommands.push_back(new MatchHwCmd(1, Expects{EXPECT_MEMBER(PIPE_CONTROL, getDcFlushEnable, false)})); if (MemorySynchronizationCommands::getSizeForAdditonalSynchronization(device->getHardwareInfo()) > 0) { expectedCommands.push_back(new MatchHwCmd(1, Expects{EXPECT_MEMBER(MI_SEMAPHORE_WAIT, getSemaphoreDataDword, EncodeSempahore::invalidHardwareTag)})); @@ -144,7 +144,7 @@ HWTEST2_F(RangeBasedFlushTest, givenL3ControlWhenPostSyncIsSetThenExpectPostSync using L3_CONTROL = typename FamilyType::L3_CONTROL; using L3_FLUSH_ADDRESS_RANGE = typename FamilyType::L3_FLUSH_ADDRESS_RANGE; - if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo())) { GTEST_SKIP(); } diff --git a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp index 6f72c7d0b4..8bf0036c5c 100644 --- a/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/aub_tests/command_stream/aub_walker_partition_tests_xehp_and_later.cpp @@ -139,7 +139,7 @@ struct AubWalkerPartitionFixture : public KernelAUBFixture uint8_t buffer[256]; LinearStream stream(buffer, 256); - MemorySynchronizationCommands::addPipeControlWA(stream, 0ull, hwInfo); + MemorySynchronizationCommands::addBarrierWa(stream, 0ull, hwInfo); void *syncPipeControlAddress = reinterpret_cast(reinterpret_cast(startAddress) + stream.getUsed()); PIPE_CONTROL *pipeControl = genCmdCast(syncPipeControlAddress); return pipeControl; @@ -313,8 +313,6 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, whenPartitionCountSetTo } HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, whenPipeControlIsBeingEmittedWithPartitionBitSetThenMultipleFieldsAreBeingUpdatedWithValue) { - using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - auto writeAddress = helperSurface->getGpuAddress(); auto partitionId = 1u; auto writeSize = 8u; @@ -331,8 +329,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, whenPipeControlIsBeingE void *pipeControlAddress = taskStream->getSpace(0); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); auto pipeControl = retrieveSyncPipeControl(pipeControlAddress, device->getHardwareInfo()); @@ -394,8 +392,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenVariousCompareMode //this pipe control should be executed void *pipeControlAddress = taskStream->getSpace(0); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); auto pipeControl = retrieveSyncPipeControl(pipeControlAddress, device->getHardwareInfo()); @@ -687,8 +685,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, givenConditionalBa //this pipe control should be executed PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *secondLevelBatchStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); @@ -699,8 +697,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, givenConditionalBa writeAddress += sizeof(uint64_t); writeValue++; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); @@ -747,8 +745,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, givenConditionalBa //this pipe control should NOT be executed PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *secondLevelBatchStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); @@ -759,8 +757,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, givenConditionalBa writeAddress += sizeof(uint64_t); writeValue++; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); @@ -792,8 +790,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithNesting, givenCommandBuffer //this pipe control should be executed PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *secondLevelBatchStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); auto batchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(BATCH_BUFFER_END))); @@ -836,8 +834,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithoutNesting, givenConditiona //this pipe control should't be executed PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *secondLevelBatchStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); @@ -849,8 +847,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithoutNesting, givenConditiona writeAddress += sizeof(uint64_t); writeValue++; //and this shouldn't as well, we returned to ring - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); @@ -896,8 +894,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithoutNesting, givenConditiona //this pipe control should't be executed PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *secondLevelBatchStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *secondLevelBatchStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); conditionalBatchBufferEnd = reinterpret_cast(secondLevelBatchStream->getSpace(sizeof(CONDITIONAL_BATCH_BUFFER_END))); @@ -909,8 +907,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, MultiLevelBatchTestsWithoutNesting, givenConditiona writeAddress += sizeof(uint64_t); writeValue++; //and this should , we returned to primary batch - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); csr->makeResident(*secondLevelBatch); @@ -1035,8 +1033,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenGeneralPurposeRegi //this command must not execute taskStream->getSpace(totalBytesProgrammed); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); streamCpuPointer = taskStream->getSpace(0); @@ -1067,8 +1065,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicationWhenItI taskStream->getSpace(totalBytesProgrammed); //emit pipe control PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); //turn off predication @@ -1120,8 +1118,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, AubWalkerPartitionZeroTest, givenPredicationWhenItI //emit pipe control void *pipeControlAddress = taskStream->getSpace(0); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - *taskStream, FamilyType::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + *taskStream, PostSyncMode::ImmediateData, writeAddress, writeValue, device->getHardwareInfo(), args); auto pipeControl = retrieveSyncPipeControl(pipeControlAddress, device->getHardwareInfo()); diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp index 72bc8fa07b..bd60ccc204 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_1_tests.cpp @@ -481,7 +481,7 @@ HWTEST_TEMPLATED_F(BlitAuxTranslationTests, givenBlitAuxTranslationWithRequiredC size_t numBuffersToEstimate = 2; size_t dependencySize = numBuffersToEstimate * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); - size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + size_t cacheFlushSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); setMockKernelArgs(std::array{{buffer0.get(), buffer1.get(), buffer2.get()}}); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index 13629b64e4..0caa6fa9fc 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -1353,7 +1353,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingCommandsTest, givenKernelWhenProfilingComma auto itorPipeCtrl = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPipeCtrl); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } if (UnitTestHelper::isAdditionalSynchronizationRequired()) { @@ -1368,7 +1368,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, ProfilingCommandsTest, givenKernelWhenProfilingComma itorPipeCtrl++; itorPipeCtrl = find(itorPipeCtrl, cmdList.end()); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { itorPipeCtrl++; } if (UnitTestHelper::isAdditionalSynchronizationRequired()) { diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp index 225f1d2796..8914ec504b 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp @@ -1097,7 +1097,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr auto cmdQ = std::make_unique>(&context, device, nullptr); auto &csr = cmdQ->getUltCommandStreamReceiver(); - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + @@ -1154,7 +1154,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr auto cmdQ = std::make_unique>(&context, device, nullptr); auto &csr = cmdQ->getUltCommandStreamReceiver(); - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + @@ -1205,7 +1205,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenWalkerPart DebugManager.flags.EnableWalkerPartition.set(0u); auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; DispatchInfo dispatchInfo{}; dispatchInfo.setNumberOfWorkgroups({32, 1, 1}); @@ -1225,7 +1225,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, whenPipeContro hwInfo.featureTable.flags.ftrLocalMemory = true; auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(hwInfo) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(hwInfo) ? 2 : 1; auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + @@ -1242,7 +1242,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + @@ -1271,7 +1271,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, GivenPipeContr auto cmdQ = std::make_unique>(context.get(), device.get(), nullptr); - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo()) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo()) ? 2 : 1; auto baseSize = sizeof(typename FamilyType::COMPUTE_WALKER) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + diff --git a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp index 7cf052b6f7..7f3c55d68b 100644 --- a/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_command_without_kernel_tests.cpp @@ -461,7 +461,7 @@ HWTEST_F(EnqueueHandlerTest, GivenCommandStreamWithoutKernelAndZeroSurfacesWhenE const auto enqueueResult = mockCmdQ->template enqueueHandler(nullptr, 0, false, multiDispatch, 0, nullptr, nullptr); EXPECT_EQ(CL_SUCCESS, enqueueResult); - auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( + auto requiredCmdStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( pDevice->getHardwareInfo()), MemoryConstants::cacheLineSize); diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp index 6b38f58b1d..9071856858 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_2_tests.cpp @@ -899,7 +899,7 @@ HWTEST_F(EnqueueAuxKernelTests, givenMultipleArgsWhenAuxTranslationIsRequiredThe auto pipeControls = findAll(cmdList.begin(), cmdList.end()); - auto additionalPcCount = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( + auto additionalPcCount = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( pDevice->getHardwareInfo()) / sizeof(typename FamilyType::PIPE_CONTROL); @@ -1016,7 +1016,7 @@ HWTEST_F(EnqueueKernelTest, givenTimestampWriteEnableWhenMarkerProfilingWithoutW auto baseCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, false, false); auto extendedCommandStreamSize = EnqueueOperation::getTotalSizeRequiredCS(CL_COMMAND_MARKER, {}, false, false, false, *pCmdQ, multiDispatchInfo, true, false); - EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + MemorySynchronizationCommands::getSizeForSinglePipeControl(), extendedCommandStreamSize); + EXPECT_EQ(baseCommandStreamSize + 4 * EncodeStoreMMIO::size + MemorySynchronizationCommands::getSizeForSingleBarrier(), extendedCommandStreamSize); } HWCMDTEST_F(IGFX_XE_HP_CORE, EnqueueKernelTest, givenTimestampWriteEnableOnMultiTileQueueWhenMarkerProfilingWithoutWaitListThenSizeHasFourMMIOStoresAndCrossTileBarrier) { diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp index 8ca5d05419..d9dffeca60 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_two_walker_ioq_tests.cpp @@ -43,7 +43,7 @@ HWTEST_F(IOQWithTwoWalkers, GivenTwoCommandQueuesWhenEnqueuingKernelThenOnePipeC typedef typename FamilyType::PIPE_CONTROL PIPE_CONTROL; - auto waNeeded = MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo()); + auto waNeeded = MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo()); auto itorCmd = find(itorWalker1, itorWalker2); ASSERT_NE(itorWalker2, itorCmd); diff --git a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp index 561242bc19..751bdaf355 100644 --- a/opencl/test/unit_test/command_queue/get_size_required_tests.cpp +++ b/opencl/test/unit_test/command_queue/get_size_required_tests.cpp @@ -72,7 +72,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingMarkerThenHeapsAndCommandBufferAreNot size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( + expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( pDevice->getHardwareInfo()), MemoryConstants::cacheLineSize); } @@ -99,7 +99,7 @@ HWTEST_F(GetSizeRequiredTest, WhenEnqueuingBarrierThenHeapsAndCommandBufferAreNo size_t expectedStreamSize = 0; if (pCmdQ->getGpgpuCommandStreamReceiver().peekTimestampPacketWriteEnabled()) { - expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( + expectedStreamSize = alignUp(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( pDevice->getHardwareInfo()), MemoryConstants::cacheLineSize); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index 75bfea6ca8..3bf2911778 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -118,7 +118,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, givenCsrInBatch //we do level change that will emit PPC, fill all the space so only BB end fits. taskLevel++; - auto ppcSize = MemorySynchronizationCommands::getSizeForSinglePipeControl(); + auto ppcSize = MemorySynchronizationCommands::getSizeForSingleBarrier(); auto fillSize = MemoryConstants::cacheLineSize - ppcSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END); csrCommandStream.getSpace(fillSize); auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index d8453eece1..44f770d4eb 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -1089,7 +1089,7 @@ HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenSpecialPipelineSelectModeCha size_t size = commandStreamReceiver.getCmdSizeForPipelineSelect(); size_t expectedSize = sizeof(PIPELINE_SELECT); - if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierlPriorToPipelineSelectWaRequired(pDevice->getHardwareInfo())) { expectedSize += sizeof(PIPE_CONTROL); } EXPECT_EQ(expectedSize, size); @@ -1117,7 +1117,7 @@ HWTEST2_F(CommandStreamReceiverFlushTaskTests, givenCsrWhenPreambleSentThenRequi auto difference = mediaSamplerConfigChangedSize - mediaSamplerConfigNotChangedSize; size_t expectedDifference = sizeof(PIPELINE_SELECT); - if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierlPriorToPipelineSelectWaRequired(pDevice->getHardwareInfo())) { expectedDifference += sizeof(PIPE_CONTROL); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index 7ddcd4661a..b94a4b0aa3 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -996,7 +996,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenCommandA parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { itorPipeControl++; } auto pipeControl = genCmdCast(*itorPipeControl); @@ -1032,7 +1032,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrd parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { itorPipeControl++; } auto pipeControl = genCmdCast(*itorPipeControl); @@ -1243,7 +1243,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlThenDcFlus ASSERT_NE(nullptr, pipeControl); parseCommands(commandStream); auto itorPipeControl = find(cmdList.begin(), cmdList.end()); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { itorPipeControl++; } auto pipeControlCmdBuffer = genCmdCast(*itorPipeControl); @@ -1897,7 +1897,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsTrueWhenCall PipeControlArgs args; args.dcFlushEnable = true; - MemorySynchronizationCommands::addPipeControl(commandStream, args); + MemorySynchronizationCommands::addSingleBarrier(commandStream, args); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); @@ -1911,7 +1911,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsFalseWhenCal LinearStream commandStream(buffer.get(), 128); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(commandStream, args); + MemorySynchronizationCommands::addSingleBarrier(commandStream, args); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp index fae3c36d1e..ae3398c719 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_4_tests.cpp @@ -814,7 +814,7 @@ HWTEST_F(UltCommandStreamReceiverTest, WhenFlushingAllCachesThenPipeControlIsAdd LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); parseCommands(stream, 0); @@ -856,7 +856,7 @@ HWTEST_F(UltCommandStreamReceiverTest, givenDebugDisablingCacheFlushWhenAddingPi args.textureCacheInvalidationEnable = true; args.vfCacheInvalidationEnable = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); parseCommands(stream, 0); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp index ae1ceaf94a..8032856d64 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -384,7 +384,7 @@ HWTEST2_F(CommandStreamReceiverFlushTaskXeHPAndLaterTests, whenFlushAllCachesVar LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); parseCommands(stream, 0); @@ -440,7 +440,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi //we do level change that will emit PPC, fill all the space so only BB end fits. taskLevel++; - auto ppcSize = MemorySynchronizationCommands::getSizeForSinglePipeControl(); + auto ppcSize = MemorySynchronizationCommands::getSizeForSingleBarrier(); auto fillSize = MemoryConstants::cacheLineSize - ppcSize - sizeof(typename FamilyType::MI_BATCH_BUFFER_END); csrCommandStream.getSpace(fillSize); auto expectedUsedSize = 2 * MemoryConstants::cacheLineSize; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp index f940afbc90..aa7b3be262 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_1_tests.cpp @@ -1502,7 +1502,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhe DispatchFlags dispatchFlags = DispatchFlagsHelper::createDefaultDispatchFlags(); dispatchFlags.barrierTimestampPacketNodes = ×tampPacketDependencies.barrierNodes; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); @@ -1513,7 +1513,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, UltCommandStreamReceiverTest, givenBarrierNodeSetWhe findHardwareCommands(); auto cmdItor = cmdList.begin(); - if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { + if (MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)) { PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); cmdItor++; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp index abc113c80b..acf557311b 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests_xehp_and_later.cpp @@ -973,7 +973,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->activePartitions = 1; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); @@ -984,7 +984,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh findHardwareCommands(); auto cmdItor = cmdList.begin(); - if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { + if (MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)) { PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); cmdItor++; @@ -1021,7 +1021,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionDisabledW commandStreamReceiver->staticWorkPartitioningEnabled = false; commandStreamReceiver->activePartitions = 2; - size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + size_t expectedCmdSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); size_t estimatedCmdSize = commandStreamReceiver->getCmdSizeForStallingCommands(dispatchFlags); EXPECT_EQ(expectedCmdSize, estimatedCmdSize); @@ -1032,7 +1032,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionDisabledW findHardwareCommands(); auto cmdItor = cmdList.begin(); - if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { + if (MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)) { PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); cmdItor++; @@ -1072,7 +1072,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh commandStreamReceiver->staticWorkPartitioningEnabled = true; commandStreamReceiver->activePartitions = 2; - size_t expectedSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) + + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START) + 2 * sizeof(uint32_t); @@ -1087,7 +1087,7 @@ HWTEST2_F(CommandStreamReceiverHwTestXeHPAndLater, givenStaticPartitionEnabledWh findHardwareCommands(); auto cmdItor = cmdList.begin(); - if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { + if (MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)) { PIPE_CONTROL *pipeControl = genCmdCast(*cmdItor); ASSERT_NE(nullptr, pipeControl); cmdItor++; diff --git a/opencl/test/unit_test/command_stream/experimental_command_buffer_tests.cpp b/opencl/test/unit_test/command_stream/experimental_command_buffer_tests.cpp index e9bc8c8aa7..62811563f7 100644 --- a/opencl/test/unit_test/command_stream/experimental_command_buffer_tests.cpp +++ b/opencl/test/unit_test/command_stream/experimental_command_buffer_tests.cpp @@ -93,7 +93,7 @@ HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhe it = hwParserExCmdBuffer.cmdList.begin(); GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end(); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { //1st PIPE_CONTROL with CS Stall ASSERT_NE(end, it); pipeControl = genCmdCast(*it); @@ -127,7 +127,7 @@ HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhe EXPECT_EQ(exAllocationGpuAddr, semaphoreCmd->getSemaphoreGraphicsAddress()); EXPECT_EQ(MI_SEMAPHORE_WAIT::COMPARE_OPERATION_SAD_EQUAL_SDD, semaphoreCmd->getCompareOperation()); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { //3rd PIPE_CONTROL with CS stall it++; ASSERT_NE(end, it); @@ -229,7 +229,7 @@ HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhe hwParserExCmdBuffer.parseCommands(*mockExCmdBuffer->currentStream, cmbBufferOffset); it = hwParserExCmdBuffer.cmdList.begin(); GenCmdList::iterator end = hwParserExCmdBuffer.cmdList.end(); - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { it++; if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; @@ -245,7 +245,7 @@ HWTEST_F(MockExperimentalCommandBufferTest, givenEnabledExperimentalCmdBufferWhe EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, pipeControl->getPostSyncOperation()); EXPECT_EQ(timeStampAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); //omit SEMAPHORE_WAIT and 3rd PIPE_CONTROL - if (MemorySynchronizationCommands::isPipeControlWArequired(pDevice->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(pDevice->getHardwareInfo())) { it++; if (UnitTestHelper::isAdditionalSynchronizationRequired()) { it++; diff --git a/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl index aa02958dd0..01ecc85bd9 100644 --- a/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/coherency_tests_gen12lp.inl @@ -244,7 +244,7 @@ GEN12LPTEST_F(Gen12LpCoherencyRequirements, givenCoherencyRequirementWithoutShar auto hwInfo = device->getHardwareInfo(); flushTask(false); - if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) { + if (MemorySynchronizationCommands::isBarrierlPriorToPipelineSelectWaRequired(hwInfo)) { findCmd(true, false, true); // first time } else { findCmd(true, false, false); // first time diff --git a/opencl/test/unit_test/gen12lp/command_stream_receiver_hw_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/command_stream_receiver_hw_tests_gen12lp.inl index 2c779ebfcb..1a2c3fecbd 100644 --- a/opencl/test/unit_test/gen12lp/command_stream_receiver_hw_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/command_stream_receiver_hw_tests_gen12lp.inl @@ -84,7 +84,7 @@ GEN12LPTEST_F(UltCommandStreamReceiverTestGen12Lp, givenDebugEnablingCacheFlushW LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); parseCommands(stream, 0); @@ -111,7 +111,7 @@ GEN12LPTEST_F(UltCommandStreamReceiverTestGen12Lp, givenDebugDisablingCacheFlush PipeControlArgs args; args.dcFlushEnable = true; args.hdcPipelineFlush = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); parseCommands(stream, 0); diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 25aa145fea..793608ab52 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -297,9 +297,9 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsed HardwareInfo hardwareInfo = *defaultHwInfo; PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, hardwareInfo, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + stream, PostSyncMode::Timestamp, address, immediateData, hardwareInfo, args); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hardwareInfo); auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); ASSERT_NE(nullptr, pipeControl); @@ -321,7 +321,7 @@ HWTEST_F(PipeControlHelperTests, givenDcFlushNotAllowedWhenProgrammingPipeContro PipeControlArgs args; args.dcFlushEnable = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); auto pipeControl = genCmdCast(stream.getCpuBase()); ASSERT_NE(nullptr, pipeControl); @@ -346,9 +346,9 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIs HardwareInfo hardwareInfo = *defaultHwInfo; PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, hardwareInfo, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + stream, PostSyncMode::ImmediateData, address, immediateData, hardwareInfo, args); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hardwareInfo); auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); ASSERT_NE(nullptr, pipeControl); @@ -376,9 +376,9 @@ HWTEST_F(PipeControlHelperTests, givenNotifyEnableArgumentIsTrueWhenHelperIsUsed PipeControlArgs args; args.notifyEnable = true; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, hardwareInfo, args); - auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + stream, PostSyncMode::ImmediateData, address, immediateData, hardwareInfo, args); + auto additionalPcSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hardwareInfo); auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); ASSERT_NE(nullptr, pipeControl); @@ -402,8 +402,8 @@ HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncTimestampUsedThenCorrect uint64_t immediateData = 0x0; PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlWithPostSync( - stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, args); + MemorySynchronizationCommands::addSingleBarrier( + stream, PostSyncMode::Timestamp, address, immediateData, args); auto pipeControl = genCmdCast(stream.getCpuBase()); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(address, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); @@ -420,8 +420,8 @@ HWTEST_F(PipeControlHelperTests, WhenPipeControlPostSyncWriteImmediateDataUsedTh uint64_t immediateData = 0x1234; PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlWithPostSync( - stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, args); + MemorySynchronizationCommands::addSingleBarrier( + stream, PostSyncMode::ImmediateData, address, immediateData, args); auto pipeControl = genCmdCast(stream.getCpuBase()); ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(address, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControl)); diff --git a/opencl/test/unit_test/helpers/hw_helper_tests_dg2_and_later.cpp b/opencl/test/unit_test/helpers/hw_helper_tests_dg2_and_later.cpp index 350ab10739..d616a657f4 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests_dg2_and_later.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests_dg2_and_later.cpp @@ -31,9 +31,9 @@ HWTEST2_F(PipeControlHelperTestsDg2AndLater, WhenAddingPipeControlWAThenCorrectC LinearStream stream(buffer, 128); hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; - MemorySynchronizationCommands::addPipeControlWA(stream, address, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(stream, address, hardwareInfo); - if (MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo) == false) { + if (MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo) == false) { EXPECT_EQ(0u, stream.getUsed()); continue; } @@ -104,7 +104,7 @@ HWTEST2_F(PipeControlHelperTestsDg2AndLater, givenRequestedCacheFlushesWhenProgr args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; args.compressionControlSurfaceCcsFlush = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); @@ -121,7 +121,7 @@ HWTEST2_F(PipeControlHelperTestsDg2AndLater, givenDebugVariableSetWhenProgrammin LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); @@ -141,7 +141,7 @@ HWTEST2_F(PipeControlHelperTestsDg2AndLater, givenDebugDisableCacheFlushWhenProg args.hdcPipelineFlush = true; args.unTypedDataPortCacheFlush = true; args.compressionControlSurfaceCcsFlush = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_FALSE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); diff --git a/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp b/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp index 9fefe1dc31..b3635aa843 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests_xehp_and_later.cpp @@ -248,9 +248,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, WhenAddingPipeC LinearStream stream(buffer, 128); hardwareInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; - MemorySynchronizationCommands::addPipeControlWA(stream, address, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(stream, address, hardwareInfo); - if (MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo) == false) { + if (MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo) == false) { EXPECT_EQ(0u, stream.getUsed()); continue; } @@ -322,7 +322,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenRequestedC PipeControlArgs args; args.hdcPipelineFlush = true; args.compressionControlSurfaceCcsFlush = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); @@ -338,7 +338,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenDebugVaria LinearStream stream(buffer, sizeof(buffer)); PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_TRUE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); @@ -356,7 +356,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenDebugDisab PipeControlArgs args; args.hdcPipelineFlush = true; args.compressionControlSurfaceCcsFlush = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); auto pipeControl = reinterpret_cast(buffer); EXPECT_FALSE(UnitTestHelper::getPipeControlHdcPipelineFlush(*pipeControl)); @@ -379,7 +379,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, HwHelperTestXeHPAndLater, WhenIsPipeControlWArequir hwInfo.featureTable.flags.ftrLocalMemory = ftrLocalMemory; EXPECT_EQ(UnitTestHelper::isPipeControlWArequired(hwInfo), - MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)); + MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)); } } @@ -432,9 +432,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, PipeControlHelperTestsXeHPAndLater, givenPostSyncPi PipeControlArgs args; args.workloadPartitionOffset = true; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( stream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, gpuAddress, data, hardwareInfo, diff --git a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp index e255d5efda..5eeb44043f 100644 --- a/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp +++ b/opencl/test/unit_test/helpers/test_preamble_xehp_and_later.cpp @@ -115,7 +115,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, KernelCommandsXeHPAndLater, whenKernelSizeIsRequire HWCMDTEST_F(IGFX_XE_HP_CORE, KernelCommandsXeHPAndLater, whenPipeControlForWaIsRequiredThenReturnFalse) { auto &hwInfo = pDevice->getHardwareInfo(); - EXPECT_EQ(UnitTestHelper::isPipeControlWArequired(hwInfo), MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)); + EXPECT_EQ(UnitTestHelper::isPipeControlWArequired(hwInfo), MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)); } HWCMDTEST_F(IGFX_XE_HP_CORE, KernelCommandsXeHPAndLater, whenMediaInterfaceDescriptorLoadIsRequiredThenDoNotProgramNonExistingCommand) { diff --git a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp index 8bb08de40b..d6f9dff217 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_1_tests.cpp @@ -271,7 +271,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, TimestampPacketTests, givenTimestampPacketWhenDispat uint32_t walkersFound = 0; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { - if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo())) { auto pipeControl = genCmdCast(*++it); EXPECT_NE(nullptr, pipeControl); } @@ -380,7 +380,7 @@ HWTEST_F(TimestampPacketTests, givenTimestampPacketWriteEnabledWhenEnqueueingThe bool walkerFound = false; for (auto it = hwParser.cmdList.begin(); it != hwParser.cmdList.end(); it++) { if (genCmdCast(*it)) { - if (MemorySynchronizationCommands::isPipeControlWArequired(device->getHardwareInfo())) { + if (MemorySynchronizationCommands::isBarrierWaRequired(device->getHardwareInfo())) { auto pipeControl = genCmdCast(*++it); EXPECT_NE(nullptr, pipeControl); } diff --git a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp index d5e7bfe9e8..1a3b88f2f3 100644 --- a/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp +++ b/opencl/test/unit_test/helpers/timestamp_packet_2_tests.cpp @@ -257,7 +257,7 @@ HWTEST_F(TimestampPacketTests, givenPipeControlRequestWithBarrierWriteWhenEstima csr.stallingCommandsOnNextFlushRequired = true; auto sizeWithPcRequest = device->getUltCommandStreamReceiver().getRequiredCmdStreamSize(flags, device->getDevice()); - size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(device->getHardwareInfo()); + size_t extendedSize = sizeWithoutPcRequest + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(device->getHardwareInfo()); EXPECT_EQ(sizeWithPcRequest, extendedSize); } diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index 6dadb73cce..a3ebe6d396 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -769,7 +769,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBufferOperationWithoutKernelWhenEstimati auto expectedSize = TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (cmdQ->isCacheFlushForBcsRequired()) { - expectedSize += MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + expectedSize += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); } EXPECT_EQ(expectedSize, readBufferCmdsSize); diff --git a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp index 911c1807ca..4740717762 100644 --- a/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp +++ b/opencl/test/unit_test/xe_hp_core/hw_helper_tests_xe_hp_core.cpp @@ -140,12 +140,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenRevisionEnumThenProperMaxThreadsF XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsDefaultWhenLocalMemoryIsEnabledThenReturnFalseAndDoNotProgramPipeControl) { hardwareInfo.featureTable.flags.ftrLocalMemory = true; - EXPECT_FALSE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); + EXPECT_FALSE(MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(0u, cmdStream.getUsed()); } @@ -154,12 +154,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsDisabledW DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(0); hardwareInfo.featureTable.flags.ftrLocalMemory = true; - EXPECT_FALSE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); + EXPECT_FALSE(MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(0u, cmdStream.getUsed()); } @@ -169,12 +169,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsEnabledWh DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); hardwareInfo.featureTable.flags.ftrLocalMemory = true; - EXPECT_TRUE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); + EXPECT_TRUE(MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); } @@ -183,12 +183,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDisablePipeControlFlagIsEnabledWh DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); hardwareInfo.featureTable.flags.ftrLocalMemory = false; - EXPECT_FALSE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); + EXPECT_FALSE(MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(0u, cmdStream.getUsed()); } @@ -292,7 +292,6 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, whenGettingDefaultRevisionThenB0IsRetu XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDebugFlagAndLocalMemoryIsNotAvailableWhenProgrammingPostSyncPipeControlThenExpectNotAddingWaPipeControl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); @@ -307,12 +306,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, PipeControlArgs args; uint64_t gpuAddress = 0xABC0; uint64_t immediateValue = 0x10; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(cmdStream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - gpuAddress, - immediateValue, - hardwareInfo, - args); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation(cmdStream, + PostSyncMode::ImmediateData, + gpuAddress, + immediateValue, + hardwareInfo, + args); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); HardwareParse hwParser; @@ -330,7 +329,6 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, givenDebugFlagAndLocalMemoryIsAvailableWhenProgrammingPostSyncPipeControlThenExpectAddingWaPipeControl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); @@ -345,12 +343,12 @@ XE_HP_CORE_TEST_F(HwHelperTestXE_HP_CORE, PipeControlArgs args; uint64_t gpuAddress = 0xABC0; uint64_t immediateValue = 0x10; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(cmdStream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - gpuAddress, - immediateValue, - hardwareInfo, - args); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation(cmdStream, + PostSyncMode::ImmediateData, + gpuAddress, + immediateValue, + hardwareInfo, + args); EXPECT_EQ(sizeof(PIPE_CONTROL) * 2, cmdStream.getUsed()); HardwareParse hwParser; diff --git a/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp index a4e0aabc8e..68af3a49a6 100644 --- a/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/enqueue_tests_xe_hpc_core.cpp @@ -91,7 +91,7 @@ XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenKernelWhenWalkerIsProgramme XE_HPC_CORETEST_F(MemoryPrefetchTestsXeHpcCore, givenPrefetchEnabledWhenEstimatingCommandsSizeThenAddStatePrefetch) { auto commandQueue = createCommandQueue(); - size_t numPipeControls = MemorySynchronizationCommands::isPipeControlWArequired(clDevice->getHardwareInfo()) ? 2 : 1; + size_t numPipeControls = MemorySynchronizationCommands::isBarrierWaRequired(clDevice->getHardwareInfo()) ? 2 : 1; size_t expected = sizeof(typename FamilyType::COMPUTE_WALKER) + (sizeof(typename FamilyType::PIPE_CONTROL) * numPipeControls) + diff --git a/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp index a1fd230465..9f0ff61dc6 100644 --- a/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp @@ -576,7 +576,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, whenPipecontrolWaIsProgrammedThenFlush LinearStream cmdStream(buffer, sizeof(buffer)); uint64_t gpuAddress = 0x1234; - MemorySynchronizationCommands::addPipeControlWA(cmdStream, gpuAddress, *defaultHwInfo); + MemorySynchronizationCommands::addBarrierWa(cmdStream, gpuAddress, *defaultHwInfo); auto pipeControl = genCmdCast(buffer); ASSERT_NE(nullptr, pipeControl); diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index c33920adcd..b27baa171f 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -172,7 +172,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, if (dirtyHeaps) { syncArgs.hdcPipelineFlush = true; } - MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), syncArgs); + MemorySynchronizationCommands::addSingleBarrier(*container.getCommandStream(), syncArgs); if (dirtyHeaps || args.requiresUncachedMocs) { STATE_BASE_ADDRESS sba; @@ -428,7 +428,7 @@ inline size_t EncodeWA::getAdditionalPipelineSelectSize(Device &devic template inline void EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(LinearStream &commandStream, PipeControlArgs args, const HardwareInfo &hwInfo, bool isRcs) { - MemorySynchronizationCommands::addPipeControl(commandStream, args); + MemorySynchronizationCommands::addSingleBarrier(commandStream, args); } template diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 8dbd6dc5e7..ef2f661d8d 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -211,7 +211,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, PipeControlArgs syncArgs; syncArgs.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), syncArgs); + MemorySynchronizationCommands::addSingleBarrier(*container.getCommandStream(), syncArgs); STATE_BASE_ADDRESS sbaCmd; auto gmmHelper = container.getDevice()->getGmmHelper(); uint32_t statelessMocsIndex = @@ -701,7 +701,7 @@ inline void EncodeWA::addPipeControlPriorToNonPipelinedStateCommand(Line NEO::EncodeWA::setAdditionalPipeControlFlagsForNonPipelineStateCommand(args); } - MemorySynchronizationCommands::addPipeControl(commandStream, args); + MemorySynchronizationCommands::addSingleBarrier(commandStream, args); } template diff --git a/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl b/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl index fbdf8e0a24..32e788cd74 100644 --- a/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl +++ b/shared/source/command_container/encode_compute_mode_tgllp_and_later.inl @@ -54,7 +54,9 @@ inline void EncodeComputeMode::programComputeModeCommandWithSynchronizat EncodeComputeMode::programComputeModeCommand(csr, properties, hwInfo, logicalStateHelper); if (hasSharedHandles) { - MemorySynchronizationCommands::addPipeControlWithCSStallOnly(csr); + PipeControlArgs args; + args.csStallOnly = true; + MemorySynchronizationCommands::addSingleBarrier(csr, args); } NEO::EncodeWA::encodeAdditionalPipelineSelect(csr, args, false, hwInfo, isRcs); diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index 5e86f566ed..72859497ca 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -109,12 +109,12 @@ uint32_t computePartitionCountAndPartitionType(uint32_t preferredMinimalPartitio goWithMaxAlgorithm = !!!NEO::DebugManager.flags.WalkerPartitionPreferHighestDimension.get(); } - //compute misaligned %, accept imbalance below threshold in favor of Z/Y/X distribution. + // compute misaligned %, accept imbalance below threshold in favor of Z/Y/X distribution. const float minimalThreshold = 0.05f; float zImbalance = static_cast(groupCount.z - alignDown(groupCount.z, preferredMinimalPartitionCount)) / static_cast(groupCount.z); float yImbalance = static_cast(groupCount.y - alignDown(groupCount.y, preferredMinimalPartitionCount)) / static_cast(groupCount.y); - //we first try with deepest dimension to see if we can partition there + // we first try with deepest dimension to see if we can partition there if (groupCount.z > 1 && (zImbalance <= minimalThreshold)) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_Z; } else if (groupCount.y > 1 && (yImbalance < minimalThreshold)) { @@ -122,7 +122,7 @@ uint32_t computePartitionCountAndPartitionType(uint32_t preferredMinimalPartitio } else if (groupCount.x % preferredMinimalPartitionCount == 0) { *outSelectedPartitionType = COMPUTE_WALKER::PARTITION_TYPE::PARTITION_TYPE_X; } - //if we are here then there is no dimension that results in even distribution, choose max dimension to minimize impact + // if we are here then there is no dimension that results in even distribution, choose max dimension to minimize impact else { goWithMaxAlgorithm = true; } @@ -222,7 +222,7 @@ void programWaitForSemaphore(void *&inputAddress, uint32_t &totalBytesProgrammed template bool programWparidMask(void *&inputAddress, uint32_t &totalBytesProgrammed, uint32_t partitionCount) { - //currently only power of 2 values of partitionCount are being supported + // currently only power of 2 values of partitionCount are being supported if (!Math::isPow2(partitionCount) || partitionCount > 16) { return false; } @@ -300,7 +300,7 @@ template void programPipeControlCommand(void *&inputAddress, uint32_t &totalBytesProgrammed, NEO::PipeControlArgs &flushArgs) { auto pipeControl = putCommand>(inputAddress, totalBytesProgrammed); PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; - NEO::MemorySynchronizationCommands::setPipeControl(cmd, flushArgs); + NEO::MemorySynchronizationCommands::setSingleBarrier(&cmd, flushArgs); *pipeControl = cmd; } @@ -311,14 +311,14 @@ void programPostSyncPipeControlCommand(void *&inputAddress, NEO::PipeControlArgs &flushArgs, const NEO::HardwareInfo &hwInfo) { - NEO::MemorySynchronizationCommands::setPipeControlAndProgramPostSyncOperation(inputAddress, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - args.postSyncGpuAddress, - args.postSyncImmediateValue, - hwInfo, - flushArgs); + NEO::MemorySynchronizationCommands::setBarrierWithPostSyncOperation(inputAddress, + NEO::PostSyncMode::ImmediateData, + args.postSyncGpuAddress, + args.postSyncImmediateValue, + hwInfo, + flushArgs); - totalBytesProgrammed += static_cast(NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); + totalBytesProgrammed += static_cast(NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo)); } template @@ -402,7 +402,7 @@ void programSelfCleanupEndSection(void *&inputAddress, useAtomicsForSelfCleanup); } - //this synchronization point ensures that all tiles finished zeroing and will fairly access control section atomic variables + // this synchronization point ensures that all tiles finished zeroing and will fairly access control section atomic variables programTilesSynchronizationWithAtomics(inputAddress, totalBytesProgrammed, finalSyncTileCountAddress, 2 * tileCount); } @@ -428,9 +428,9 @@ uint64_t computeControlSectionOffset(WalkerPartitionArgs &args) { uint64_t size = 0u; size += args.synchronizeBeforeExecution ? computeTilesSynchronizationWithAtomicsSectionSize() : 0; - size += sizeof(LOAD_REGISTER_IMM); //predication mask - size += sizeof(MI_ATOMIC); //current id for partition - size += sizeof(LOAD_REGISTER_REG); //id into register + size += sizeof(LOAD_REGISTER_IMM); // predication mask + size += sizeof(MI_ATOMIC); // current id for partition + size += sizeof(LOAD_REGISTER_REG); // id into register size += sizeof(MI_SET_PREDICATE) * 2 + sizeof(BATCH_BUFFER_START) * 2; size += (args.semaphoreProgrammingRequired ? sizeof(MI_SEMAPHORE_WAIT) * args.partitionCount : 0u); @@ -537,10 +537,10 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, true, MI_ATOMIC::ATOMIC_OPCODES::ATOMIC_4B_INCREMENT); - //move atomic result to wparid + // move atomic result to wparid programMiLoadRegisterReg(currentBatchBufferPointer, totalBytesProgrammed, generalPurposeRegister4, wparidCCSOffset); - //enable predication basing on wparid value + // enable predication basing on wparid value programWparidPredication(currentBatchBufferPointer, totalBytesProgrammed, true); programMiBatchBufferStart(currentBatchBufferPointer, @@ -550,7 +550,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, true, args.secondaryBatchBuffer); - //disable predication to not noop subsequent commands. + // disable predication to not noop subsequent commands. programWparidPredication(currentBatchBufferPointer, totalBytesProgrammed, false); if (args.emitSelfCleanup) { @@ -580,7 +580,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, programMiLoadRegisterMem(currentBatchBufferPointer, totalBytesProgrammed, args.workPartitionAllocationGpuVa, wparidCCSOffset); } - //this bb start goes to the end of partitioned command buffer + // this bb start goes to the end of partitioned command buffer programMiBatchBufferStart( currentBatchBufferPointer, totalBytesProgrammed, @@ -588,7 +588,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, false, args.secondaryBatchBuffer); - //Walker section + // Walker section programPartitionedWalker(currentBatchBufferPointer, totalBytesProgrammed, inputWalker, args.partitionCount); programMiBatchBufferStart(currentBatchBufferPointer, totalBytesProgrammed, gpuAddressOfAllocation, false, args.secondaryBatchBuffer); @@ -755,7 +755,7 @@ uint64_t computeBarrierControlSectionOffset(WalkerPartitionArgs &args, } if (args.usePostSync) { - offset += NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + offset += NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); } else { offset += sizeof(PIPE_CONTROL); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index ca9769d3a2..5820177871 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -232,9 +232,9 @@ CompletionStamp CommandStreamReceiverHw::flushTask( args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired; args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush; args.workloadPartitionOffset = isMultiTileOperationEnabled(); - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( commandStreamTask, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, address, taskCount + 1, hwInfo, @@ -461,7 +461,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { PipeControlArgs args; args.textureCacheInvalidationEnable = true; - MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); + MemorySynchronizationCommands::addSingleBarrier(commandStreamCSR, args); if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) { this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter; } else { @@ -478,7 +478,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (requiresInstructionCacheFlush) { PipeControlArgs args; args.instructionCacheInvalidateEnable = true; - MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); + MemorySynchronizationCommands::addSingleBarrier(commandStreamCSR, args); requiresInstructionCacheFlush = false; } @@ -492,7 +492,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (programPipeControl) { PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); + MemorySynchronizationCommands::addSingleBarrier(commandStreamCSR, args); } this->taskLevel = taskLevel; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", peekTaskCount()); @@ -663,8 +663,11 @@ CompletionStamp CommandStreamReceiverHw::flushTask( template void CommandStreamReceiverHw::forcePipeControl(NEO::LinearStream &commandStreamCSR) { PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlWithCSStallOnly(commandStreamCSR); - MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); + args.csStallOnly = true; + MemorySynchronizationCommands::addSingleBarrier(commandStreamCSR, args); + + args.csStallOnly = false; + MemorySynchronizationCommands::addSingleBarrier(commandStreamCSR, args); } template @@ -707,7 +710,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { ResidencyContainer surfacesForSubmit; ResourcePackage resourcePackage; const auto &hwInfo = peekHwInfo(); - auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); void *currentPipeControlForNooping = nullptr; void *epiloguePipeControlLocation = nullptr; @@ -781,9 +784,9 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { lastPipeControlArgs.dcFlushEnable = false; } - MemorySynchronizationCommands::setPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::setBarrierWithPostSyncOperation( epiloguePipeControlLocation, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, getTagAllocation()->getGpuAddress(), lastTaskCount, hwInfo, @@ -832,7 +835,7 @@ size_t CommandStreamReceiverHw::getRequiredCmdStreamSize(const Dispat if (!this->isStateSipSent || device.getDebugger()) { size += PreemptionHelper::getRequiredStateSipCmdSize(device, isRcs()); } - size += MemorySynchronizationCommands::getSizeForSinglePipeControl(); + size += MemorySynchronizationCommands::getSizeForSingleBarrier(); size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_START); size += getCmdSizeForL3Config(); @@ -1201,24 +1204,22 @@ inline void CommandStreamReceiverHw::flushMiFlushDW() { template void CommandStreamReceiverHw::flushPipeControl() { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - auto lock = obtainUniqueOwnership(); const auto &hwInfo = peekHwInfo(); - auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); + auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo)); auto commandStreamStart = commandStream.getUsed(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); args.notifyEnable = isUsedNotifyEnableForPostSync(); args.workloadPartitionOffset = isMultiTileOperationEnabled(); - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(commandStream, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - getTagAllocation()->getGpuAddress(), - taskCount + 1, - hwInfo, - args); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation(commandStream, + PostSyncMode::ImmediateData, + getTagAllocation()->getGpuAddress(), + taskCount + 1, + hwInfo, + args); makeResident(*tagAllocation); diff --git a/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl index 1eb55d7705..7b0f7c8061 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl @@ -26,7 +26,7 @@ inline void CommandStreamReceiverHw::programL3(LinearStream &csr, uin PipeControlArgs args = {}; args.dcFlushEnable = true; setClearSlmWorkAroundParameter(args); - MemorySynchronizationCommands::addPipeControl(csr, args); + MemorySynchronizationCommands::addSingleBarrier(csr, args); PreambleHelper::programL3(&csr, newL3Config); this->lastSentL3Config = newL3Config; @@ -141,13 +141,13 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingNoPostSyn template inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncCommands() const { - return MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo()); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekHwInfo()); } template inline void CommandStreamReceiverHw::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) { PipeControlArgs args; - MemorySynchronizationCommands::addPipeControl(cmdStream, args); + MemorySynchronizationCommands::addSingleBarrier(cmdStream, args); } template @@ -156,9 +156,9 @@ inline void CommandStreamReceiverHw::programStallingPostSyncCommandsF const auto &hwInfo = peekHwInfo(); PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( cmdStream, - PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, barrierTimestampPacketGpuAddress, 0, hwInfo, diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl index 93a08aae05..cd8093da72 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl @@ -168,7 +168,7 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncC false, true); } else { - return MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo()); + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(peekHwInfo()); } } @@ -186,7 +186,7 @@ inline void CommandStreamReceiverHw::programStallingNoPostSyncCommand false, false); } else { - MemorySynchronizationCommands::addPipeControl(cmdStream, args); + MemorySynchronizationCommands::addSingleBarrier(cmdStream, args); } } @@ -208,9 +208,9 @@ inline void CommandStreamReceiverHw::programStallingPostSyncCommandsF false); tagNode.setPacketsUsed(this->activePartitions); } else { - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( cmdStream, - PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, barrierTimestampPacketGpuAddress, 0, hwInfo, diff --git a/shared/source/command_stream/experimental_command_buffer.inl b/shared/source/command_stream/experimental_command_buffer.inl index 2587c9c06c..ffa3662c00 100644 --- a/shared/source/command_stream/experimental_command_buffer.inl +++ b/shared/source/command_stream/experimental_command_buffer.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018-2021 Intel Corporation + * Copyright (C) 2018-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -65,19 +65,17 @@ size_t ExperimentalCommandBuffer::getTimeStampPipeControlSize() noexcept { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; // Two P_C for timestamps - return 2 * MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation( + return 2 * MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation( *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo()); } template void ExperimentalCommandBuffer::addTimeStampPipeControl() { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - uint64_t timeStampAddress = timestamps->getGpuAddress() + timestampsOffset; PipeControlArgs args; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( *currentStream, - PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, + PostSyncMode::Timestamp, timeStampAddress, 0llu, *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo(), diff --git a/shared/source/command_stream/preemption_xehp_and_later.inl b/shared/source/command_stream/preemption_xehp_and_later.inl index b27c93229c..0196e2d0d1 100644 --- a/shared/source/command_stream/preemption_xehp_and_later.inl +++ b/shared/source/command_stream/preemption_xehp_and_later.inl @@ -52,7 +52,7 @@ void PreemptionHelper::programStateSipEndWa(LinearStream &cmdStream, if (hwHelper.isSipWANeeded(device.getHardwareInfo())) { NEO::PipeControlArgs args; - NEO::MemorySynchronizationCommands::addPipeControl(cmdStream, args); + NEO::MemorySynchronizationCommands::addSingleBarrier(cmdStream, args); auto mmio = reinterpret_cast(cmdStream.getSpace(sizeof(MI_LOAD_REGISTER_IMM))); MI_LOAD_REGISTER_IMM cmd = GfxFamily::cmdInitLoadRegisterImm; diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.inl b/shared/source/direct_submission/dispatchers/render_dispatcher.inl index 4feee0bb56..6d44b73b93 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.inl +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.inl @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -31,14 +31,13 @@ inline void RenderDispatcher::dispatchMonitorFence(LinearStream &cmdB const HardwareInfo &hwInfo, bool useNotifyEnable, bool partitionedWorkload) { - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); args.workloadPartitionOffset = partitionedWorkload; args.notifyEnable = useNotifyEnable; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( cmdBuffer, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + PostSyncMode::ImmediateData, gpuAddress, immediateData, hwInfo, @@ -47,8 +46,7 @@ inline void RenderDispatcher::dispatchMonitorFence(LinearStream &cmdB template inline size_t RenderDispatcher::getSizeMonitorFence(const HardwareInfo &hwInfo) { - size_t size = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); - return size; + return MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); } template @@ -63,7 +61,7 @@ inline void RenderDispatcher::dispatchTlbFlush(LinearStream &cmdBuffe args.pipeControlFlushEnable = true; args.textureCacheInvalidationEnable = true; - MemorySynchronizationCommands::addPipeControl(cmdBuffer, args); + MemorySynchronizationCommands::addSingleBarrier(cmdBuffer, args); } template @@ -74,7 +72,7 @@ inline size_t RenderDispatcher::getSizeCacheFlush(const HardwareInfo template inline size_t RenderDispatcher::getSizeTlbFlush() { - return MemorySynchronizationCommands::getSizeForSinglePipeControl(); + return MemorySynchronizationCommands::getSizeForSingleBarrier(); } } // namespace NEO diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index 0d08ff795c..525e8947e6 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -51,7 +51,9 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta using PIPE_CONTROL = typename Family::PIPE_CONTROL; if (properties.threadArbitrationPolicy.isDirty) { - MemorySynchronizationCommands::addPipeControlWithCSStallOnly(csr); + PipeControlArgs args; + args.csStallOnly = true; + MemorySynchronizationCommands::addSingleBarrier(csr, args); LriHelper::program(&csr, RowChickenReg4::address, diff --git a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp index df8a55dd14..e5f8fa533f 100644 --- a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp +++ b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp @@ -35,7 +35,7 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, args.vfCacheInvalidationEnable = true; args.constantCacheInvalidationEnable = true; args.stateCacheInvalidationEnable = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); uint32_t numSubslices = hwInfo.gtSystemInfo.SubSliceCount; uint32_t numSubslicesWithVme = numSubslices / 2; // 1 VME unit per DSS @@ -54,7 +54,7 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, false); args = {}; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); lastVmeSubslicesConfig = true; } @@ -70,10 +70,10 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, args.constantCacheInvalidationEnable = true; args.stateCacheInvalidationEnable = true; args.genericMediaStateClear = true; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); args = {}; - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); // In Gen11-LP, software programs this register as if GT consists of // 2 slices with 4 subslices in each slice. Hardware maps this to the @@ -95,7 +95,7 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, reg.TheStructure.RawData[0], false); - MemorySynchronizationCommands::addPipeControl(stream, args); + MemorySynchronizationCommands::addSingleBarrier(stream, args); } } } diff --git a/shared/source/gen11/preamble_gen11.cpp b/shared/source/gen11/preamble_gen11.cpp index 3fcf062a9f..79ee4b02c1 100644 --- a/shared/source/gen11/preamble_gen11.cpp +++ b/shared/source/gen11/preamble_gen11.cpp @@ -60,7 +60,7 @@ void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pCommandSt args.depthCacheFlushEnable = true; args.dcFlushEnable = true; } - MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*pCommandStream, args); } template <> diff --git a/shared/source/gen12lp/hw_helper_gen12lp.cpp b/shared/source/gen12lp/hw_helper_gen12lp.cpp index b0e8f6038e..55acabd42d 100644 --- a/shared/source/gen12lp/hw_helper_gen12lp.cpp +++ b/shared/source/gen12lp/hw_helper_gen12lp.cpp @@ -140,7 +140,9 @@ std::string HwHelperHw::getExtensions(const HardwareInfo &hwInfo) const } template <> -inline void MemorySynchronizationCommands::setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args) { +inline void MemorySynchronizationCommands::setBarrierExtraProperties(void *barrierCmd, PipeControlArgs &args) { + auto &pipeControl = *reinterpret_cast(barrierCmd); + pipeControl.setHdcPipelineFlush(args.hdcPipelineFlush); if (DebugManager.flags.FlushAllCaches.get()) { @@ -180,13 +182,13 @@ uint32_t HwHelperHw::getMocsIndex(const GmmHelper &gmmHelper, bool l3ena } template <> -bool MemorySynchronizationCommands::isPipeControlWArequired(const HardwareInfo &hwInfo) { +bool MemorySynchronizationCommands::isBarrierWaRequired(const HardwareInfo &hwInfo) { return HwInfoConfig::get(hwInfo.platform.eProductFamily)->pipeControlWARequired(hwInfo); } template <> -bool MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) { - return MemorySynchronizationCommands::isPipeControlWArequired(hwInfo); +bool MemorySynchronizationCommands::isBarrierlPriorToPipelineSelectWaRequired(const HardwareInfo &hwInfo) { + return MemorySynchronizationCommands::isBarrierWaRequired(hwInfo); } template <> diff --git a/shared/source/gen12lp/preamble_gen12lp.cpp b/shared/source/gen12lp/preamble_gen12lp.cpp index 094e413fa7..938d203947 100644 --- a/shared/source/gen12lp/preamble_gen12lp.cpp +++ b/shared/source/gen12lp/preamble_gen12lp.cpp @@ -38,10 +38,10 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, using PIPELINE_SELECT = typename Family::PIPELINE_SELECT; - if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) { + if (MemorySynchronizationCommands::isBarrierlPriorToPipelineSelectWaRequired(hwInfo)) { PipeControlArgs args; args.renderTargetCacheFlushEnable = true; - MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*pCommandStream, args); } auto pCmd = pCommandStream->getSpaceForCmd(); @@ -71,7 +71,7 @@ void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pCommandSt args.dcFlushEnable = true; } - MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*pCommandStream, args); } template <> diff --git a/shared/source/gen8/hw_helper_gen8.cpp b/shared/source/gen8/hw_helper_gen8.cpp index ac75831fba..f68950cc76 100644 --- a/shared/source/gen8/hw_helper_gen8.cpp +++ b/shared/source/gen8/hw_helper_gen8.cpp @@ -59,24 +59,20 @@ bool HwHelperHw::isStatelessToStatefulWithOffsetSupported() const { } template <> -void MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) { +void MemorySynchronizationCommands::addSingleBarrier(LinearStream &commandStream, PipeControlArgs &args) { Family::PIPE_CONTROL cmd = Family::cmdInitPipeControl; - args.dcFlushEnable = true; - MemorySynchronizationCommands::setPipeControl(cmd, args); + MemorySynchronizationCommands::setSingleBarrier(&cmd, args); + + cmd.setDcFlushEnable(true); + + if (DebugManager.flags.DoNotFlushCaches.get()) { + cmd.setDcFlushEnable(false); + } + Family::PIPE_CONTROL *cmdBuffer = commandStream.getSpaceForCmd(); *cmdBuffer = cmd; } -template <> -void MemorySynchronizationCommands::addPipeControlWithCSStallOnly(LinearStream &commandStream) { - using PIPE_CONTROL = typename Family::PIPE_CONTROL; - PIPE_CONTROL cmd = Family::cmdInitPipeControl; - cmd.setCommandStreamerStallEnable(true); - cmd.setDcFlushEnable(true); - auto pipeControl = commandStream.getSpaceForCmd(); - *pipeControl = cmd; -} - template class HwHelperHw; template class FlatBatchBufferHelperHw; template struct MemorySynchronizationCommands; diff --git a/shared/source/gen8/preamble_gen8.cpp b/shared/source/gen8/preamble_gen8.cpp index 4b33e97162..486b6e1cb1 100644 --- a/shared/source/gen8/preamble_gen8.cpp +++ b/shared/source/gen8/preamble_gen8.cpp @@ -17,7 +17,7 @@ template <> void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pCommandStream, const HardwareInfo *hwInfo, EngineGroupType engineGroupType) { PipeControlArgs args = {}; args.dcFlushEnable = true; - MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*pCommandStream, args); } template <> diff --git a/shared/source/gen9/command_encoder_gen9.cpp b/shared/source/gen9/command_encoder_gen9.cpp index b2f6a42d31..8e45d58b74 100644 --- a/shared/source/gen9/command_encoder_gen9.cpp +++ b/shared/source/gen9/command_encoder_gen9.cpp @@ -43,7 +43,9 @@ void EncodeComputeMode::programComputeModeCommand(LinearStream &csr, Sta UNRECOVERABLE_IF(properties.threadArbitrationPolicy.value == ThreadArbitrationPolicy::NotPresent); if (properties.threadArbitrationPolicy.isDirty) { - MemorySynchronizationCommands::addPipeControlWithCSStallOnly(csr); + PipeControlArgs args; + args.csStallOnly = true; + MemorySynchronizationCommands::addSingleBarrier(csr, args); LriHelper::program(&csr, DebugControlReg2::address, diff --git a/shared/source/gen9/hw_helper_gen9.cpp b/shared/source/gen9/hw_helper_gen9.cpp index c808a35320..094388c63f 100644 --- a/shared/source/gen9/hw_helper_gen9.cpp +++ b/shared/source/gen9/hw_helper_gen9.cpp @@ -45,7 +45,7 @@ uint32_t HwHelperHw::getDefaultRevisionId(const HardwareInfo &hwInfo) co } template <> -bool MemorySynchronizationCommands::isPipeControlWArequired(const HardwareInfo &hwInfo) { return true; } +bool MemorySynchronizationCommands::isBarrierWaRequired(const HardwareInfo &hwInfo) { return true; } template class HwHelperHw; template class FlatBatchBufferHelperHw; diff --git a/shared/source/gen9/preamble_gen9.cpp b/shared/source/gen9/preamble_gen9.cpp index 7991657f15..0470551307 100644 --- a/shared/source/gen9/preamble_gen9.cpp +++ b/shared/source/gen9/preamble_gen9.cpp @@ -62,7 +62,7 @@ void PreambleHelper::addPipeControlBeforeVfeCmd(LinearStream *pCommandSt args.depthCacheFlushEnable = true; args.dcFlushEnable = true; } - MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*pCommandStream, args); } template <> diff --git a/shared/source/helpers/common_types.h b/shared/source/helpers/common_types.h index 423cfc73bc..4ac6677a32 100644 --- a/shared/source/helpers/common_types.h +++ b/shared/source/helpers/common_types.h @@ -63,4 +63,10 @@ enum class CachePolicy : uint32_t { WriteBack = 3, }; +enum class PostSyncMode : uint32_t { + NoWrite = 0, + Timestamp = 1, + ImmediateData = 2, +}; + } // namespace NEO diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index d18265a025..51c6eeef45 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -429,66 +429,43 @@ struct LriHelper { template struct MemorySynchronizationCommands { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; + static void addSingleBarrier(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args); + static void setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args); + static void addSingleBarrier(LinearStream &commandStream, PipeControlArgs &args); + static void setSingleBarrier(void *commandsBuffer, PipeControlArgs &args); - static void addPipeControlAndProgramPostSyncOperation(LinearStream &commandStream, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - const HardwareInfo &hwInfo, - PipeControlArgs &args); - static void setPipeControlAndProgramPostSyncOperation(void *&commandsBuffer, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - const HardwareInfo &hwInfo, - PipeControlArgs &args); - - static void addPipeControlWithPostSync(LinearStream &commandStream, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - PipeControlArgs &args); - static void setPipeControlWithPostSync(void *&commandsBuffer, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - PipeControlArgs &args); + static void addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, PipeControlArgs &args); + static void setBarrierWithPostSyncOperation(void *&commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, PipeControlArgs &args); static void setPostSyncExtraProperties(PipeControlArgs &args, const HardwareInfo &hwInfo); - static void setPipeControlWAFlags(PIPE_CONTROL &pipeControl); - static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo); - static void setPipeControlWA(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo); + static void addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo); + static void setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo); + + static void setBarrierWaFlags(void *barrierCmd); static void addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo); static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo); static void setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo); - static void addPipeControl(LinearStream &commandStream, PipeControlArgs &args); - static void setPipeControl(PIPE_CONTROL &pipeControl, PipeControlArgs &args); - - static void addPipeControlWithCSStallOnly(LinearStream &commandStream); - static bool getDcFlushEnable(bool isFlushPreferred, const HardwareInfo &hwInfo); static void addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo); static void setCacheFlushExtraProperties(PipeControlArgs &args); - static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo); - static size_t getSizeForPipeControlWA(const HardwareInfo &hwInfo); - static size_t getSizeForSinglePipeControl(); + static size_t getSizeForBarrierWithPostSyncOperation(const HardwareInfo &hwInfo); + static size_t getSizeForBarrierWa(const HardwareInfo &hwInfo); + static size_t getSizeForSingleBarrier(); static size_t getSizeForSingleAdditionalSynchronizationForDirectSubmission(const HardwareInfo &hwInfo); static size_t getSizeForSingleAdditionalSynchronization(const HardwareInfo &hwInfo); static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo); static size_t getSizeForFullCacheFlush(); - static bool isPipeControlWArequired(const HardwareInfo &hwInfo); - static bool isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo); + static bool isBarrierWaRequired(const HardwareInfo &hwInfo); + static bool isBarrierlPriorToPipelineSelectWaRequired(const HardwareInfo &hwInfo); protected: - static void setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args); + static void setBarrierExtraProperties(void *barrierCmd, PipeControlArgs &args); }; union SURFACE_STATE_BUFFER_LENGTH { diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index e856901b16..3ecae1b534 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -213,107 +213,62 @@ AuxTranslationMode HwHelperHw::getAuxTranslationMode(const HardwareInfo } template -void MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - LinearStream &commandStream, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - const HardwareInfo &hwInfo, - PipeControlArgs &args) { +void MemorySynchronizationCommands::addBarrierWithPostSyncOperation(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, + const HardwareInfo &hwInfo, PipeControlArgs &args) { - void *commandBuffer = commandStream.getSpace( - MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); + void *commandBuffer = commandStream.getSpace(MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo)); - MemorySynchronizationCommands::setPipeControlAndProgramPostSyncOperation( - commandBuffer, - operation, - gpuAddress, - immediateData, - hwInfo, - args); + MemorySynchronizationCommands::setBarrierWithPostSyncOperation(commandBuffer, postSyncMode, gpuAddress, immediateData, hwInfo, args); } template -void MemorySynchronizationCommands::setPipeControlAndProgramPostSyncOperation( +void MemorySynchronizationCommands::setBarrierWithPostSyncOperation( void *&commandsBuffer, - POST_SYNC_OPERATION operation, + PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, const HardwareInfo &hwInfo, PipeControlArgs &args) { - MemorySynchronizationCommands::setPipeControlWA(commandsBuffer, gpuAddress, hwInfo); + MemorySynchronizationCommands::setBarrierWa(commandsBuffer, gpuAddress, hwInfo); setPostSyncExtraProperties(args, hwInfo); - MemorySynchronizationCommands::setPipeControlWithPostSync(commandsBuffer, operation, gpuAddress, immediateData, args); + MemorySynchronizationCommands::setSingleBarrier(commandsBuffer, postSyncMode, gpuAddress, immediateData, args); + commandsBuffer = ptrOffset(commandsBuffer, getSizeForSingleBarrier()); MemorySynchronizationCommands::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, hwInfo); } template -void MemorySynchronizationCommands::setPipeControlWithPostSync(void *&commandsBuffer, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - PipeControlArgs &args) { +void MemorySynchronizationCommands::addSingleBarrier(LinearStream &commandStream, PipeControlArgs &args) { + addSingleBarrier(commandStream, PostSyncMode::NoWrite, 0, 0, args); +} + +template +void MemorySynchronizationCommands::setSingleBarrier(void *commandsBuffer, PipeControlArgs &args) { + setSingleBarrier(commandsBuffer, PostSyncMode::NoWrite, 0, 0, args); +} + +template +void MemorySynchronizationCommands::addSingleBarrier(LinearStream &commandStream, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) { + auto barrier = commandStream.getSpace(MemorySynchronizationCommands::getSizeForSingleBarrier()); + + setSingleBarrier(barrier, postSyncMode, gpuAddress, immediateData, args); +} + +template +void MemorySynchronizationCommands::setSingleBarrier(void *commandsBuffer, PostSyncMode postSyncMode, uint64_t gpuAddress, uint64_t immediateData, PipeControlArgs &args) { + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + PIPE_CONTROL pipeControl = GfxFamily::cmdInitPipeControl; - setPipeControl(pipeControl, args); - pipeControl.setPostSyncOperation(operation); - pipeControl.setAddress(static_cast(gpuAddress & 0x0000FFFFFFFFULL)); - pipeControl.setAddressHigh(static_cast(gpuAddress >> 32)); - if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { - pipeControl.setImmediateData(immediateData); - } - *reinterpret_cast(commandsBuffer) = pipeControl; - commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); -} - -template -void MemorySynchronizationCommands::addPipeControlWithPostSync( - LinearStream &commandStream, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - PipeControlArgs &args) { - void *pipeControl = commandStream.getSpace(sizeof(PIPE_CONTROL)); - setPipeControlWithPostSync(pipeControl, operation, gpuAddress, immediateData, args); -} - -template -void MemorySynchronizationCommands::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { - size_t requiredSize = MemorySynchronizationCommands::getSizeForPipeControlWA(hwInfo); - void *commandBuffer = commandStream.getSpace(requiredSize); - setPipeControlWA(commandBuffer, gpuAddress, hwInfo); -} - -template -void MemorySynchronizationCommands::setPipeControlWA(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo) { - if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { - PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; - MemorySynchronizationCommands::setPipeControlWAFlags(cmd); - *reinterpret_cast(commandsBuffer) = cmd; - commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); - - MemorySynchronizationCommands::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, hwInfo); - } -} - -template -void MemorySynchronizationCommands::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo) { - size_t requiredSize = MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hwInfo); - void *commandBuffer = commandStream.getSpace(requiredSize); - setAdditionalSynchronization(commandBuffer, gpuAddress, acquire, hwInfo); -} - -template -void MemorySynchronizationCommands::addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo) { - MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, gpuAddress, acquire, hwInfo); -} - -template -void MemorySynchronizationCommands::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) { pipeControl.setCommandStreamerStallEnable(true); + + if (args.csStallOnly) { + *reinterpret_cast(commandsBuffer) = pipeControl; + return; + } + pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable); pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable); pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable); @@ -331,7 +286,7 @@ void MemorySynchronizationCommands::setPipeControl(typename GfxFamily if constexpr (GfxFamily::isUsingGenericMediaStateClear) { pipeControl.setGenericMediaStateClear(args.genericMediaStateClear); } - setPipeControlExtraProperties(pipeControl, args); + setBarrierExtraProperties(&pipeControl, args); if (DebugManager.flags.FlushAllCaches.get()) { pipeControl.setDcFlushEnable(true); @@ -354,6 +309,53 @@ void MemorySynchronizationCommands::setPipeControl(typename GfxFamily pipeControl.setConstantCacheInvalidationEnable(false); pipeControl.setStateCacheInvalidationEnable(false); } + + if (postSyncMode != PostSyncMode::NoWrite) { + pipeControl.setAddress(static_cast(gpuAddress & 0x0000FFFFFFFFULL)); + pipeControl.setAddressHigh(static_cast(gpuAddress >> 32)); + } + + if (postSyncMode == PostSyncMode::Timestamp) { + pipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP); + } else if (postSyncMode == PostSyncMode::ImmediateData) { + pipeControl.setPostSyncOperation(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA); + pipeControl.setImmediateData(immediateData); + } + + *reinterpret_cast(commandsBuffer) = pipeControl; +} + +template +void MemorySynchronizationCommands::addBarrierWa(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { + size_t requiredSize = MemorySynchronizationCommands::getSizeForBarrierWa(hwInfo); + void *commandBuffer = commandStream.getSpace(requiredSize); + setBarrierWa(commandBuffer, gpuAddress, hwInfo); +} + +template +void MemorySynchronizationCommands::setBarrierWa(void *&commandsBuffer, uint64_t gpuAddress, const HardwareInfo &hwInfo) { + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; + + if (MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)) { + PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; + MemorySynchronizationCommands::setBarrierWaFlags(&cmd); + *reinterpret_cast(commandsBuffer) = cmd; + commandsBuffer = ptrOffset(commandsBuffer, sizeof(PIPE_CONTROL)); + + MemorySynchronizationCommands::setAdditionalSynchronization(commandsBuffer, gpuAddress, false, hwInfo); + } +} + +template +void MemorySynchronizationCommands::addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo) { + size_t requiredSize = MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(hwInfo); + void *commandBuffer = commandStream.getSpace(requiredSize); + setAdditionalSynchronization(commandBuffer, gpuAddress, acquire, hwInfo); +} + +template +void MemorySynchronizationCommands::addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const HardwareInfo &hwInfo) { + MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, gpuAddress, acquire, hwInfo); } template @@ -366,41 +368,23 @@ bool MemorySynchronizationCommands::getDcFlushEnable(bool isFlushPref } template -void MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; - MemorySynchronizationCommands::setPipeControl(cmd, args); - auto pipeControl = commandStream.getSpaceForCmd(); - *pipeControl = cmd; -} - -template -void MemorySynchronizationCommands::addPipeControlWithCSStallOnly(LinearStream &commandStream) { - using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; - PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; - cmd.setCommandStreamerStallEnable(true); - auto pipeControl = commandStream.getSpaceForCmd(); - *pipeControl = cmd; -} - -template -size_t MemorySynchronizationCommands::getSizeForSinglePipeControl() { +size_t MemorySynchronizationCommands::getSizeForSingleBarrier() { return sizeof(typename GfxFamily::PIPE_CONTROL); } template -size_t MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo) { - size_t size = getSizeForSinglePipeControl() + - getSizeForPipeControlWA(hwInfo) + +size_t MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(const HardwareInfo &hwInfo) { + size_t size = getSizeForSingleBarrier() + + getSizeForBarrierWa(hwInfo) + getSizeForSingleAdditionalSynchronization(hwInfo); return size; } template -size_t MemorySynchronizationCommands::getSizeForPipeControlWA(const HardwareInfo &hwInfo) { +size_t MemorySynchronizationCommands::getSizeForBarrierWa(const HardwareInfo &hwInfo) { size_t size = 0; - if (MemorySynchronizationCommands::isPipeControlWArequired(hwInfo)) { - size = getSizeForSinglePipeControl() + + if (MemorySynchronizationCommands::isBarrierWaRequired(hwInfo)) { + size = getSizeForSingleBarrier() + getSizeForSingleAdditionalSynchronization(hwInfo); } return size; @@ -568,7 +552,7 @@ void MemorySynchronizationCommands::addFullCacheFlush(LinearStream &c args.stateCacheInvalidationEnable = true; args.tlbInvalidation = true; MemorySynchronizationCommands::setCacheFlushExtraProperties(args); - MemorySynchronizationCommands::setPipeControl(cmd, args); + MemorySynchronizationCommands::setSingleBarrier(&cmd, args); *pipeControl = cmd; } @@ -611,7 +595,7 @@ bool HwHelperHw::isCpuImageTransferPreferred(const HardwareInfo &hwIn } template -bool MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(const HardwareInfo &hwInfo) { +bool MemorySynchronizationCommands::isBarrierlPriorToPipelineSelectWaRequired(const HardwareInfo &hwInfo) { return false; } diff --git a/shared/source/helpers/hw_helper_bdw_and_later.inl b/shared/source/helpers/hw_helper_bdw_and_later.inl index 7e873a07de..4005c42e4e 100644 --- a/shared/source/helpers/hw_helper_bdw_and_later.inl +++ b/shared/source/helpers/hw_helper_bdw_and_later.inl @@ -129,15 +129,15 @@ inline void MemorySynchronizationCommands::setCacheFlushExtraProperti } template -inline void MemorySynchronizationCommands::setPipeControlExtraProperties(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) { +inline void MemorySynchronizationCommands::setBarrierExtraProperties(void *barrierCmd, PipeControlArgs &args) { } template -bool MemorySynchronizationCommands::isPipeControlWArequired(const HardwareInfo &hwInfo) { return false; } +bool MemorySynchronizationCommands::isBarrierWaRequired(const HardwareInfo &hwInfo) { return false; } template -inline void MemorySynchronizationCommands::setPipeControlWAFlags(PIPE_CONTROL &pipeControl) { - pipeControl.setCommandStreamerStallEnable(true); +inline void MemorySynchronizationCommands::setBarrierWaFlags(void *barrierCmd) { + reinterpret_cast(barrierCmd)->setCommandStreamerStallEnable(true); } template diff --git a/shared/source/helpers/hw_helper_dg2_and_later.inl b/shared/source/helpers/hw_helper_dg2_and_later.inl index 7ccb3752b4..613b546270 100644 --- a/shared/source/helpers/hw_helper_dg2_and_later.inl +++ b/shared/source/helpers/hw_helper_dg2_and_later.inl @@ -12,7 +12,9 @@ namespace NEO { template -inline void MemorySynchronizationCommands::setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args) { +inline void MemorySynchronizationCommands::setBarrierExtraProperties(void *barrierCmd, PipeControlArgs &args) { + auto &pipeControl = *reinterpret_cast(barrierCmd); + pipeControl.setHdcPipelineFlush(args.hdcPipelineFlush); pipeControl.setUnTypedDataPortCacheFlush(args.unTypedDataPortCacheFlush); pipeControl.setCompressionControlSurfaceCcsFlush(args.compressionControlSurfaceCcsFlush); @@ -46,7 +48,9 @@ inline void MemorySynchronizationCommands::setCacheFlushExtraProperti } template -void MemorySynchronizationCommands::setPipeControlWAFlags(PIPE_CONTROL &pipeControl) { +void MemorySynchronizationCommands::setBarrierWaFlags(void *barrierCmd) { + auto &pipeControl = *reinterpret_cast(barrierCmd); + pipeControl.setCommandStreamerStallEnable(true); pipeControl.setHdcPipelineFlush(true); pipeControl.setUnTypedDataPortCacheFlush(true); diff --git a/shared/source/helpers/hw_helper_xehp_and_later.inl b/shared/source/helpers/hw_helper_xehp_and_later.inl index 785d0ee472..f2942f762c 100644 --- a/shared/source/helpers/hw_helper_xehp_and_later.inl +++ b/shared/source/helpers/hw_helper_xehp_and_later.inl @@ -177,7 +177,7 @@ aub_stream::MMIOList HwHelperHw::getExtraMmioList(const HardwareInfo } template -bool MemorySynchronizationCommands::isPipeControlWArequired(const HardwareInfo &hwInfo) { +bool MemorySynchronizationCommands::isBarrierWaRequired(const HardwareInfo &hwInfo) { if (DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.get() == 1) { return hwInfo.featureTable.flags.ftrLocalMemory; } diff --git a/shared/source/helpers/pipe_control_args.h b/shared/source/helpers/pipe_control_args.h index 5ee447aac3..be0b05abfc 100644 --- a/shared/source/helpers/pipe_control_args.h +++ b/shared/source/helpers/pipe_control_args.h @@ -12,6 +12,7 @@ namespace NEO { struct PipeControlArgs { PipeControlArgs() = default; + bool csStallOnly = false; bool dcFlushEnable = false; bool renderTargetCacheFlushEnable = false; bool instructionCacheInvalidateEnable = false; diff --git a/shared/source/helpers/preamble_base.inl b/shared/source/helpers/preamble_base.inl index be2d7ca883..6c5680893d 100644 --- a/shared/source/helpers/preamble_base.inl +++ b/shared/source/helpers/preamble_base.inl @@ -57,7 +57,7 @@ size_t PreambleHelper::getCmdSizeForPipelineSelect(const HardwareInfo size_t size = 0; using PIPELINE_SELECT = typename GfxFamily::PIPELINE_SELECT; size += sizeof(PIPELINE_SELECT); - if (MemorySynchronizationCommands::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) { + if (MemorySynchronizationCommands::isBarrierlPriorToPipelineSelectWaRequired(hwInfo)) { size += sizeof(PIPE_CONTROL); } return size; diff --git a/shared/source/helpers/preamble_xehp_and_later.inl b/shared/source/helpers/preamble_xehp_and_later.inl index 61811fd0ee..bc22f89a1b 100644 --- a/shared/source/helpers/preamble_xehp_and_later.inl +++ b/shared/source/helpers/preamble_xehp_and_later.inl @@ -60,7 +60,7 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, PipeControlArgs args = {}; args.stateCacheInvalidationEnable = true; - MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*pCommandStream, args); } auto pCmd = pCommandStream->getSpaceForCmd(); @@ -81,7 +81,7 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandStream, if (DebugManager.flags.CleanStateInPreamble.get()) { PipeControlArgs args = {}; args.stateCacheInvalidationEnable = true; - MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); + MemorySynchronizationCommands::addSingleBarrier(*pCommandStream, args); } } diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index 8efc6927db..e51b497f29 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -174,8 +174,8 @@ struct TimestampPacketHelper { PipeControlArgs args; args.dcFlushEnable = MemorySynchronizationCommands::getDcFlushEnable(true, hwInfo); - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( - cmdStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + MemorySynchronizationCommands::addBarrierWithPostSyncOperation( + cmdStream, PostSyncMode::ImmediateData, cacheFlushTimestampPacketGpuAddress, 0, hwInfo, args); } @@ -189,7 +189,7 @@ struct TimestampPacketHelper { size_t size = count * TimestampPacketHelper::getRequiredCmdStreamSizeForNodeDependencyWithBlitEnqueue(); if (auxTranslationDirection == AuxTranslationDirection::NonAuxToAux && cacheFlushForBcsRequired) { - size += MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); + size += MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo); } return size; diff --git a/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp b/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp index eedb571cc4..7d97188270 100644 --- a/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/hw_helper_xe_hp_core.cpp @@ -103,13 +103,17 @@ std::string HwHelperHw::getExtensions(const HardwareInfo &hwInfo) const } template <> -void MemorySynchronizationCommands::setPipeControlWAFlags(PIPE_CONTROL &pipeControl) { +void MemorySynchronizationCommands::setBarrierWaFlags(void *barrierCmd) { + auto &pipeControl = *reinterpret_cast(barrierCmd); + pipeControl.setCommandStreamerStallEnable(true); pipeControl.setHdcPipelineFlush(true); } template <> -void MemorySynchronizationCommands::setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args) { +void MemorySynchronizationCommands::setBarrierExtraProperties(void *barrierCmd, PipeControlArgs &args) { + auto &pipeControl = *reinterpret_cast(barrierCmd); + pipeControl.setHdcPipelineFlush(args.hdcPipelineFlush); pipeControl.setCompressionControlSurfaceCcsFlush(args.compressionControlSurfaceCcsFlush); pipeControl.setWorkloadPartitionIdOffsetEnable(args.workloadPartitionOffset); diff --git a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp index d8ed8d73d4..a98da2826e 100644 --- a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp @@ -210,7 +210,7 @@ void MemorySynchronizationCommands::setAdditionalSynchronization(void *& } template <> -bool MemorySynchronizationCommands::isPipeControlWArequired(const HardwareInfo &hwInfo) { +bool MemorySynchronizationCommands::isBarrierWaRequired(const HardwareInfo &hwInfo) { if (DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.get() == 1) { return true; } diff --git a/shared/test/common/helpers/ult_hw_helper.h b/shared/test/common/helpers/ult_hw_helper.h index 265529cba9..1cb6a43d53 100644 --- a/shared/test/common/helpers/ult_hw_helper.h +++ b/shared/test/common/helpers/ult_hw_helper.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2020-2021 Intel Corporation + * Copyright (C) 2020-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -14,7 +14,7 @@ namespace NEO { template struct UltMemorySynchronizationCommands : MemorySynchronizationCommands { static size_t getExpectedPipeControlCount(const HardwareInfo &hwInfo) { - return (MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) - + return (MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hwInfo) - MemorySynchronizationCommands::getSizeForAdditonalSynchronization(hwInfo)) / sizeof(typename GfxFamily::PIPE_CONTROL); } diff --git a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp index 2f584aa796..b9680ed81f 100644 --- a/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp +++ b/shared/test/unit_test/direct_submission/dispatchers/render_dispatcher_tests.cpp @@ -50,7 +50,7 @@ HWTEST_F(RenderDispatcherTest, givenRenderWhenAddingPreemptionCmdThenExpectPrope } HWTEST_F(RenderDispatcherTest, givenRenderWhenAskingForMonitorFenceCmdSizeThenReturnRequiredPipeControlCmdSize) { - size_t expectedSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo); + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(hardwareInfo); EXPECT_EQ(expectedSize, RenderDispatcher::getSizeMonitorFence(hardwareInfo)); } diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index 8401ac86ed..29b7bc094e 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1153,7 +1153,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; - size_t expectedSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(testHardwareInfo) + + size_t expectedSize = MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(testHardwareInfo) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection); @@ -1187,7 +1187,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, expectedSemaphores++; } - if (MemorySynchronizationCommands::isPipeControlWArequired(testHardwareInfo)) { + if (MemorySynchronizationCommands::isBarrierWaRequired(testHardwareInfo)) { expectedPipeControls++; if (semaphoreAsAdditionalSync) { expectedSemaphores++; @@ -1230,7 +1230,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; size_t expectedSize = sizeof(MI_STORE_DATA_IMM) + - MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(testHardwareInfo) + + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(testHardwareInfo) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection) + @@ -1269,7 +1269,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, if (semaphoreAsAdditionalSync) { expectedSemaphores++; } - if (MemorySynchronizationCommands::isPipeControlWArequired(testHardwareInfo)) { + if (MemorySynchronizationCommands::isBarrierWaRequired(testHardwareInfo)) { expectedPipeControls++; if (semaphoreAsAdditionalSync) { expectedSemaphores++; @@ -1314,7 +1314,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, testHardwareInfo.featureTable.flags.ftrLocalMemory = true; size_t expectedSize = sizeof(MI_ATOMIC) + - MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(testHardwareInfo) + + MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(testHardwareInfo) + sizeof(MI_ATOMIC) + sizeof(MI_SEMAPHORE_WAIT) + sizeof(MI_BATCH_BUFFER_START) + sizeof(WalkerPartition::BarrierControlSection) + @@ -1350,7 +1350,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, if (semaphoreAsAdditionalSync) { expectedSemaphores++; } - if (MemorySynchronizationCommands::isPipeControlWArequired(testHardwareInfo)) { + if (MemorySynchronizationCommands::isBarrierWaRequired(testHardwareInfo)) { expectedPipeControls++; if (semaphoreAsAdditionalSync) { expectedSemaphores++; diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index 9b674cdcad..79e05ff674 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -1385,7 +1385,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, uint32_t totalBytesProgrammed = 0u; uint64_t gpuVirtualAddress = 0xFF0000; - auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(testHardwareInfo) + + auto expectedOffsetSectionSize = NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(testHardwareInfo) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); @@ -1409,7 +1409,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, size_t additionalSyncCmdSize = NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(testHardwareInfo); - if (NEO::MemorySynchronizationCommands::isPipeControlWArequired(testHardwareInfo)) { + if (NEO::MemorySynchronizationCommands::isBarrierWaRequired(testHardwareInfo)) { constexpr uint64_t zeroGpuAddress = 0; constexpr uint64_t zeroImmediateValue = 0; auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1473,7 +1473,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, uint64_t gpuVirtualAddress = 0xFF0000; auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_STORE_DATA_IMM) + - NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(testHardwareInfo) + + NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(testHardwareInfo) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); @@ -1509,7 +1509,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, size_t additionalSyncCmdSize = NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(testHardwareInfo); - if (NEO::MemorySynchronizationCommands::isPipeControlWArequired(testHardwareInfo)) { + if (NEO::MemorySynchronizationCommands::isBarrierWaRequired(testHardwareInfo)) { constexpr uint64_t zeroGpuAddress = 0; constexpr uint64_t zeroImmediateValue = 0; auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1614,7 +1614,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, uint64_t gpuVirtualAddress = 0xFF0000; auto expectedOffsetSectionSize = sizeof(WalkerPartition::MI_ATOMIC) + - NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(testHardwareInfo) + + NEO::MemorySynchronizationCommands::getSizeForBarrierWithPostSyncOperation(testHardwareInfo) + sizeof(WalkerPartition::MI_ATOMIC) + sizeof(WalkerPartition::MI_SEMAPHORE_WAIT) + sizeof(WalkerPartition::BATCH_BUFFER_START); @@ -1656,7 +1656,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, size_t additionalSyncCmdSize = NEO::MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(testHardwareInfo); - if (NEO::MemorySynchronizationCommands::isPipeControlWArequired(testHardwareInfo)) { + if (NEO::MemorySynchronizationCommands::isBarrierWaRequired(testHardwareInfo)) { constexpr uint64_t zeroGpuAddress = 0; constexpr uint64_t zeroImmediateValue = 0; auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); diff --git a/shared/test/unit_test/fixtures/templated_fixture_tests.cpp b/shared/test/unit_test/fixtures/templated_fixture_tests.cpp index f29fd5da02..498b1cb808 100644 --- a/shared/test/unit_test/fixtures/templated_fixture_tests.cpp +++ b/shared/test/unit_test/fixtures/templated_fixture_tests.cpp @@ -77,7 +77,7 @@ HWTEST_TEMPLATED_F(DerivedTemplatedFixtureTests, whenExecutingTemplatedTestThenC struct TemplatedFixtureBaseTests : public ::testing::Test { template void setUpT() { - capturedPipeControlWaRequiredInSetUp = MemorySynchronizationCommands::isPipeControlWArequired(*defaultHwInfo); + capturedPipeControlWaRequiredInSetUp = MemorySynchronizationCommands::isBarrierWaRequired(*defaultHwInfo); } template @@ -87,7 +87,7 @@ struct TemplatedFixtureBaseTests : public ::testing::Test { }; HWTEST_TEMPLATED_F(TemplatedFixtureBaseTests, whenExecutingTemplatedSetupThenTemplateTargetsCorrectPlatform) { - bool capturedPipeControlWaRequiredInTestBody = MemorySynchronizationCommands::isPipeControlWArequired(*defaultHwInfo); + bool capturedPipeControlWaRequiredInTestBody = MemorySynchronizationCommands::isBarrierWaRequired(*defaultHwInfo); EXPECT_EQ(capturedPipeControlWaRequiredInTestBody, capturedPipeControlWaRequiredInSetUp); } diff --git a/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp b/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp index 4a95e66007..16fc1839c4 100644 --- a/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp +++ b/shared/test/unit_test/xe_hpg_core/hw_helper_tests_xe_hpg_core.cpp @@ -168,12 +168,12 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenDisablePipeControlFlagIsEnabledWhe HardwareInfo hardwareInfo = *defaultHwInfo; hardwareInfo.featureTable.flags.ftrLocalMemory = true; - EXPECT_TRUE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); + EXPECT_TRUE(MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); } @@ -184,12 +184,12 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenDisablePipeControlFlagIsEnabledWhe HardwareInfo hardwareInfo = *defaultHwInfo; hardwareInfo.featureTable.flags.ftrLocalMemory = false; - EXPECT_FALSE(MemorySynchronizationCommands::isPipeControlWArequired(hardwareInfo)); + EXPECT_FALSE(MemorySynchronizationCommands::isBarrierWaRequired(hardwareInfo)); constexpr size_t bufferSize = 128u; uint8_t buffer[bufferSize]; LinearStream cmdStream(buffer, bufferSize); - MemorySynchronizationCommands::addPipeControlWA(cmdStream, 0x1000, hardwareInfo); + MemorySynchronizationCommands::addBarrierWa(cmdStream, 0x1000, hardwareInfo); EXPECT_EQ(0u, cmdStream.getUsed()); } @@ -201,7 +201,6 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenXeHpgCoreWhenCheckingIfEngineTypeR XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenDebugFlagAndLocalMemoryIsNotAvailableWhenProgrammingPostSyncPipeControlThenExpectNotAddingWaPipeControl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); @@ -216,12 +215,12 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, PipeControlArgs args; uint64_t gpuAddress = 0xABC0; uint64_t immediateValue = 0x10; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(cmdStream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - gpuAddress, - immediateValue, - hardwareInfo, - args); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation(cmdStream, + PostSyncMode::ImmediateData, + gpuAddress, + immediateValue, + hardwareInfo, + args); EXPECT_EQ(sizeof(PIPE_CONTROL), cmdStream.getUsed()); HardwareParse hwParser; @@ -239,7 +238,6 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, givenDebugFlagAndLocalMemoryIsAvailableWhenProgrammingPostSyncPipeControlThenExpectAddingWaPipeControl) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; - using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; DebugManagerStateRestore restore; DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); @@ -254,12 +252,12 @@ XE_HPG_CORETEST_F(HwHelperTestXeHpgCore, PipeControlArgs args; uint64_t gpuAddress = 0xABC0; uint64_t immediateValue = 0x10; - MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(cmdStream, - POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - gpuAddress, - immediateValue, - hardwareInfo, - args); + MemorySynchronizationCommands::addBarrierWithPostSyncOperation(cmdStream, + PostSyncMode::ImmediateData, + gpuAddress, + immediateValue, + hardwareInfo, + args); EXPECT_EQ(sizeof(PIPE_CONTROL) * 2, cmdStream.getUsed()); HardwareParse hwParser;