diff --git a/CMakeLists.txt b/CMakeLists.txt index c1d3e4a12b..2e6c98aaeb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -814,6 +814,7 @@ include_directories(${NEO_SHARED_DIRECTORY}/debug_settings/definitions${BRANCH_D include_directories(${NEO_SHARED_DIRECTORY}/gen_common/reg_configs${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SHARED_DIRECTORY}/gmm_helper/client_context${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SHARED_DIRECTORY}/gmm_helper/windows/gmm_memory${BRANCH_DIR_SUFFIX}) +include_directories(${NEO_SHARED_DIRECTORY}/helpers/definitions${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SHARED_DIRECTORY}/memory_manager/definitions${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SHARED_DIRECTORY}/memory_properties${BRANCH_DIR_SUFFIX}) include_directories(${NEO_SHARED_DIRECTORY}/sku_info/definitions${BRANCH_DIR_SUFFIX}) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 6e5cefb306..10e587f273 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -28,6 +28,8 @@ #include "level_zero/core/source/image/image.h" #include "level_zero/core/source/module/module.h" +#include "pipe_control_args.h" + #include namespace L0 { @@ -168,10 +170,14 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; auto event = Event::fromHandle(hEvent); commandContainer.addToResidencyContainer(&event->getAllocation()); - - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - event->getGpuAddress(), Event::STATE_CLEARED, true, commandContainer.getDevice()->getHardwareInfo()); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + *commandContainer.getCommandStream(), + POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + event->getGpuAddress(), + Event::STATE_CLEARED, + commandContainer.getDevice()->getHardwareInfo(), + args); return ZE_RESULT_SUCCESS; } @@ -188,7 +194,8 @@ ze_result_t CommandListCoreFamily::appendBarrier(ze_event_handle_ if (isCopyOnlyCmdList) { NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); } else { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), false); + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } if (hSignalEvent) { @@ -417,7 +424,8 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * hEvent, numWaitEvents, phWaitEvents); if (allocationStruct.needsFlush) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } return ret; @@ -649,7 +657,8 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::Graph } if (flushHost) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } return ret; @@ -731,7 +740,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, } if (dstAllocationStruct.needsFlush) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } return ret; @@ -817,7 +827,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d if (isCopyOnlyCmdList) { NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); } else { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } @@ -1057,7 +1068,8 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, } if (hostPointerNeedsFlush) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } return res; @@ -1155,10 +1167,13 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han if (isCopyOnlyCmdList) { NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), event->getGpuAddress(), Event::STATE_SIGNALED, false, true); } else { - bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + NEO::PipeControlArgs args; + args.dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; + NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - event->getGpuAddress(), Event::STATE_SIGNALED, dcFlushEnable, commandContainer.getDevice()->getHardwareInfo()); + event->getGpuAddress(), Event::STATE_SIGNALED, + commandContainer.getDevice()->getHardwareInfo(), + args); } return ZE_RESULT_SUCCESS; } @@ -1193,7 +1208,8 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu if (isCopyOnlyCmdList) { NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), 0, 0, false, false); } else { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 08a2b298cb..c12a89d254 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -16,6 +16,8 @@ #include "shared/source/memory_manager/residency_container.h" #include "shared/source/unified_memory/unified_memory.h" +#include "pipe_control_args.h" + #include namespace L0 { @@ -100,23 +102,24 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand } else { timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::GLOBAL_END); - bool dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; + NEO::PipeControlArgs args; + args.dcFlushEnable = (event->signalScope == ZE_EVENT_SCOPE_FLAG_NONE) ? false : true; if (isCopyOnlyCmdList) { NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), timeStampAddress, 0llu, true, true); } else { - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *(commandContainer.getCommandStream()), POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, - dcFlushEnable, - device->getHwInfo()); + device->getHwInfo(), + args); timeStampAddress = event->getGpuAddress() + event->getOffsetOfEventTimestampRegister(Event::CONTEXT_END); NEO::EncodeStoreMMIO::encode(commandContainer, GP_THREAD_TIME_REG_ADDRESS_OFFSET_LOW, timeStampAddress); - if (dcFlushEnable) { - NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), true); + if (args.dcFlushEnable) { + NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } } } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 824efce36a..62db167589 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -28,6 +28,8 @@ #include "level_zero/core/source/fence/fence.h" #include "level_zero/tools/source/metrics/metric.h" +#include "pipe_control_args.h" + #include #include @@ -219,7 +221,8 @@ ze_result_t CommandQueueHw::executeCommandLists( auto commandListPreemption = commandList->getCommandListPreemptionMode(); if (statePreemption != commandListPreemption) { - NEO::MemorySynchronizationCommands::addPipeControl(child, false); + NEO::PipeControlArgs args; + NEO::MemorySynchronizationCommands::addPipeControl(child, args); NEO::PreemptionHelper::programCmdStream(child, commandListPreemption, statePreemption, @@ -267,9 +270,13 @@ ze_result_t CommandQueueHw::executeCommandLists( if (isCopyOnlyCommandQueue) { NEO::EncodeMiFlushDW::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, false, true); } else { - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( child, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - fence->getGpuAddress(), Fence::STATE_SIGNALED, true, device->getHwInfo()); + fence->getGpuAddress(), + Fence::STATE_SIGNALED, + device->getHwInfo(), + args); } } @@ -352,9 +359,14 @@ void CommandQueueHw::dispatchTaskCountWrite(NEO::LinearStream &co if (isCopyOnlyCommandQueue) { NEO::EncodeMiFlushDW::programMiFlushDw(commandStream, gpuAddress, taskCountToWrite, false, true); } else { - NEO::MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - gpuAddress, taskCountToWrite, true, device->getHwInfo()); + NEO::PipeControlArgs args(true); + NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + commandStream, + POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + gpuAddress, + taskCountToWrite, + device->getHwInfo(), + args); } } } // namespace L0 diff --git a/level_zero/core/source/gen11/cmdlist_gen11.inl b/level_zero/core/source/gen11/cmdlist_gen11.inl index 5a2c312e85..9d529cbdb8 100644 --- a/level_zero/core/source/gen11/cmdlist_gen11.inl +++ b/level_zero/core/source/gen11/cmdlist_gen11.inl @@ -5,14 +5,17 @@ * */ +#include "pipe_control_args.h" + namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { + NEO::PipeControlArgs args(true); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), - true); + args); } } // namespace L0 diff --git a/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl b/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl index b0c489bb80..97ccdf17ee 100644 --- a/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl +++ b/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl @@ -7,14 +7,17 @@ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" +#include "pipe_control_args.h" + namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { + NEO::PipeControlArgs args(true); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), - true); + args); } } // namespace L0 diff --git a/level_zero/core/source/gen9/cmdlist_gen9.inl b/level_zero/core/source/gen9/cmdlist_gen9.inl index 277ec08f55..342a6f4ccd 100644 --- a/level_zero/core/source/gen9/cmdlist_gen9.inl +++ b/level_zero/core/source/gen9/cmdlist_gen9.inl @@ -12,13 +12,16 @@ #include "level_zero/core/source/cmdlist/cmdlist_hw.h" +#include "pipe_control_args.h" + namespace L0 { template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { + NEO::PipeControlArgs args(true); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), - true); + args); } } // namespace L0 diff --git a/opencl/source/built_ins/aux_translation_builtin.h b/opencl/source/built_ins/aux_translation_builtin.h index 64fa43abe5..af450ac1be 100644 --- a/opencl/source/built_ins/aux_translation_builtin.h +++ b/opencl/source/built_ins/aux_translation_builtin.h @@ -12,6 +12,8 @@ #include "opencl/source/built_ins/builtins_dispatch_builder.h" #include "opencl/source/helpers/dispatch_info_builder.h" +#include "pipe_control_args.h" + #include namespace NEO { @@ -65,7 +67,8 @@ class BuiltInOp : public BuiltinDispatchInfoBuilder DispatchInfo::EstimateCommandsMethodT>; template static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &) { - MemorySynchronizationCommands::addPipeControl(linearStream, dcFlush); + PipeControlArgs args(dcFlush); + MemorySynchronizationCommands::addPipeControl(linearStream, args); } template diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index 59e14ddea5..8cf2046a63 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -36,6 +36,8 @@ #include "opencl/source/program/block_kernel_manager.h" #include "opencl/source/program/printf_handler.h" +#include "pipe_control_args.h" + #include #include @@ -486,10 +488,14 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(const Mu if (isCacheFlushForBcsRequired()) { auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); - - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - cacheFlushTimestampPacketGpuAddress, 0, true, device->getHardwareInfo()); + PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + commandStream, + GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + cacheFlushTimestampPacketGpuAddress, + 0, + device->getHardwareInfo(), + args); } TimestampPacketHelper::programSemaphoreWithImplicitDependency(commandStream, *currentTimestampPacketNode); diff --git a/opencl/source/command_queue/gpgpu_walker_base.inl b/opencl/source/command_queue/gpgpu_walker_base.inl index 76f9edd4c1..21d0a0119c 100644 --- a/opencl/source/command_queue/gpgpu_walker_base.inl +++ b/opencl/source/command_queue/gpgpu_walker_base.inl @@ -27,6 +27,8 @@ #include "opencl/source/helpers/validators.h" #include "opencl/source/mem_obj/mem_obj.h" +#include "pipe_control_args.h" + #include #include @@ -123,10 +125,14 @@ void GpgpuWalkerHelper::dispatchProfilingCommandsStart( // PIPE_CONTROL for global timestamp uint64_t timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, GlobalStartTS); - - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - *commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, - timeStampAddress, 0llu, false, hwInfo); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + *commandStream, + PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, + timeStampAddress, + 0llu, + hwInfo, + args); //MI_STORE_REGISTER_MEM for context local timestamp timeStampAddress = hwTimeStamps.getGpuAddress() + offsetof(HwTimeStamps, ContextStartTS); diff --git a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl index 2c8a0bfdec..c431265333 100644 --- a/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl +++ b/opencl/source/command_queue/gpgpu_walker_bdw_plus.inl @@ -12,6 +12,8 @@ #include "opencl/source/cl_device/cl_device.h" #include "opencl/source/command_queue/gpgpu_walker_base.inl" +#include "pipe_control_args.h" + namespace NEO { template @@ -69,8 +71,8 @@ void GpgpuWalkerHelper::dispatchScheduler( using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; - bool dcFlush = false; - MemorySynchronizationCommands::addPipeControl(commandStream, dcFlush); + NEO::PipeControlArgs args; + MemorySynchronizationCommands::addPipeControl(commandStream, args); uint32_t interfaceDescriptorIndex = devQueueHw.schedulerIDIndex; const size_t offsetInterfaceDescriptorTable = devQueueHw.colorCalcStateSize; @@ -161,8 +163,8 @@ void GpgpuWalkerHelper::dispatchScheduler( // Do not put BB_START only when returning in first Scheduler run if (devQueueHw.getSchedulerReturnInstance() != 1) { - - MemorySynchronizationCommands::addPipeControl(commandStream, true); + args.dcFlushEnable = true; + MemorySynchronizationCommands::addPipeControl(commandStream, args); // Add BB Start Cmd to the SLB in the Primary Batch Buffer auto *bbStart = static_cast(commandStream.getSpace(sizeof(MI_BATCH_BUFFER_START))); @@ -183,8 +185,14 @@ void GpgpuWalkerHelper::setupTimestampPacket( if (TimestampPacketStorage::WriteOperationType::AfterWalker == writeOperationType) { uint64_t address = timestampPacketNode->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - *cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, 0, false, *rootDeviceEnvironment.getHardwareInfo()); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + *cmdStream, + PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + address, + 0, + *rootDeviceEnvironment.getHardwareInfo(), + args); } } diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index f98d11d5a1..0622b91827 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -108,7 +108,8 @@ void HardwareInterface::dispatchWalker( if (static_cast(DebugManager.flags.AddBlockingSemaphoreAfterSpecificEnqueue.get()) == gpgpuCsr.peekTaskCount()) { if (DebugManager.flags.AddCacheFlushBeforeBlockingSemaphore.get()) { - MemorySynchronizationCommands::addPipeControl(*commandStream, true); + NEO::PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControl(*commandStream, args); } auto tagValue = *(gpgpuCsr.getTagAddress()); diff --git a/opencl/source/device_queue/device_queue_hw_base.inl b/opencl/source/device_queue/device_queue_hw_base.inl index 68825bf409..f77d9bb417 100644 --- a/opencl/source/device_queue/device_queue_hw_base.inl +++ b/opencl/source/device_queue/device_queue_hw_base.inl @@ -16,6 +16,8 @@ #include "opencl/source/device_queue/device_queue_hw.h" #include "opencl/source/helpers/hardware_commands_helper.h" +#include "pipe_control_args.h" + namespace NEO { template void DeviceQueueHw::allocateSlbBuffer() { @@ -124,14 +126,22 @@ void DeviceQueueHw::addExecutionModelCleanUpSection(Kernel *parentKer } uint64_t criticalSectionAddress = (uint64_t)&igilQueue->m_controls.m_CriticalSection; + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + slbCS, + PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + criticalSectionAddress, + ExecutionModelCriticalSection::Free, + device->getHardwareInfo(), + args); - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - criticalSectionAddress, ExecutionModelCriticalSection::Free, false, device->getHardwareInfo()); - - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - slbCS, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - tagAddress, taskCount, false, device->getHardwareInfo()); + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + slbCS, + PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + tagAddress, + taskCount, + device->getHardwareInfo(), + args); addMediaStateClearCmds(); diff --git a/opencl/source/gen12lp/hw_helper_gen12lp.cpp b/opencl/source/gen12lp/hw_helper_gen12lp.cpp index 7b2ca766a4..6a521f770a 100644 --- a/opencl/source/gen12lp/hw_helper_gen12lp.cpp +++ b/opencl/source/gen12lp/hw_helper_gen12lp.cpp @@ -158,7 +158,12 @@ bool HwHelperHw::isIndependentForwardProgressSupported() { } template <> -void MemorySynchronizationCommands::setExtraCacheFlushFields(Family::PIPE_CONTROL &pipeControl) { +inline void MemorySynchronizationCommands::setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args) { + pipeControl.setHdcPipelineFlush(args.hdcPipelineFlush); +} + +template <> +void MemorySynchronizationCommands::setCacheFlushExtraProperties(Family::PIPE_CONTROL &pipeControl) { pipeControl.setHdcPipelineFlush(true); pipeControl.setConstantCacheInvalidationEnable(false); } diff --git a/opencl/source/gen8/hw_helper_gen8.cpp b/opencl/source/gen8/hw_helper_gen8.cpp index 57edc64b15..09806339dc 100644 --- a/opencl/source/gen8/hw_helper_gen8.cpp +++ b/opencl/source/gen8/hw_helper_gen8.cpp @@ -28,12 +28,12 @@ void HwHelperHw::setupHardwareCapabilities(HardwareCapabilities *caps, c } template <> -typename Family::PIPE_CONTROL *MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, bool dcFlush) { +void MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) { Family::PIPE_CONTROL cmd = Family::cmdInitPipeControl; - MemorySynchronizationCommands::setPipeControl(cmd, true); + args.dcFlushEnable = true; + MemorySynchronizationCommands::setPipeControl(cmd, args); Family::PIPE_CONTROL *cmdBuffer = commandStream.getSpaceForCmd(); *cmdBuffer = cmd; - return cmdBuffer; } template class AubHelperHw; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 669711bb41..f2a44c52a0 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -870,7 +870,8 @@ HWTEST_F(UltCommandStreamReceiverTest, addPipeControlWithFlushAllCaches) { char buff[sizeof(PIPE_CONTROL) * 3]; LinearStream stream(buff, sizeof(PIPE_CONTROL) * 3); - MemorySynchronizationCommands::addPipeControl(stream, false); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControl(stream, args); parseCommands(stream, 0); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index f801203158..880c7d0f04 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -1555,7 +1555,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsTrueWhenCall std::unique_ptr buffer(new uint8_t[128]); LinearStream commandStream(buffer.get(), 128); - auto pipeControl = MemorySynchronizationCommands::addPipeControl(commandStream, true); + PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControl(commandStream, args); + PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); + ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); @@ -1566,7 +1569,10 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenDcFlushArgumentIsFalseWhenCal std::unique_ptr buffer(new uint8_t[128]); LinearStream commandStream(buffer.get(), 128); - auto pipeControl = MemorySynchronizationCommands::addPipeControl(commandStream, false); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControl(commandStream, args); + PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); + ASSERT_NE(nullptr, pipeControl); const bool expectedDcFlush = ::renderCoreFamily == IGFX_GEN8_CORE; EXPECT_EQ(expectedDcFlush, pipeControl->getDcFlushEnable()); diff --git a/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp b/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp index 4c2f604cdb..44fe519824 100644 --- a/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp +++ b/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp @@ -5,6 +5,8 @@ * */ +#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" + #include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" @@ -53,6 +55,8 @@ GEN11TEST_F(MemorySynchronizatiopCommandsTestsGen11, WhenProgrammingCacheFlushTh std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); - PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream); + PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); + ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } diff --git a/opencl/test/unit_test/gen11/icllp/test_program_media_sampler_icllp.cpp b/opencl/test/unit_test/gen11/icllp/test_program_media_sampler_icllp.cpp index 60a715ddea..3091795760 100644 --- a/opencl/test/unit_test/gen11/icllp/test_program_media_sampler_icllp.cpp +++ b/opencl/test/unit_test/gen11/icllp/test_program_media_sampler_icllp.cpp @@ -93,17 +93,20 @@ ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceDisabledWhenP auto expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); setFlushAllCaches(expectedPipeControlCmd); - auto pipeControlCmd = reinterpret_cast(stream->getCpuBase()); + auto pipeControlCmd = genCmdCast(stream->getCpuBase()); + ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); size_t cmdOffset = sizeof(PIPE_CONTROL); - auto miLrCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); + auto miLrCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); + ASSERT_NE(nullptr, miLrCmd); EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM))); cmdOffset += sizeof(MI_LOAD_REGISTER_IMM); expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); - pipeControlCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); + pipeControlCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); + ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); } @@ -132,21 +135,25 @@ ICLLPTEST_F(Gen11MediaSamplerProgramingTest, givenVmeEnableSubsliceEnabledWhenPo expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); setFlushAllCaches(expectedPipeControlCmd); expectedPipeControlCmd.setGenericMediaStateClear(true); - auto pipeControlCmd = reinterpret_cast(stream->getCpuBase()); + auto pipeControlCmd = genCmdCast(stream->getCpuBase()); + ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); size_t cmdOffset = sizeof(PIPE_CONTROL); - pipeControlCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); + pipeControlCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); + ASSERT_NE(nullptr, pipeControlCmd); expectedPipeControlCmd = FamilyType::cmdInitPipeControl; expectedPipeControlCmd.setCommandStreamerStallEnable(0x1); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); cmdOffset += sizeof(PIPE_CONTROL); - auto miLrCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); + auto miLrCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); + ASSERT_NE(nullptr, miLrCmd); EXPECT_EQ(0, memcmp(&expectedMiLrCmd, miLrCmd, sizeof(MI_LOAD_REGISTER_IMM))); cmdOffset += sizeof(MI_LOAD_REGISTER_IMM); - pipeControlCmd = reinterpret_cast(ptrOffset(stream->getCpuBase(), cmdOffset)); + pipeControlCmd = genCmdCast(ptrOffset(stream->getCpuBase(), cmdOffset)); + ASSERT_NE(nullptr, pipeControlCmd); EXPECT_EQ(0, memcmp(&expectedPipeControlCmd, pipeControlCmd, sizeof(PIPE_CONTROL))); } diff --git a/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl b/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl index 66c2870814..1169752fe6 100644 --- a/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl +++ b/opencl/test/unit_test/gen12lp/hw_helper_tests_gen12lp.inl @@ -238,7 +238,7 @@ GEN12LPTEST_F(MemorySynchronizatiopCommandsTests, whenSettingCacheFlushExtraFiel using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; PIPE_CONTROL pipeControl = FamilyType::cmdInitPipeControl; pipeControl.setConstantCacheInvalidationEnable(true); - MemorySynchronizationCommands::setExtraCacheFlushFields(pipeControl); + MemorySynchronizationCommands::setCacheFlushExtraProperties(pipeControl); EXPECT_TRUE(pipeControl.getHdcPipelineFlush()); EXPECT_FALSE(pipeControl.getConstantCacheInvalidationEnable()); } diff --git a/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp b/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp index 8ac405d0a4..68bacbe684 100644 --- a/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp +++ b/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/helpers/constants.h" +#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" #include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" @@ -60,6 +61,8 @@ GEN8TEST_F(MemorySynchronizatiopCommandsTestsGen8, WhenProgrammingCacheFlushThen std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); - PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream); + PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); + ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } diff --git a/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp b/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp index 7f31d17010..2fb87a140e 100644 --- a/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp +++ b/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp @@ -5,6 +5,8 @@ * */ +#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" + #include "opencl/test/unit_test/helpers/get_gpgpu_engines_tests.inl" #include "opencl/test/unit_test/helpers/hw_helper_tests.h" @@ -60,6 +62,8 @@ GEN9TEST_F(MemorySynchronizatiopCommandsTestsGen9, WhenProgrammingCacheFlushThen std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); - PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream); + PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); + ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); } diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index 8bda22c981..0e4ebd819f 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -15,6 +15,7 @@ #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_interface.h" +#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/helpers/variable_backup.h" @@ -25,6 +26,8 @@ #include "opencl/test/unit_test/mocks/mock_buffer.h" #include "opencl/test/unit_test/mocks/mock_context.h" +#include "pipe_control_args.h" + #include #include #include @@ -212,13 +215,15 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsed expectedPipeControl.setAddressHigh(static_cast(address >> 32)); HardwareInfo hardwareInfo = *defaultHwInfo; - auto pipeControl = MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, false, hardwareInfo); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, address, immediateData, hardwareInfo, args); auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleSynchronization(hardwareInfo); + auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); + ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed()); - EXPECT_EQ(pipeControl, ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0); } @@ -238,13 +243,15 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteImmediateDataModeWhenHelperIs expectedPipeControl.setImmediateData(immediateData); HardwareInfo hardwareInfo = *defaultHwInfo; - auto pipeControl = MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, false, hardwareInfo); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + stream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, immediateData, hardwareInfo, args); auto additionalPcSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hardwareInfo) - sizeof(PIPE_CONTROL); auto pipeControlLocationSize = additionalPcSize - MemorySynchronizationCommands::getSizeForSingleSynchronization(hardwareInfo); + auto pipeControl = genCmdCast(ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); + ASSERT_NE(nullptr, pipeControl); EXPECT_EQ(sizeof(PIPE_CONTROL) + additionalPcSize, stream.getUsed()); - EXPECT_EQ(pipeControl, ptrOffset(stream.getCpuBase(), pipeControlLocationSize)); EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0); } @@ -850,7 +857,10 @@ HWTEST_F(PipeControlHelperTests, WhenProgrammingCacheFlushThenExpectBasicFieldsS LinearStream stream(buffer.get(), 128); - PIPE_CONTROL *pipeControl = MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream); + PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); + ASSERT_NE(nullptr, pipeControl); + EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); EXPECT_TRUE(pipeControl->getDcFlushEnable()); diff --git a/shared/source/command_container/command_encoder_base.inl b/shared/source/command_container/command_encoder_base.inl index f67aeb0725..3317345456 100644 --- a/shared/source/command_container/command_encoder_base.inl +++ b/shared/source/command_container/command_encoder_base.inl @@ -17,6 +17,8 @@ #include "opencl/source/helpers/hardware_commands_helper.h" +#include "pipe_control_args.h" + #include namespace NEO { @@ -147,7 +149,8 @@ void EncodeDispatchKernel::encode(CommandContainer &container, bool flush = container.slmSize != slmSizeNew || container.isAnyHeapDirty(); if (flush) { - MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), true); + PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), args); if (container.slmSize != slmSizeNew) { EncodeL3State::encode(container, slmSizeNew != 0u); diff --git a/shared/source/command_stream/command_stream_receiver_hw.h b/shared/source/command_stream/command_stream_receiver_hw.h index 88fb6e1ddc..d7a4bc0ab8 100644 --- a/shared/source/command_stream/command_stream_receiver_hw.h +++ b/shared/source/command_stream/command_stream_receiver_hw.h @@ -18,6 +18,7 @@ namespace NEO { template class DeviceCommandStreamReceiver; +struct PipeControlArgs; template class CommandStreamReceiverHw : public CommandStreamReceiver { @@ -111,8 +112,8 @@ class CommandStreamReceiverHw : public CommandStreamReceiver { size_t getCmdSizeForPrologue(const DispatchFlags &dispatchFlags) const; void addClearSLMWorkAround(typename GfxFamily::PIPE_CONTROL *pCmd); - PIPE_CONTROL *addPipeControlCmd(LinearStream &commandStream); - PIPE_CONTROL *addPipeControlBeforeStateBaseAddress(LinearStream &commandStream); + void addPipeControlCmd(LinearStream &commandStream, PipeControlArgs &args); + void addPipeControlBeforeStateBaseAddress(LinearStream &commandStream); size_t getSshHeapSize(); uint64_t getScratchPatchAddress(); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 5d48d0e0c1..bddd38a7d2 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -31,6 +31,7 @@ #include "shared/source/utilities/tag_allocator.h" #include "command_stream_receiver_hw_ext.inl" +#include "pipe_control_args.h" namespace NEO { @@ -63,9 +64,9 @@ bool CommandStreamReceiverHw::flush(BatchBuffer &batchBuffer, Residen template inline void CommandStreamReceiverHw::addBatchBufferEnd(LinearStream &commandStream, void **patchLocation) { - typedef typename GfxFamily::MI_BATCH_BUFFER_END MI_BATCH_BUFFER_END; + using MI_BATCH_BUFFER_END = typename GfxFamily::MI_BATCH_BUFFER_END; - auto pCmd = (MI_BATCH_BUFFER_END *)commandStream.getSpace(sizeof(MI_BATCH_BUFFER_END)); + auto pCmd = commandStream.getSpaceForCmd(); *pCmd = GfxFamily::cmdInitBatchBufferEnd; if (patchLocation) { *patchLocation = pCmd; @@ -131,12 +132,10 @@ inline size_t CommandStreamReceiverHw::getRequiredCmdSizeForPreamble( } template -inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw::addPipeControlCmd(LinearStream &commandStream) { - typedef typename GfxFamily::PIPE_CONTROL PIPE_CONTROL; - auto pCmd = reinterpret_cast(commandStream.getSpace(sizeof(PIPE_CONTROL))); - *pCmd = GfxFamily::cmdInitPipeControl; - pCmd->setCommandStreamerStallEnable(true); - return pCmd; +inline void CommandStreamReceiverHw::addPipeControlCmd( + LinearStream &commandStream, + PipeControlArgs &args) { + MemorySynchronizationCommands::addPipeControl(commandStream, args); } template @@ -190,9 +189,15 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } auto address = getTagAllocation()->getGpuAddress(); - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - commandStreamTask, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - address, taskCount + 1, dispatchFlags.dcFlush, peekHwInfo()); + + PipeControlArgs args(dispatchFlags.dcFlush); + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + commandStreamTask, + PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + address, + taskCount + 1, + peekHwInfo(), + args); this->latestSentTaskCount = taskCount + 1; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "taskCount", taskCount); @@ -358,8 +363,9 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (executionEnvironment.rootDeviceEnvironments[rootDeviceIndex]->getHardwareInfo()->workaroundTable.waSamplerCacheFlushBetweenRedescribedSurfaceReads) { if (this->samplerCacheFlushRequired != SamplerCacheFlushState::samplerCacheFlushNotRequired) { - auto pCmd = addPipeControlCmd(commandStreamCSR); - pCmd->setTextureCacheInvalidationEnable(true); + PipeControlArgs args; + args.textureCacheInvalidationEnable = true; + addPipeControlCmd(commandStreamCSR, args); if (this->samplerCacheFlushRequired == SamplerCacheFlushState::samplerCacheFlushBefore) { this->samplerCacheFlushRequired = SamplerCacheFlushState::samplerCacheFlushAfter; } else { @@ -374,15 +380,17 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } if (requiresInstructionCacheFlush) { - auto pipeControl = MemorySynchronizationCommands::addPipeControl(commandStreamCSR, false); - pipeControl->setInstructionCacheInvalidateEnable(true); + PipeControlArgs args; + args.instructionCacheInvalidateEnable = true; + MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); requiresInstructionCacheFlush = false; } // Add a PC if we have a dependency on a previous walker to avoid concurrency issues. if (taskLevel > this->taskLevel) { if (!timestampPacketWriteEnabled) { - MemorySynchronizationCommands::addPipeControl(commandStreamCSR, false); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControl(commandStreamCSR, args); } this->taskLevel = taskLevel; DBG_LOG(LogTaskCounts, __FUNCTION__, "Line: ", __LINE__, "this->taskCount", this->taskCount); @@ -522,23 +530,26 @@ template inline void CommandStreamReceiverHw::programStallingPipeControlForBarrier(LinearStream &cmdStream, DispatchFlags &dispatchFlags) { stallingPipeControlOnNextFlushRequired = false; - PIPE_CONTROL *stallingPipeControlCmd; auto barrierTimestampPacketNodes = dispatchFlags.barrierTimestampPacketNodes; if (barrierTimestampPacketNodes && barrierTimestampPacketNodes->peekNodes().size() != 0) { auto barrierTimestampPacketGpuAddress = dispatchFlags.barrierTimestampPacketNodes->peekNodes()[0]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); - stallingPipeControlCmd = MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - barrierTimestampPacketGpuAddress, 0, true, peekHwInfo()); + PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + cmdStream, + PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, + barrierTimestampPacketGpuAddress, + 0, + peekHwInfo(), + args); dispatchFlags.barrierTimestampPacketNodes->makeResident(*this); } else { - stallingPipeControlCmd = MemorySynchronizationCommands::addPipeControl(cmdStream, false); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControl(cmdStream, args); } - - stallingPipeControlCmd->setCommandStreamerStallEnable(true); } template diff --git a/shared/source/command_stream/command_stream_receiver_hw_bdw_plus.inl b/shared/source/command_stream/command_stream_receiver_hw_bdw_plus.inl index 7c63cf52d5..a39b626601 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_bdw_plus.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_bdw_plus.inl @@ -87,11 +87,10 @@ bool CommandStreamReceiverHw::isMultiOsContextCapable() const { } template -inline typename GfxFamily::PIPE_CONTROL *CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { - auto pCmd = addPipeControlCmd(commandStream); - pCmd->setTextureCacheInvalidationEnable(true); - pCmd->setDcFlushEnable(true); - return pCmd; +inline void CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { + PipeControlArgs args(true); + args.textureCacheInvalidationEnable = true; + addPipeControlCmd(commandStream, args); } } // namespace NEO diff --git a/shared/source/command_stream/command_stream_receiver_hw_tgllp_plus.inl b/shared/source/command_stream/command_stream_receiver_hw_tgllp_plus.inl index 40102cf675..5682ed6dfa 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_tgllp_plus.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_tgllp_plus.inl @@ -9,6 +9,8 @@ #include "shared/source/command_stream/command_stream_receiver_hw.h" #include "shared/source/helpers/state_compute_mode_helper.h" +#include "pipe_control_args.h" + namespace NEO { template void CommandStreamReceiverHw::programComputeMode(LinearStream &stream, DispatchFlags &dispatchFlags) { @@ -16,6 +18,7 @@ void CommandStreamReceiverHw::programComputeMode(LinearStream &stream if (isComputeModeNeeded()) { programAdditionalPipelineSelect(stream, dispatchFlags.pipelineSelectArgs, true); this->lastSentCoherencyRequest = static_cast(dispatchFlags.requiresCoherency); + auto stateComputeMode = GfxFamily::cmdInitStateComputeMode; adjustThreadArbitionPolicy(&stateComputeMode); EncodeStates::adjustStateComputeMode(stream, dispatchFlags.numGrfRequired, &stateComputeMode, isMultiOsContextCapable(), dispatchFlags.requiresCoherency); @@ -35,11 +38,10 @@ inline bool CommandStreamReceiverHw::isComputeModeNeeded() const { } template <> -inline typename Family::PIPE_CONTROL *CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { - auto pCmd = addPipeControlCmd(commandStream); - pCmd->setTextureCacheInvalidationEnable(true); - pCmd->setDcFlushEnable(true); - pCmd->setHdcPipelineFlush(true); - return pCmd; +inline void CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { + PipeControlArgs args(true); + args.textureCacheInvalidationEnable = true; + args.hdcPipelineFlush = true; + addPipeControlCmd(commandStream, args); } } // namespace NEO diff --git a/shared/source/command_stream/experimental_command_buffer.inl b/shared/source/command_stream/experimental_command_buffer.inl index 1161723112..a291a36a02 100644 --- a/shared/source/command_stream/experimental_command_buffer.inl +++ b/shared/source/command_stream/experimental_command_buffer.inl @@ -12,6 +12,8 @@ #include "shared/source/helpers/hw_helper.h" #include "shared/source/memory_manager/graphics_allocation.h" +#include "pipe_control_args.h" + namespace NEO { template @@ -73,10 +75,14 @@ void ExperimentalCommandBuffer::addTimeStampPipeControl() { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; uint64_t timeStampAddress = timestamps->getGpuAddress() + timestampsOffset; - - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - *currentStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, timeStampAddress, 0llu, - false, *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo()); + PipeControlArgs args; + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + *currentStream, + PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_TIMESTAMP, + timeStampAddress, + 0llu, + *commandStreamReceiver->peekExecutionEnvironment().rootDeviceEnvironments[commandStreamReceiver->getRootDeviceIndex()]->getHardwareInfo(), + args); //moving to next chunk timestampsOffset += sizeof(uint64_t); diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.inl b/shared/source/direct_submission/dispatchers/render_dispatcher.inl index 0da4a17493..cd49075e28 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.inl +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.inl @@ -10,6 +10,8 @@ #include "shared/source/direct_submission/dispatchers/render_dispatcher.h" #include "shared/source/helpers/hw_helper.h" +#include "pipe_control_args.h" + namespace NEO { template @@ -29,13 +31,14 @@ inline void RenderDispatcher::dispatchMonitorFence(LinearStream &cmdB uint64_t immediateData, const HardwareInfo &hwInfo) { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdBuffer, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, gpuAddress, immediateData, - true, - hwInfo); + hwInfo, + args); } template diff --git a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp index 369f4fc7dd..aed4433fec 100644 --- a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp +++ b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp @@ -39,15 +39,16 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, if (peekHwInfo().platform.eProductFamily == IGFX_ICELAKE_LP) { if (dispatchFlags.pipelineSelectArgs.mediaSamplerRequired) { if (!lastVmeSubslicesConfig) { - auto pc = addPipeControlCmd(stream); - pc->setDcFlushEnable(true); - pc->setRenderTargetCacheFlushEnable(true); - pc->setInstructionCacheInvalidateEnable(true); - pc->setTextureCacheInvalidationEnable(true); - pc->setPipeControlFlushEnable(true); - pc->setVfCacheInvalidationEnable(true); - pc->setConstantCacheInvalidationEnable(true); - pc->setStateCacheInvalidationEnable(true); + PipeControlArgs args; + args.dcFlushEnable = true; + args.renderTargetCacheFlushEnable = true; + args.instructionCacheInvalidateEnable = true; + args.textureCacheInvalidationEnable = true; + args.pipeControlFlushEnable = true; + args.vfCacheInvalidationEnable = true; + args.constantCacheInvalidationEnable = true; + args.stateCacheInvalidationEnable = true; + addPipeControlCmd(stream, args); uint32_t numSubslices = peekHwInfo().gtSystemInfo.SubSliceCount; uint32_t numSubslicesWithVme = numSubslices / 2; // 1 VME unit per DSS @@ -62,24 +63,27 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, reg.TheStructure.Common.SliceCountRequest = numSlicesForPowerGating; LriHelper::program(&stream, PWR_CLK_STATE_REGISTER::REG_ADDRESS, reg.TheStructure.RawData[0]); - addPipeControlCmd(stream); + args = {}; + addPipeControlCmd(stream, args); lastVmeSubslicesConfig = true; } } else { if (lastVmeSubslicesConfig) { - auto pc = addPipeControlCmd(stream); - pc->setDcFlushEnable(true); - pc->setRenderTargetCacheFlushEnable(true); - pc->setInstructionCacheInvalidateEnable(true); - pc->setTextureCacheInvalidationEnable(true); - pc->setPipeControlFlushEnable(true); - pc->setVfCacheInvalidationEnable(true); - pc->setConstantCacheInvalidationEnable(true); - pc->setStateCacheInvalidationEnable(true); - pc->setGenericMediaStateClear(true); + PipeControlArgs args; + args.dcFlushEnable = true; + args.renderTargetCacheFlushEnable = true; + args.instructionCacheInvalidateEnable = true; + args.textureCacheInvalidationEnable = true; + args.pipeControlFlushEnable = true; + args.vfCacheInvalidationEnable = true; + args.constantCacheInvalidationEnable = true; + args.stateCacheInvalidationEnable = true; + args.genericMediaStateClear = true; + addPipeControlCmd(stream, args); - addPipeControlCmd(stream); + args = {}; + addPipeControlCmd(stream, args); // In Gen11-LP, software programs this register as if GT consists of // 2 slices with 4 subslices in each slice. Hardware maps this to the @@ -98,7 +102,7 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, LriHelper::program(&stream, PWR_CLK_STATE_REGISTER::REG_ADDRESS, reg.TheStructure.RawData[0]); - addPipeControlCmd(stream); + addPipeControlCmd(stream, args); } } } diff --git a/shared/source/gen12lp/preamble_gen12lp.cpp b/shared/source/gen12lp/preamble_gen12lp.cpp index 8f47f7d258..9ce1677940 100644 --- a/shared/source/gen12lp/preamble_gen12lp.cpp +++ b/shared/source/gen12lp/preamble_gen12lp.cpp @@ -12,6 +12,7 @@ #include "opencl/source/gen12lp/helpers_gen12lp.h" #include "opencl/source/helpers/hardware_commands_helper.h" +#include "pipe_control_args.h" #include "reg_configs_common.h" namespace NEO { @@ -38,8 +39,9 @@ void PreambleHelper::programPipelineSelect(LinearStream *pCommandSt using PIPELINE_SELECT = typename TGLLPFamily::PIPELINE_SELECT; if (HardwareCommandsHelper::isPipeControlPriorToPipelineSelectWArequired(hwInfo)) { - auto pipeControl = MemorySynchronizationCommands::addPipeControl(*pCommandStream, false); - pipeControl->setRenderTargetCacheFlushEnable(true); + PipeControlArgs args; + args.renderTargetCacheFlushEnable = true; + MemorySynchronizationCommands::addPipeControl(*pCommandStream, args); } auto pCmd = pCommandStream->getSpaceForCmd(); diff --git a/shared/source/helpers/CMakeLists.txt b/shared/source/helpers/CMakeLists.txt index 9eb7271c63..00fa1f4fac 100644 --- a/shared/source/helpers/CMakeLists.txt +++ b/shared/source/helpers/CMakeLists.txt @@ -80,6 +80,8 @@ set(NEO_CORE_HELPERS ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.cpp ${CMAKE_CURRENT_SOURCE_DIR}/timestamp_packet.h ${CMAKE_CURRENT_SOURCE_DIR}/vec.h + ${CMAKE_CURRENT_SOURCE_DIR}/definitions/pipe_control_args_base.h + ${CMAKE_CURRENT_SOURCE_DIR}/definitions${BRANCH_DIR_SUFFIX}/pipe_control_args.h ) set_property(GLOBAL PROPERTY NEO_CORE_HELPERS ${NEO_CORE_HELPERS}) diff --git a/shared/source/helpers/definitions/pipe_control_args.h b/shared/source/helpers/definitions/pipe_control_args.h new file mode 100644 index 0000000000..b4d2814bad --- /dev/null +++ b/shared/source/helpers/definitions/pipe_control_args.h @@ -0,0 +1,16 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +#include "shared/source/helpers/definitions/pipe_control_args_base.h" + +namespace NEO { +struct PipeControlArgs : PipeControlArgsBase { + PipeControlArgs() = default; + PipeControlArgs(bool dcFlush) : PipeControlArgsBase(dcFlush) {} +}; +} // namespace NEO diff --git a/shared/source/helpers/definitions/pipe_control_args_base.h b/shared/source/helpers/definitions/pipe_control_args_base.h new file mode 100644 index 0000000000..182e8f8f68 --- /dev/null +++ b/shared/source/helpers/definitions/pipe_control_args_base.h @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2020 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#pragma once +namespace NEO { +struct PipeControlArgsBase { + PipeControlArgsBase() = default; + PipeControlArgsBase(bool dcFlush) : dcFlushEnable(dcFlush) {} + + bool dcFlushEnable = false; + bool renderTargetCacheFlushEnable = false; + bool instructionCacheInvalidateEnable = false; + bool textureCacheInvalidationEnable = false; + bool pipeControlFlushEnable = false; + bool vfCacheInvalidationEnable = false; + bool constantCacheInvalidationEnable = false; + bool stateCacheInvalidationEnable = false; + bool genericMediaStateClear = false; + bool hdcPipelineFlush = false; +}; +} // namespace NEO diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index a738ea25cf..c87cc38a96 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -21,10 +21,11 @@ #include namespace NEO { -class GraphicsAllocation; -struct RootDeviceEnvironment; -struct HardwareCapabilities; class GmmHelper; +class GraphicsAllocation; +struct HardwareCapabilities; +struct RootDeviceEnvironment; +struct PipeControlArgs; class HwHelper { public: @@ -260,26 +261,32 @@ template struct MemorySynchronizationCommands { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; - static PIPE_CONTROL *obtainPipeControlAndProgramPostSyncOperation(LinearStream &commandStream, - POST_SYNC_OPERATION operation, - uint64_t gpuAddress, - uint64_t immediateData, - bool dcFlush, const HardwareInfo &hwInfo); - static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo); + + static void addPipeControlAndProgramPostSyncOperation(LinearStream &commandStream, + POST_SYNC_OPERATION operation, + uint64_t gpuAddress, + uint64_t immediateData, + const HardwareInfo &hwInfo, + PipeControlArgs &args); + static void setPostSyncExtraProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo); + static void addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo); - static void setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo); - static PIPE_CONTROL *addPipeControl(LinearStream &commandStream, bool dcFlush); + static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo); + + static void addPipeControl(LinearStream &commandStream, PipeControlArgs &args); + + static void addFullCacheFlush(LinearStream &commandStream); + static void setCacheFlushExtraProperties(PIPE_CONTROL &pipeControl); + static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo); static size_t getSizeForSinglePipeControl(); static size_t getSizeForSingleSynchronization(const HardwareInfo &hwInfo); static size_t getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo); - - static PIPE_CONTROL *addFullCacheFlush(LinearStream &commandStream); static size_t getSizeForFullCacheFlush(); - static void setExtraCacheFlushFields(PIPE_CONTROL &pipeControl); protected: - static void setPipeControl(PIPE_CONTROL &pipeControl, bool dcFlush); + static void setPipeControl(PIPE_CONTROL &pipeControl, PipeControlArgs &args); + static void setPipeControlExtraProperties(PIPE_CONTROL &pipeControl, PipeControlArgs &args); }; union SURFACE_STATE_BUFFER_LENGTH { diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index f4eddbbd3f..0fa8322c9c 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -20,6 +20,8 @@ #include "opencl/source/helpers/dispatch_info.h" #include "opencl/source/helpers/hardware_commands_helper.h" +#include "pipe_control_args.h" + namespace NEO { template @@ -185,35 +187,48 @@ bool HwHelperHw::isBlitAuxTranslationRequired(const HardwareInfo &hwInfo (multiDispatchInfo.getMemObjsForAuxTranslation()->size() > 0); } -template -typename Family::PIPE_CONTROL *MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( - LinearStream &commandStream, POST_SYNC_OPERATION operation, uint64_t gpuAddress, uint64_t immediateData, bool dcFlush, const HardwareInfo &hwInfo) { - using PIPE_CONTROL = typename Family::PIPE_CONTROL; +template +void MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( + LinearStream &commandStream, + POST_SYNC_OPERATION operation, + uint64_t gpuAddress, + uint64_t immediateData, + const HardwareInfo &hwInfo, + PipeControlArgs &args) { + using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; addPipeControlWA(commandStream, gpuAddress, hwInfo); PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd(); - PIPE_CONTROL cmd = Family::cmdInitPipeControl; - setPipeControl(cmd, dcFlush); + PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; + setPipeControl(cmd, args); cmd.setPostSyncOperation(operation); cmd.setAddress(static_cast(gpuAddress & 0x0000FFFFFFFFULL)); cmd.setAddressHigh(static_cast(gpuAddress >> 32)); - cmd.setDcFlushEnable(dcFlush); if (operation == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { cmd.setImmediateData(immediateData); } - setExtraPipeControlProperties(cmd, hwInfo); + setPostSyncExtraProperties(cmd, hwInfo); *pipeControl = cmd; - MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo); - - return pipeControl; + MemorySynchronizationCommands::addAdditionalSynchronization(commandStream, gpuAddress, hwInfo); } template -void MemorySynchronizationCommands::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, bool dcFlush) { +void MemorySynchronizationCommands::setPipeControl(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) { pipeControl.setCommandStreamerStallEnable(true); - pipeControl.setDcFlushEnable(dcFlush); + pipeControl.setDcFlushEnable(args.dcFlushEnable); + pipeControl.setConstantCacheInvalidationEnable(args.constantCacheInvalidationEnable); + pipeControl.setInstructionCacheInvalidateEnable(args.instructionCacheInvalidateEnable); + pipeControl.setPipeControlFlushEnable(args.pipeControlFlushEnable); + pipeControl.setRenderTargetCacheFlushEnable(args.renderTargetCacheFlushEnable); + pipeControl.setStateCacheInvalidationEnable(args.stateCacheInvalidationEnable); + pipeControl.setTextureCacheInvalidationEnable(args.textureCacheInvalidationEnable); + pipeControl.setVfCacheInvalidationEnable(args.vfCacheInvalidationEnable); + pipeControl.setVfCacheInvalidationEnable(args.vfCacheInvalidationEnable); + pipeControl.setGenericMediaStateClear(args.genericMediaStateClear); + + setPipeControlExtraProperties(pipeControl, args); if (DebugManager.flags.FlushAllCaches.get()) { pipeControl.setDcFlushEnable(true); @@ -228,14 +243,12 @@ void MemorySynchronizationCommands::setPipeControl(typename GfxFamily } template -typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, bool dcFlush) { +void MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; - MemorySynchronizationCommands::setPipeControl(cmd, dcFlush); + MemorySynchronizationCommands::setPipeControl(cmd, args); auto pipeControl = commandStream.getSpaceForCmd(); *pipeControl = cmd; - - return pipeControl; } template @@ -332,24 +345,22 @@ size_t MemorySynchronizationCommands::getSizeForFullCacheFlush() { } template -typename GfxFamily::PIPE_CONTROL *MemorySynchronizationCommands::addFullCacheFlush(LinearStream &commandStream) { +void MemorySynchronizationCommands::addFullCacheFlush(LinearStream &commandStream) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd(); PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; - MemorySynchronizationCommands::setPipeControl(cmd, true); - cmd.setRenderTargetCacheFlushEnable(true); - cmd.setInstructionCacheInvalidateEnable(true); - cmd.setTextureCacheInvalidationEnable(true); - cmd.setPipeControlFlushEnable(true); - cmd.setConstantCacheInvalidationEnable(true); - cmd.setStateCacheInvalidationEnable(true); - - MemorySynchronizationCommands::setExtraCacheFlushFields(cmd); + PipeControlArgs args(true); + args.renderTargetCacheFlushEnable = true; + args.instructionCacheInvalidateEnable = true; + args.textureCacheInvalidationEnable = true; + args.pipeControlFlushEnable = true; + args.constantCacheInvalidationEnable = true; + args.stateCacheInvalidationEnable = true; + MemorySynchronizationCommands::setPipeControl(cmd, args); + MemorySynchronizationCommands::setCacheFlushExtraProperties(cmd); *pipeControl = cmd; - - return pipeControl; } template diff --git a/shared/source/helpers/hw_helper_bdw_plus.inl b/shared/source/helpers/hw_helper_bdw_plus.inl index 24914a21b2..039c378bff 100644 --- a/shared/source/helpers/hw_helper_bdw_plus.inl +++ b/shared/source/helpers/hw_helper_bdw_plus.inl @@ -76,15 +76,18 @@ uint64_t HwHelperHw::getGpuTimeStampInNS(uint64_t timeStamp, double f } template -void MemorySynchronizationCommands::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { +inline void MemorySynchronizationCommands::addPipeControlWA(LinearStream &commandStream, uint64_t gpuAddress, const HardwareInfo &hwInfo) { } template -void MemorySynchronizationCommands::setExtraPipeControlProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo) { +inline void MemorySynchronizationCommands::setPostSyncExtraProperties(PIPE_CONTROL &pipeControl, const HardwareInfo &hwInfo) { } template -void MemorySynchronizationCommands::setExtraCacheFlushFields(PIPE_CONTROL &pipeControl) { +inline void MemorySynchronizationCommands::setCacheFlushExtraProperties(PIPE_CONTROL &pipeControl) { } +template +inline void MemorySynchronizationCommands::setPipeControlExtraProperties(typename GfxFamily::PIPE_CONTROL &pipeControl, PipeControlArgs &args) { +} } // namespace NEO diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index f4c8051d03..6fc8470a27 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -14,6 +14,8 @@ #include "shared/source/helpers/non_copyable_or_moveable.h" #include "shared/source/utilities/tag_allocator.h" +#include "pipe_control_args.h" + #include #include #include @@ -160,9 +162,10 @@ struct TimestampPacketHelper { auto cacheFlushTimestampPacketGpuAddress = timestampPacketDependencies->cacheFlushNodes.peekNodes()[0]->getGpuAddress() + offsetof(TimestampPacketStorage, packets[0].contextEnd); - MemorySynchronizationCommands::obtainPipeControlAndProgramPostSyncOperation( + PipeControlArgs args(true); + MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, - cacheFlushTimestampPacketGpuAddress, 0, true, hwInfo); + cacheFlushTimestampPacketGpuAddress, 0, hwInfo, args); } for (auto &node : container.peekNodes()) {