From 0fd685541de3091f3b8a4255c590d440f357c0e7 Mon Sep 17 00:00:00 2001 From: Filip Hazubski Date: Mon, 20 Dec 2021 14:37:33 +0000 Subject: [PATCH] Add isDcFlushAllowed function to HwInfoConfig Signed-off-by: Filip Hazubski --- level_zero/core/source/cmdlist/cmdlist_hw.inl | 40 +++++++------ .../source/cmdlist/cmdlist_hw_immediate.inl | 9 ++- .../cmdlist/cmdlist_hw_xehp_and_later.inl | 5 +- .../core/source/cmdqueue/cmdqueue_hw.inl | 7 ++- .../core/source/cmdqueue/cmdqueue_hw_base.inl | 3 +- .../cmdqueue_xe_hp_core_and_later.inl | 4 +- .../core/source/gen11/cmdlist_gen11.inl | 3 +- .../definitions/cache_flush_gen12lp.inl | 4 +- level_zero/core/source/gen9/cmdlist_gen9.inl | 3 +- .../sources/cmdlist/test_cmdlist_2.cpp | 6 +- .../sources/cmdlist/test_cmdlist_5.cpp | 2 +- .../test_cmdlist_append_event_reset.cpp | 4 +- .../cmdlist/test_cmdlist_append_memory.cpp | 6 +- .../test_cmdlist_append_signal_event.cpp | 6 +- .../test_cmdlist_append_wait_on_events.cpp | 8 +-- .../cmdlist/test_cmdlist_xehp_and_later.cpp | 2 +- .../built_ins/aux_translation_builtin.h | 4 +- opencl/source/command_queue/enqueue_common.h | 5 +- .../gpgpu_walker_enabled_device_enqueue.inl | 3 +- .../gpgpu_walker_xehp_and_later.inl | 5 +- .../command_queue/hardware_interface_base.inl | 5 +- .../hardware_interface_xehp_and_later.inl | 8 ++- ...hardware_commands_helper_bdw_and_later.inl | 4 +- .../command_queue/dispatch_walker_tests.cpp | 4 +- .../dispatch_walker_tests_dg2_and_later.cpp | 4 +- .../dispatch_walker_tests_xehp_and_later.cpp | 4 +- .../command_queue/enqueue_kernel_1_tests.cpp | 2 +- ...and_stream_receiver_flush_task_1_tests.cpp | 8 +-- ...and_stream_receiver_flush_task_2_tests.cpp | 6 +- ...and_stream_receiver_flush_task_3_tests.cpp | 6 +- ...ceiver_flush_task_tests_xehp_and_later.cpp | 2 +- .../unit_test/gen11/hw_helper_tests_gen11.cpp | 2 +- .../unit_test/gen8/hw_helper_tests_gen8.cpp | 2 +- .../unit_test/gen9/hw_helper_tests_gen9.cpp | 2 +- .../unit_test/helpers/hw_helper_tests.cpp | 12 +++- .../unit_test/mem_obj/buffer_bcs_tests.cpp | 4 +- .../hw_helper_tests_xe_hpc_core.cpp | 2 +- .../command_encoder_bdw_and_later.inl | 4 +- .../command_encoder_xehp_and_later.inl | 9 +-- .../command_container/implicit_scaling.h | 3 +- .../implicit_scaling_xehp_and_later.inl | 9 ++- .../walker_partition_xehp_and_later.h | 10 ++-- .../command_stream_receiver_hw_base.inl | 39 +++++++------ ...mmand_stream_receiver_hw_bdw_and_later.inl | 7 ++- ...mmand_stream_receiver_hw_dg2_and_later.inl | 2 +- ...and_stream_receiver_hw_tgllp_and_later.inl | 4 +- ...mand_stream_receiver_hw_xehp_and_later.inl | 12 ++-- .../dispatchers/render_dispatcher.inl | 4 +- .../command_stream_receiver_hw_gen11.cpp | 21 +++---- shared/source/gen8/hw_helper_gen8.cpp | 2 +- shared/source/helpers/hw_helper.h | 4 +- shared/source/helpers/hw_helper_base.inl | 12 ++-- shared/source/helpers/timestamp_packet.h | 2 +- shared/source/os_interface/hw_info_config.h | 2 + shared/source/os_interface/hw_info_config.inl | 5 ++ .../xe_hpc_core/hw_helper_xe_hpc_core.cpp | 5 -- .../os_agnostic_hw_info_config_pvc.inl | 5 ++ .../test/common/mocks/mock_hw_info_config.cpp | 6 ++ ..._encode_dispatch_kernel_xehp_and_later.cpp | 2 +- .../test_implicit_scaling_xehp_and_later.cpp | 51 +++++++++++------ ...alker_partition_tests_xehp_and_later_1.cpp | 57 ++++++++++++------- ...alker_partition_tests_xehp_and_later_2.cpp | 17 ++++-- 62 files changed, 298 insertions(+), 202 deletions(-) diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index 1c567f6db7..bda8dd0eea 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -325,9 +325,9 @@ ze_result_t CommandListCoreFamily::appendEventReset(ze_event_hand event->getGpuAddress(this->device), Event::STATE_CLEARED, args); } else { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope); - auto &hwInfo = neoDevice->getHardwareInfo(); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope, hwInfo); size_t estimateSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo) * packetsToReset; if (this->partitionCount > 1) { estimateSize += estimateBufferSizeMultiTileBarrier(hwInfo); @@ -622,8 +622,9 @@ ze_result_t CommandListCoreFamily::appendImageCopyToMemory(void * } if (allocationStruct.needsFlush) { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } @@ -1036,7 +1037,8 @@ ze_result_t CommandListCoreFamily::appendPageFaultCopy(NEO::Graph isStateless); } - if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true)) { + const auto &hwInfo = this->device->getHwInfo(); + if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { if (flushHost) { NEO::PipeControlArgs args; args.dcFlushEnable = true; @@ -1157,7 +1159,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopy(void *dstptr, appendEventForProfilingAllWalkers(hSignalEvent, false); - if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true)) { + const auto &hwInfo = this->device->getHwInfo(); + if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { auto event = Event::fromHandle(hSignalEvent); if (event) { dstAllocationStruct.needsFlush &= !event->signalScope; @@ -1252,7 +1255,8 @@ ze_result_t CommandListCoreFamily::appendMemoryCopyRegion(void *d return result; } - if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true)) { + const auto &hwInfo = this->device->getHwInfo(); + if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { auto event = Event::fromHandle(hSignalEvent); if (event) { dstAllocationStruct.needsFlush &= !event->signalScope; @@ -1604,7 +1608,8 @@ ze_result_t CommandListCoreFamily::appendMemoryFill(void *ptr, appendEventForProfilingAllWalkers(hSignalEvent, false); - if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true)) { + const auto &hwInfo = this->device->getHwInfo(); + if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { auto event = Event::fromHandle(hSignalEvent); if (event) { hostPointerNeedsFlush &= !event->signalScope; @@ -1695,10 +1700,10 @@ void CommandListCoreFamily::appendSignalEventPostWalker(ze_event_ increaseCommandStreamSpace(NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), baseAddr, Event::STATE_SIGNALED, args); } else { - auto &hwInfo = commandContainer.getDevice()->getHardwareInfo(); + const auto &hwInfo = this->device->getHwInfo(); increaseCommandStreamSpace(NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope, hwInfo); if (this->partitionCount > 1) { args.workloadPartitionOffset = true; event->setPacketsInUse(this->partitionCount); @@ -1848,15 +1853,15 @@ ze_result_t CommandListCoreFamily::appendSignalEvent(ze_event_han increaseCommandStreamSpace(NEO::EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite()); NEO::EncodeMiFlushDW::programMiFlushDw(*commandContainer.getCommandStream(), ptrOffset(baseAddr, eventSignalOffset), Event::STATE_SIGNALED, args); } else { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; bool applyScope = event->signalScope; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(applyScope); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(applyScope, hwInfo); if (this->partitionCount > 1) { args.workloadPartitionOffset = true; event->setPacketsInUse(this->partitionCount); } if (applyScope || event->isEventTimestampFlagSet()) { - auto &hwInfo = commandContainer.getDevice()->getHardwareInfo(); increaseCommandStreamSpace(NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); NEO::MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandContainer.getCommandStream(), @@ -1911,7 +1916,8 @@ ze_result_t CommandListCoreFamily::appendWaitOnEvents(uint32_t nu constexpr uint32_t eventStateClear = Event::State::STATE_CLEARED; bool dcFlushRequired = false; - if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true)) { + const auto &hwInfo = this->device->getHwInfo(); + if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phEvent[i]); dcFlushRequired |= !!event->waitScope; @@ -2037,17 +2043,18 @@ void CommandListCoreFamily::appendEventForProfiling(ze_event_hand if (beforeWalker) { appendWriteKernelTimestamp(hEvent, beforeWalker, true); } else { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope, hwInfo); NEO::MemorySynchronizationCommands::setPostSyncExtraProperties(args, - commandContainer.getDevice()->getHardwareInfo()); + hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); uint64_t baseAddr = event->getGpuAddress(this->device); NEO::MemorySynchronizationCommands::addAdditionalSynchronization(*commandContainer.getCommandStream(), baseAddr, - commandContainer.getDevice()->getHardwareInfo()); + hwInfo); appendWriteKernelTimestamp(hEvent, beforeWalker, true); } } @@ -2316,8 +2323,9 @@ ze_result_t CommandListCoreFamily::setGlobalWorkSizeIndirect(NEO: template void CommandListCoreFamily::programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired) { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); args.hdcPipelineFlush = true; args.textureCacheInvalidationEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl index 7331e23014..01fc06dd0b 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_immediate.inl @@ -283,8 +283,9 @@ ze_result_t CommandListCoreFamilyImmediate::appendSignalEvent(ze_ } } } else { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope, hwInfo); this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_SIGNALED, args, false, false, false); if (this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; @@ -314,8 +315,9 @@ ze_result_t CommandListCoreFamilyImmediate::appendEventReset(ze_e } } } else { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope, hwInfo); this->csr->flushNonKernelTask(&event->getAllocation(this->device), event->getGpuAddress(this->device), Event::STATE_CLEARED, args, false, false, false); if (this->isSyncModeQueue) { auto timeoutMicroseconds = NEO::TimeoutControls::maxTimeout; @@ -370,7 +372,8 @@ ze_result_t CommandListCoreFamilyImmediate::appendWaitOnEvents(ui } } else { bool dcFlushRequired = false; - if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true)) { + const auto &hwInfo = this->device->getHwInfo(); + if (NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { for (uint32_t i = 0; i < numEvents; i++) { auto event = Event::fromHandle(phWaitEvents[i]); dcFlushRequired |= (!event->waitScope) ? false : true; diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index fa20071d8a..2890443de9 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -130,6 +130,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z using COMPUTE_WALKER = typename GfxFamily::COMPUTE_WALKER; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + const auto &hwInfo = this->device->getHwInfo(); if (NEO::DebugManager.flags.ForcePipeControlPriorToWalker.get()) { increaseCommandStreamSpace(NEO::MemorySynchronizationCommands::getSizeForSinglePipeControl()); @@ -165,7 +166,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z auto event = Event::fromHandle(hEvent); eventAlloc = &event->getAllocation(this->device); commandContainer.addToResidencyContainer(eventAlloc); - L3FlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope); + L3FlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(event->signalScope, hwInfo); isTimestampEvent = event->isEventTimestampFlagSet(); eventAddress = event->getPacketAddress(this->device); } @@ -240,7 +241,7 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(z event->setPacketsInUse(partitionCount); } if (L3FlushEnable) { - size_t estimatedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(neoDevice->getHardwareInfo()); + size_t estimatedSize = NEO::MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); increaseCommandStreamSpace(estimatedSize); programEventL3Flush(hEvent, this->device, partitionCount, commandContainer); } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl index 1e24624e6a..435c430721 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw.inl @@ -435,7 +435,7 @@ ze_result_t CommandQueueHw::executeCommandLists( NEO::EncodeMiFlushDW::programMiFlushDw(child, fence->getGpuAddress(), Fence::STATE_SIGNALED, args); } else { NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); if (partitionCount > 1) { args.workloadPartitionOffset = true; } @@ -577,8 +577,9 @@ void CommandQueueHw::dispatchTaskCountWrite(NEO::LinearStream &co args.notifyEnable = csr->isUsedNotifyEnableForPostSync(); NEO::EncodeMiFlushDW::programMiFlushDw(commandStream, gpuAddress, taskCountToWrite, args); } else { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); if (partitionCount > 1) { args.workloadPartitionOffset = true; } @@ -588,7 +589,7 @@ void CommandQueueHw::dispatchTaskCountWrite(NEO::LinearStream &co POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, gpuAddress, taskCountToWrite, - device->getHwInfo(), + hwInfo, args); } } diff --git a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl index 42dc5cb450..33b0509ab3 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_hw_base.inl @@ -34,8 +34,9 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; using STATE_BASE_ADDRESS = typename GfxFamily::STATE_BASE_ADDRESS; + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs pcArgs; - pcArgs.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + pcArgs.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); pcArgs.textureCacheInvalidationEnable = true; NEO::MemorySynchronizationCommands::addPipeControl(commandStream, pcArgs); diff --git a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl index ca6e7d4781..20f7a4a59a 100644 --- a/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl +++ b/level_zero/core/source/cmdqueue/cmdqueue_xe_hp_core_and_later.inl @@ -27,8 +27,9 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool if (NEO::ApiSpecificConfig::getBindlessConfiguration()) { NEO::Device *neoDevice = device->getNEODevice(); auto globalHeapsBase = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase(); + auto &hwInfo = neoDevice->getHardwareInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(commandStream, args); auto pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); STATE_BASE_ADDRESS sbaCmd; @@ -52,7 +53,6 @@ void CommandQueueHw::programStateBaseAddress(uint64_t gsba, bool 1u); *pSbaCmd = sbaCmd; - auto &hwInfo = neoDevice->getHardwareInfo(); auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig.isAdditionalStateBaseAddressWARequired(hwInfo)) { pSbaCmd = static_cast(commandStream.getSpace(sizeof(STATE_BASE_ADDRESS))); diff --git a/level_zero/core/source/gen11/cmdlist_gen11.inl b/level_zero/core/source/gen11/cmdlist_gen11.inl index d09a4ce462..ea32cc7f6c 100644 --- a/level_zero/core/source/gen11/cmdlist_gen11.inl +++ b/level_zero/core/source/gen11/cmdlist_gen11.inl @@ -13,8 +13,9 @@ template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } diff --git a/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl b/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl index 5cd2226186..5ee18437aa 100644 --- a/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl +++ b/level_zero/core/source/gen12lp/definitions/cache_flush_gen12lp.inl @@ -20,11 +20,11 @@ void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t num using GfxFamily = typename NEO::GfxFamilyMapper::GfxFamily; - auto &hwInfo = commandContainer.getDevice()->getHardwareInfo(); + const auto &hwInfo = this->device->getHwInfo(); bool supportL3Control = hwInfo.capabilityTable.supportCacheFlushAfterWalker; if (!supportL3Control) { NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } else { diff --git a/level_zero/core/source/gen9/cmdlist_gen9.inl b/level_zero/core/source/gen9/cmdlist_gen9.inl index bc24633efc..21805faae5 100644 --- a/level_zero/core/source/gen9/cmdlist_gen9.inl +++ b/level_zero/core/source/gen9/cmdlist_gen9.inl @@ -23,8 +23,9 @@ template void CommandListCoreFamily::applyMemoryRangesBarrier(uint32_t numRanges, const size_t *pRangeSizes, const void **pRanges) { + const auto &hwInfo = this->device->getHwInfo(); NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); NEO::MemorySynchronizationCommands::addPipeControl(*commandContainer.getCommandStream(), args); } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp index b0571830f0..7d34b0a5cb 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_2.cpp @@ -407,7 +407,7 @@ HWTEST2_F(CommandListCreate, givenCommandListAndHostPointersWhenMemoryCopyCalled cmd = genCmdCast(*itor); itor = find(++itor, genCmdList.end()); } - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); } HWTEST2_F(CommandListCreate, givenCommandListAnd2DWhbufferenMemoryCopyRegionCalledThenCopyKernel2DCalled, IsAtLeastSkl) { @@ -480,7 +480,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventsThenS EXPECT_NE(cmdList.end(), itor); itor++; itor = find(itor, cmdList.end()); - if (MemorySynchronizationCommands::isDcFlushAllowed(true)) { + if (MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)) { EXPECT_NE(cmdList.end(), itor); } else { EXPECT_EQ(cmdList.end(), itor); @@ -581,7 +581,7 @@ HWTEST2_F(CommandListCreate, givenCommandListWhenMemoryCopyWithSignalEventScopeS auto it = *(iterator.end() - 1); auto cmd1 = genCmdCast(*it); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd1->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd1->getDcFlushEnable()); } using ImageSupport = IsWithinProducts; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp index d0a0826f82..3fec50bb70 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_5.cpp @@ -619,7 +619,7 @@ HWTEST_F(CommandListCreate, givenCommandListWithCopyOnlyWhenAppendWaitEventsWith cmdList, ptrOffset(commandContainer.getCommandStream()->getCpuBase(), 0), commandContainer.getCommandStream()->getUsed())); auto itor = find(cmdList.begin(), cmdList.end()); - if (MemorySynchronizationCommands::isDcFlushAllowed(true)) { + if (MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)) { EXPECT_NE(cmdList.end(), itor); } else { EXPECT_EQ(cmdList.end(), itor); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp index 28776a5b3c..bd9053a343 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_event_reset.cpp @@ -216,7 +216,7 @@ HWTEST2_F(CommandListAppendEventReset, givenEventWithHostScopeUsedInResetThenPip EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); postSyncFound = true; } } @@ -277,7 +277,7 @@ HWTEST2_F(CommandListAppendEventReset, EXPECT_EQ(cmd->getImmediateData(), Event::STATE_CLEARED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); postSyncPipeControlItor = it; diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp index f3eae0402a..a42427a948 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_memory.cpp @@ -176,7 +176,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyRegionC cmd = genCmdCast(*itor); itor = find(++itor, genCmdList.end()); } - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); } HWTEST2_F(AppendMemoryCopy, givenImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsAtLeastSkl) { @@ -240,7 +240,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListAndHostPointersWhenMemoryCopyCalledT cmd = genCmdCast(*itor); itor = find(++itor, genCmdList.end()); } - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); } HWTEST2_F(AppendMemoryCopy, givenCopyCommandListWhenTimestampPassedToMemoryCopyThenAppendProfilingCalledOnceBeforeAndAfterCommand, IsAtLeastSkl) { @@ -377,7 +377,7 @@ HWTEST2_F(AppendMemoryCopy, givenCommandListWhenTimestampPassedToMemoryCopyThenA EXPECT_NE(cmdList.end(), itor); { auto cmd = genCmdCast(*itor); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); } } } // namespace ult diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp index edaa428e9a..fc2029738b 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_signal_event.cpp @@ -93,7 +93,7 @@ HWTEST_F(CommandListAppendSignalEvent, givenEventWithScopeFlagDeviceWhenAppendin if (cmd->getPostSyncOperation() == POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA) { EXPECT_EQ(cmd->getImmediateData(), Event::STATE_SIGNALED); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); postSyncFound = true; } } @@ -229,7 +229,7 @@ HWTEST2_F(CommandListAppendSignalEvent, EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData()); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); @@ -338,7 +338,7 @@ HWTEST2_F(CommandListAppendSignalEvent, EXPECT_EQ(Event::STATE_SIGNALED, cmd->getImmediateData()); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); EXPECT_EQ(gpuAddress, NEO::UnitTestHelper::getPipeControlPostSyncAddress(*cmd)); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); EXPECT_TRUE(cmd->getWorkloadPartitionIdOffsetEnable()); postSyncFound++; gpuAddress += event->getSinglePacketSize(); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp index e1ddc4255b..0b2c4cbdfd 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_wait_on_events.cpp @@ -132,7 +132,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenEventWithWaitScopeFlagDeviceWhenAppe ASSERT_NE(cmd, nullptr); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); } } @@ -297,7 +297,7 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWai commandList->commandContainer.getCommandStream()->getSpace(consumeSpace); size_t expectedConsumedSpace = sizeof(MI_SEMAPHORE_WAIT); - if (MemorySynchronizationCommands::isDcFlushAllowed(true)) { + if (MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)) { expectedConsumedSpace += sizeof(PIPE_CONTROL); } @@ -329,14 +329,14 @@ HWTEST_F(CommandListAppendWaitOnEvent, givenCommandBufferIsEmptyWhenAppendingWai usedSpaceAfter)); auto itorPC = find(cmdList.begin(), cmdList.end()); - if (MemorySynchronizationCommands::isDcFlushAllowed(true)) { + if (MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)) { ASSERT_NE(cmdList.end(), itorPC); { auto cmd = genCmdCast(*itorPC); ASSERT_NE(cmd, nullptr); EXPECT_TRUE(cmd->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmd->getDcFlushEnable()); } } else { EXPECT_EQ(cmdList.end(), itorPC); diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp index 21e0bb9f56..460d3350bf 100644 --- a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_xehp_and_later.cpp @@ -45,7 +45,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandListTests, whenCommandListIsCreatedThenPCAnd auto itorPc = find(cmdList.begin(), cmdList.end()); ASSERT_NE(cmdList.end(), itorPc); auto cmdPc = genCmdCast(*itorPc); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmdPc->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmdPc->getDcFlushEnable()); EXPECT_TRUE(cmdPc->getCommandStreamerStallEnable()); EXPECT_TRUE(cmdPc->getTextureCacheInvalidationEnable()); diff --git a/opencl/source/built_ins/aux_translation_builtin.h b/opencl/source/built_ins/aux_translation_builtin.h index 7dd4973fba..98aaafa48a 100644 --- a/opencl/source/built_ins/aux_translation_builtin.h +++ b/opencl/source/built_ins/aux_translation_builtin.h @@ -80,9 +80,9 @@ class BuiltInOp : public BuiltinDispatchInfoBuilder using RegisteredMethodDispatcherT = RegisteredMethodDispatcher; template - static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &) { + static void dispatchPipeControl(LinearStream &linearStream, TimestampPacketDependencies *, const HardwareInfo &hwInfo) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(dcFlush); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(dcFlush, hwInfo); MemorySynchronizationCommands::addPipeControl(linearStream, args); } diff --git a/opencl/source/command_queue/enqueue_common.h b/opencl/source/command_queue/enqueue_common.h index cf761b24e4..a89ca589bb 100644 --- a/opencl/source/command_queue/enqueue_common.h +++ b/opencl/source/command_queue/enqueue_common.h @@ -530,14 +530,15 @@ BlitProperties CommandQueueHw::processDispatchForBlitEnqueue(CommandS if (commandStream) { if (timestampPacketDependencies.cacheFlushNodes.peekNodes().size() > 0) { auto cacheFlushTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(*timestampPacketDependencies.cacheFlushNodes.peekNodes()[0]); + const auto &hwInfo = device->getHardwareInfo(); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, cacheFlushTimestampPacketGpuAddress, 0, - device->getHardwareInfo(), + hwInfo, args); } } diff --git a/opencl/source/command_queue/gpgpu_walker_enabled_device_enqueue.inl b/opencl/source/command_queue/gpgpu_walker_enabled_device_enqueue.inl index 3cdc7e113c..0930955c9b 100644 --- a/opencl/source/command_queue/gpgpu_walker_enabled_device_enqueue.inl +++ b/opencl/source/command_queue/gpgpu_walker_enabled_device_enqueue.inl @@ -24,6 +24,7 @@ void GpgpuWalkerHelper::dispatchScheduler( using GPGPU_WALKER = typename GfxFamily::GPGPU_WALKER; using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START; + const auto &hwInfo = devQueueHw.getDevice().getHardwareInfo(); NEO::PipeControlArgs args; MemorySynchronizationCommands::addPipeControl(commandStream, args); @@ -100,7 +101,7 @@ void GpgpuWalkerHelper::dispatchScheduler( // Do not put BB_START only when returning in first Scheduler run if (devQueueHw.getSchedulerReturnInstance() != 1) { - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); MemorySynchronizationCommands::addPipeControl(commandStream, args); // Add BB Start Cmd to the SLB in the Primary Batch Buffer diff --git a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl index 42699303ef..de83f26635 100644 --- a/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl +++ b/opencl/source/command_queue/gpgpu_walker_xehp_and_later.inl @@ -102,13 +102,14 @@ void GpgpuWalkerHelper::setupTimestampPacket(LinearStream *cmdStream, auto gmmHelper = rootDeviceEnvironment.getGmmHelper(); - if (MemorySynchronizationCommands::isDcFlushAllowed(true)) { + const auto &hwInfo = *rootDeviceEnvironment.getHardwareInfo(); + if (MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } else { postSyncData.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); } - EncodeDispatchKernel::adjustTimestampPacket(*walkerCmd, *rootDeviceEnvironment.getHardwareInfo()); + EncodeDispatchKernel::adjustTimestampPacket(*walkerCmd, hwInfo); if (DebugManager.flags.OverridePostSyncMocs.get() != -1) { postSyncData.setMocs(DebugManager.flags.OverridePostSyncMocs.get()); diff --git a/opencl/source/command_queue/hardware_interface_base.inl b/opencl/source/command_queue/hardware_interface_base.inl index 21d8ec2305..16a2358d57 100644 --- a/opencl/source/command_queue/hardware_interface_base.inl +++ b/opencl/source/command_queue/hardware_interface_base.inl @@ -307,14 +307,15 @@ inline void HardwareInterface::dispatchDebugPauseCommands( using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; using POST_SYNC_OPERATION = typename PIPE_CONTROL::POST_SYNC_OPERATION; + const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( *commandStream, POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, static_cast(confirmationTrigger), - commandQueue.getDevice().getHardwareInfo(), + hwInfo, args); } diff --git a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl index 1c30a81309..03fa671394 100644 --- a/opencl/source/command_queue/hardware_interface_xehp_and_later.inl +++ b/opencl/source/command_queue/hardware_interface_xehp_and_later.inl @@ -97,8 +97,9 @@ inline void HardwareInterface::programWalker( auto isCcsUsed = EngineHelpers::isCcs(commandQueue.getGpgpuEngine().osContext->getEngineType()); auto kernelUsesLocalIds = HardwareCommandsHelper::kernelUsesLocalIds(kernel); + const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); if (auto kernelAllocation = kernelInfo.getGraphicsAllocation()) { - EncodeMemoryPrefetch::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.KernelHeapSize, 0, commandQueue.getDevice().getHardwareInfo()); + EncodeMemoryPrefetch::programMemoryPrefetch(commandStream, *kernelAllocation, kernelInfo.heapInfo.KernelHeapSize, 0, hwInfo); } HardwareCommandsHelper::sendIndirectState( @@ -122,7 +123,7 @@ inline void HardwareInterface::programWalker( numWorkGroups, localWorkSizes, simd, dim, localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder); - EncodeDispatchKernel::encodeAdditionalWalkerFields(commandQueue.getDevice().getHardwareInfo(), walkerCmd, kernel.getExecutionType()); + EncodeDispatchKernel::encodeAdditionalWalkerFields(hwInfo, walkerCmd, kernel.getExecutionType()); auto devices = queueCsr.getOsContext().getDeviceBitfield(); auto partitionWalker = ImplicitScalingHelper::isImplicitScalingEnabled(devices, !kernel.isSingleSubdevicePreferred()); @@ -137,7 +138,8 @@ inline void HardwareInterface::programWalker( false, false, kernel.usesImages(), - workPartitionAllocationGpuVa); + workPartitionAllocationGpuVa, + hwInfo); if (queueCsr.isStaticWorkPartitioningEnabled()) { queueCsr.setActivePartitions(std::max(queueCsr.getActivePartitions(), partitionCount)); } diff --git a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl index acb704a152..75d83e0685 100644 --- a/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl +++ b/opencl/source/helpers/hardware_commands_helper_bdw_and_later.inl @@ -8,6 +8,7 @@ #pragma once #include "shared/source/helpers/hw_helper.h" +#include "opencl/source/command_queue/command_queue.h" #include "opencl/source/helpers/hardware_commands_helper.h" #include "opencl/source/kernel/kernel.h" @@ -164,8 +165,9 @@ void HardwareCommandsHelper::setInterfaceDescriptorOffset( template void HardwareCommandsHelper::programCacheFlushAfterWalkerCommand(LinearStream *commandStream, const CommandQueue &commandQueue, const Kernel *kernel, uint64_t postSyncAddress) { + const auto &hwInfo = commandQueue.getDevice().getHardwareInfo(); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); MemorySynchronizationCommands::addPipeControl(*commandStream, args); } diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp index aad17e8614..f1717e8fb6 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests.cpp @@ -1367,7 +1367,7 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenAuxToNonAuxWhenTran ASSERT_EQ(2u, pipeControls.size()); auto beginPipeControl = genCmdCast(*(pipeControls[0])); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), beginPipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), beginPipeControl->getDcFlushEnable()); EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); auto endPipeControl = genCmdCast(*(pipeControls[1])); @@ -1423,7 +1423,7 @@ HWTEST_P(DispatchWalkerTestForAuxTranslation, givenKernelWhenNonAuxToAuxWhenTran bool dcFlushRequired = (pClDevice->getHardwareInfo().platform.eRenderCoreFamily == IGFX_GEN8_CORE); auto beginPipeControl = genCmdCast(*(pipeControls[0])); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), beginPipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), beginPipeControl->getDcFlushEnable()); EXPECT_TRUE(beginPipeControl->getCommandStreamerStallEnable()); auto endPipeControl = genCmdCast(*(pipeControls[1])); diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp index 45897cbea8..a27d18c7c3 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_dg2_and_later.cpp @@ -147,7 +147,7 @@ HWTEST2_F(Dg2AndLaterDispatchWalkerBasicTest, givenTimestampPacketWhenDispatchin auto gmmHelper = device->getGmmHelper(); - auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walker->getPostSync().getOperation()); @@ -190,7 +190,7 @@ HWTEST2_F(Dg2AndLaterDispatchWalkerBasicTest, givenDebugVariableEnabledWhenEnque auto walker = genCmdCast(*hwParser.itorWalker); auto gmmHelper = device->getGmmHelper(); - auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto &postSyncData = walker->getPostSync(); EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, diff --git a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp index 0f3266f7e4..1d3a6482a3 100644 --- a/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_queue/dispatch_walker_tests_xehp_and_later.cpp @@ -451,7 +451,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenTimestamp EXPECT_NE(hwParser.itorWalker, hwParser.cmdList.end()); auto gmmHelper = device->getGmmHelper(); - auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto walker = genCmdCast(*hwParser.itorWalker); EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, walker->getPostSync().getOperation()); @@ -491,7 +491,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, XeHPAndLaterDispatchWalkerBasicTest, givenDebugVari auto walker = genCmdCast(*hwParser.itorWalker); auto gmmHelper = device->getGmmHelper(); - auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); + auto expectedMocs = MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo) ? gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED) : gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER); auto &postSyncData = walker->getPostSync(); EXPECT_EQ(FamilyType::POSTSYNC_DATA::OPERATION::OPERATION_WRITE_TIMESTAMP, diff --git a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp index d32c172af2..ee6dec4f52 100644 --- a/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_kernel_1_tests.cpp @@ -1540,7 +1540,7 @@ struct PauseOnGpuTests : public EnqueueKernelTest { EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); EXPECT_EQ(PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, pipeControlCmd->getPostSyncOperation()); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp index f4ccd7e105..88e7b0b631 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_1_tests.cpp @@ -579,7 +579,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenStateBaseAddressWhenItIsRequi EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = (typename FamilyType::PIPE_CONTROL *)*pipeControlItor; EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenNotApplicableL3ConfigWhenFlushingTaskThenDontReloadSba) { @@ -1242,11 +1242,11 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, GivenBlockingWh // Verify that the dcFlushEnabled bit is not set in PC auto pCmd = reinterpret_cast(pipeControlTask); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pCmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pCmd->getDcFlushEnable()); } } else { auto pCmd = reinterpret_cast(*itorPC); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pCmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pCmd->getDcFlushEnable()); } } @@ -1290,7 +1290,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelRequiringDCFlush // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pCmdWA->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pCmdWA->getDcFlushEnable()); buffer->release(); } diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp index 28259a7145..6d05b4b9af 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_2_tests.cpp @@ -75,7 +75,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenBlockedKernelNotRequiringDCFl // Verify that the dcFlushEnabled bit is set in PC auto pCmdWA = reinterpret_cast(*itorPC); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pCmdWA->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pCmdWA->getDcFlushEnable()); buffer->release(); } @@ -366,10 +366,10 @@ HWCMDTEST_F(IGFX_GEN8_CORE, CommandStreamReceiverFlushTaskTests, EXPECT_NE(cmdList.end(), itorCmdP); auto itorCmd2 = find(itorCmdP, cmdList.end()); cmdPC = (PIPE_CONTROL *)*itorCmd2; - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmdPC->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmdPC->getDcFlushEnable()); } else { // single PIPE_CONTROL following GPGPU_WALKER has DcFlush and Write HwTag - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), cmdPC->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), cmdPC->getDcFlushEnable()); } retVal = clReleaseEvent(event); diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp index a85b7f15d1..9814368163 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_3_tests.cpp @@ -983,7 +983,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWhenCommandA auto pipeControl = genCmdCast(*itorPipeControl); mockCsr->flushBatchedSubmissions(); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrderModeFisabledWhenCommandAreSubmittedThenDcFlushIsAdded) { @@ -1015,7 +1015,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenCsrInBatchingModeWithOutOfOrd auto pipeControl = genCmdCast(*itorPipeControl); mockCsr->flushBatchedSubmissions(); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenUpdateTaskCountFromWaitSetWhenFlushTaskThenThereIsNoPipeControlForUpdateTaskCount) { @@ -1173,7 +1173,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlThenDcFlus auto pipeControl = genCmdCast(cmdBuffer->epiloguePipeControlLocation); ASSERT_NE(nullptr, pipeControl); mockCsr->flushBatchedSubmissions(); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } HWTEST_F(CommandStreamReceiverFlushTaskTests, givenEpiloguePipeControlWhendDcFlushDisabledByDebugFlagThenDcFlushIsDisabled) { diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp index a302a75d93..282a8f1cfe 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_flush_task_tests_xehp_and_later.cpp @@ -91,7 +91,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandStreamReceiverFlushTaskXeHPAndLaterTests, gi EXPECT_NE(stateBaseAddressItor, pipeControlItor); auto pipeControlCmd = reinterpret_cast(*pipeControlItor); EXPECT_TRUE(pipeControlCmd->getTextureCacheInvalidationEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getHdcPipelineFlush()); } diff --git a/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp b/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp index 88e01a1c74..6d3c12f787 100644 --- a/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp +++ b/opencl/test/unit_test/gen11/hw_helper_tests_gen11.cpp @@ -50,7 +50,7 @@ GEN11TEST_F(MemorySynchronizatiopCommandsTestsGen11, WhenProgrammingCacheFlushTh std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); - MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream, *defaultHwInfo); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); diff --git a/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp b/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp index d573369f76..a26bd5b10d 100644 --- a/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp +++ b/opencl/test/unit_test/gen8/hw_helper_tests_gen8.cpp @@ -53,7 +53,7 @@ GEN8TEST_F(MemorySynchronizatiopCommandsTestsGen8, WhenProgrammingCacheFlushThen std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); - MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream, *defaultHwInfo); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); diff --git a/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp b/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp index 98f9f6caf7..70a90a1b89 100644 --- a/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp +++ b/opencl/test/unit_test/gen9/hw_helper_tests_gen9.cpp @@ -62,7 +62,7 @@ GEN9TEST_F(MemorySynchronizatiopCommandsTestsGen9, WhenProgrammingCacheFlushThen std::unique_ptr buffer(new uint8_t[128]); LinearStream stream(buffer.get(), 128); - MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream, *defaultHwInfo); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getConstantCacheInvalidationEnable()); diff --git a/opencl/test/unit_test/helpers/hw_helper_tests.cpp b/opencl/test/unit_test/helpers/hw_helper_tests.cpp index e3d030e704..f1ec481707 100644 --- a/opencl/test/unit_test/helpers/hw_helper_tests.cpp +++ b/opencl/test/unit_test/helpers/hw_helper_tests.cpp @@ -289,7 +289,7 @@ HWTEST_F(PipeControlHelperTests, givenPostSyncWriteTimestampModeWhenHelperIsUsed } HWTEST_F(PipeControlHelperTests, givenHwHelperwhenAskingForDcFlushThenReturnTrue) { - EXPECT_TRUE(MemorySynchronizationCommands::isDcFlushAllowed(true)); + EXPECT_TRUE(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)); } HWTEST_F(PipeControlHelperTests, givenDcFlushNotAllowedWhenProgrammingPipeControlThenDontSetDcFlush) { @@ -367,6 +367,12 @@ HWTEST_F(PipeControlHelperTests, givenNotifyEnableArgumentIsTrueWhenHelperIsUsed EXPECT_TRUE(memcmp(pipeControl, &expectedPipeControl, sizeof(PIPE_CONTROL)) == 0); } +HWTEST_F(PipeControlHelperTests, WhenIsDcFlushAllowedIsCalledThenCorrectResultIsReturned) { + auto &hwInfoConfig = *HwInfoConfig::get(defaultHwInfo->platform.eProductFamily); + EXPECT_FALSE(MemorySynchronizationCommands::isDcFlushAllowed(false, *defaultHwInfo)); + EXPECT_EQ(hwInfoConfig.isDcFlushAllowed(), MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)); +} + TEST(HwInfoTest, givenHwInfoWhenChosenEngineTypeQueriedThenDefaultIsReturned) { HardwareInfo hwInfo = *defaultHwInfo; hwInfo.capabilityTable.defaultEngineType = aub_stream::ENGINE_RCS; @@ -1069,12 +1075,12 @@ HWTEST_F(PipeControlHelperTests, WhenProgrammingCacheFlushThenExpectBasicFieldsS LinearStream stream(buffer.get(), 128); - MemorySynchronizationCommands::addFullCacheFlush(stream); + MemorySynchronizationCommands::addFullCacheFlush(stream, *defaultHwInfo); PIPE_CONTROL *pipeControl = genCmdCast(buffer.get()); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); EXPECT_TRUE(pipeControl->getRenderTargetCacheFlushEnable()); EXPECT_TRUE(pipeControl->getInstructionCacheInvalidateEnable()); diff --git a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp index ee80677853..a3dc255c6b 100644 --- a/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_bcs_tests.cpp @@ -579,7 +579,7 @@ void BcsBufferTests::waitForCacheFlushFromBcsTest(MockCommandQueueHw auto pipeControlCmd = genCmdCast(*pipeControl); cacheFlushWriteAddress = NEO::UnitTestHelper::getPipeControlPostSyncAddress(*pipeControlCmd); if (cacheFlushWriteAddress != 0) { - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); EXPECT_EQ(isCacheFlushForBcsRequired, 0u == pipeControlCmd->getImmediateData()); break; @@ -693,7 +693,7 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBarrierWhenReleasingMultipleBlockedEnque stallingPipeControlFound = true; EXPECT_TRUE(pipeControlCmd->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControlCmd->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControlCmd->getDcFlushEnable()); break; } } diff --git a/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp index 5e6204d19b..f8f9684d03 100644 --- a/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp @@ -32,7 +32,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenPvcThenAuxTranslationIsNotRequire } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenHwHelperwhenAskingForDcFlushThenReturnFalse) { - EXPECT_FALSE(MemorySynchronizationCommands::isDcFlushAllowed(true)); + EXPECT_FALSE(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)); } XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, givenCommandBufferAllocationTypeWhenGetAllocationDataIsCalledThenSystemMemoryIsRequested) { diff --git a/shared/source/command_container/command_encoder_bdw_and_later.inl b/shared/source/command_container/command_encoder_bdw_and_later.inl index 4df1215914..f6343e3594 100644 --- a/shared/source/command_container/command_encoder_bdw_and_later.inl +++ b/shared/source/command_container/command_encoder_bdw_and_later.inl @@ -112,7 +112,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor, dispatchInterface->getDynamicStateHeapData(), - device->getBindlessHeapsHelper(), device->getHardwareInfo()); + device->getBindlessHeapsHelper(), hwInfo); } idd.setSamplerStatePointer(samplerStateOffset); @@ -169,7 +169,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, if (flush) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); if (dirtyHeaps) { args.hdcPipelineFlush = true; } diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index 04144d25cf..c23528a122 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -137,7 +137,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, heap, kernelDescriptor.payloadMappings.samplerTable.tableOffset, kernelDescriptor.payloadMappings.samplerTable.numSamplers, kernelDescriptor.payloadMappings.samplerTable.borderColor, dispatchInterface->getDynamicStateHeapData(), - device->getBindlessHeapsHelper(), device->getHardwareInfo()); + device->getBindlessHeapsHelper(), hwInfo); if (ApiSpecificConfig::getBindlessConfiguration()) { container.getResidencyContainer().push_back(device->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::BindlesHeapType::GLOBAL_DSH)->getGraphicsAllocation()); } @@ -209,7 +209,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, if (container.isAnyHeapDirty() || requiresUncachedMocs || requiresGlobalAtomicsUpdate) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); MemorySynchronizationCommands::addPipeControl(*container.getCommandStream(), args); STATE_BASE_ADDRESS sbaCmd; auto gmmHelper = container.getDevice()->getGmmHelper(); @@ -251,7 +251,7 @@ void EncodeDispatchKernel::encode(CommandContainer &container, postSync.setDestinationAddress(eventAddress); auto gmmHelper = device->getRootDeviceEnvironment().getGmmHelper(); - if (MemorySynchronizationCommands::isDcFlushAllowed(true)) { + if (MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo)) { postSync.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED)); } else { postSync.setMocs(gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)); @@ -285,7 +285,8 @@ void EncodeDispatchKernel::encode(CommandContainer &container, true, true, false, - workPartitionAllocationGpuVa); + workPartitionAllocationGpuVa, + hwInfo); } else { partitionCount = 1; auto buffer = listCmdBufferStream->getSpace(sizeof(walkerCmd)); diff --git a/shared/source/command_container/implicit_scaling.h b/shared/source/command_container/implicit_scaling.h index c11c0fe54e..2410a6bf5b 100644 --- a/shared/source/command_container/implicit_scaling.h +++ b/shared/source/command_container/implicit_scaling.h @@ -51,7 +51,8 @@ struct ImplicitScalingDispatch { bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, - uint64_t workPartitionAllocationGpuVa); + uint64_t workPartitionAllocationGpuVa, + const HardwareInfo &hwInfo); static bool &getPipeControlStallRequired(); diff --git a/shared/source/command_container/implicit_scaling_xehp_and_later.inl b/shared/source/command_container/implicit_scaling_xehp_and_later.inl index e5b7fab833..73438f0ce8 100644 --- a/shared/source/command_container/implicit_scaling_xehp_and_later.inl +++ b/shared/source/command_container/implicit_scaling_xehp_and_later.inl @@ -82,7 +82,8 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS bool useSecondaryBatchBuffer, bool apiSelfCleanup, bool usesImages, - uint64_t workPartitionAllocationGpuVa) { + uint64_t workPartitionAllocationGpuVa, + const HardwareInfo &hwInfo) { uint32_t totalProgrammedSize = 0u; const uint32_t tileCount = static_cast(devices.count()); const bool preferStaticPartitioning = workPartitionAllocationGpuVa != 0u; @@ -106,7 +107,8 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS cmdBufferGpuAddress, &walkerCmd, totalProgrammedSize, - args); + args, + hwInfo); } else { if (DebugManager.flags.ExperimentalSetWalkerPartitionCount.get()) { partitionCount = DebugManager.flags.ExperimentalSetWalkerPartitionCount.get(); @@ -120,7 +122,8 @@ void ImplicitScalingDispatch::dispatchCommands(LinearStream &commandS cmdBufferGpuAddress, &walkerCmd, totalProgrammedSize, - args); + args, + hwInfo); } commandStream.getSpace(totalProgrammedSize); } diff --git a/shared/source/command_container/walker_partition_xehp_and_later.h b/shared/source/command_container/walker_partition_xehp_and_later.h index 6498c2ced0..2908ec3cd0 100644 --- a/shared/source/command_container/walker_partition_xehp_and_later.h +++ b/shared/source/command_container/walker_partition_xehp_and_later.h @@ -518,7 +518,8 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, uint64_t gpuAddressOfAllocation, COMPUTE_WALKER *inputWalker, uint32_t &totalBytesProgrammed, - WalkerPartitionArgs &args) { + WalkerPartitionArgs &args, + const NEO::HardwareInfo &hwInfo) { totalBytesProgrammed = 0u; void *currentBatchBufferPointer = cpuPointer; @@ -559,7 +560,7 @@ void constructDynamicallyPartitionedCommandBuffer(void *cpuPointer, if (args.emitPipeControlStall) { NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); } @@ -660,7 +661,8 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, uint64_t gpuAddressOfAllocation, COMPUTE_WALKER *inputWalker, uint32_t &totalBytesProgrammed, - WalkerPartitionArgs &args) { + WalkerPartitionArgs &args, + const NEO::HardwareInfo &hwInfo) { totalBytesProgrammed = 0u; void *currentBatchBufferPointer = cpuPointer; @@ -688,7 +690,7 @@ void constructStaticallyPartitionedCommandBuffer(void *cpuPointer, if (args.emitPipeControlStall) { NEO::PipeControlArgs args; - args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = NEO::MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); programPipeControlCommand(currentBatchBufferPointer, totalBytesProgrammed, args); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 23e6036c76..fd14ef1245 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -54,10 +54,11 @@ CommandStreamReceiverHw::CommandStreamReceiverHw(ExecutionEnvironment const DeviceBitfield deviceBitfield) : CommandStreamReceiver(executionEnvironment, rootDeviceIndex, deviceBitfield) { - auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); - localMemoryEnabled = hwHelper.getEnableLocalMemory(peekHwInfo()); + const auto &hwInfo = peekHwInfo(); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); + localMemoryEnabled = hwHelper.getEnableLocalMemory(hwInfo); - resetKmdNotifyHelper(new KmdNotifyHelper(&peekHwInfo().capabilityTable.kmdNotifyProperties)); + resetKmdNotifyHelper(new KmdNotifyHelper(&hwInfo.capabilityTable.kmdNotifyProperties)); if (DebugManager.flags.FlattenBatchBufferForAUBDump.get() || DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { flatBatchBufferHelper.reset(new FlatBatchBufferHelperHw(executionEnvironment)); @@ -184,6 +185,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( initProgrammingFlags(); } + const auto &hwInfo = peekHwInfo(); bool updateTag = false; if (dispatchFlags.blocking || dispatchFlags.dcFlush || dispatchFlags.guardCommandBufferWithPipeControl) { if (this->dispatchMode == DispatchMode::ImmediateDispatch) { @@ -211,7 +213,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( if (updateTag) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(dispatchFlags.dcFlush); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(dispatchFlags.dcFlush, hwInfo); args.notifyEnable = isUsedNotifyEnableForPostSync(); args.tlbInvalidation |= dispatchFlags.memoryMigrationRequired; args.textureCacheInvalidationEnable |= dispatchFlags.textureCacheFlush; @@ -221,7 +223,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, address, taskCount + 1, - peekHwInfo(), + hwInfo, args); } else { currentPipeControlForNooping = nullptr; @@ -247,13 +249,13 @@ CompletionStamp CommandStreamReceiverHw::flushTask( dispatchFlags.useSLM = true; } - auto newL3Config = PreambleHelper::getL3Config(peekHwInfo(), dispatchFlags.useSLM); + auto newL3Config = PreambleHelper::getL3Config(hwInfo, dispatchFlags.useSLM); auto isSpecialPipelineSelectModeChanged = PreambleHelper::isSpecialPipelineSelectModeChanged(lastSpecialPipelineSelectMode, dispatchFlags.pipelineSelectArgs.specialPipelineSelectMode, - peekHwInfo()); + hwInfo); if (dispatchFlags.threadArbitrationPolicy == ThreadArbitrationPolicy::NotPresent) { - auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); dispatchFlags.threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy(); } if (dispatchFlags.numGrfRequired == GrfConfig::NotApplicable) { @@ -329,7 +331,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( } programHardwareContext(commandStreamCSR); - programComputeMode(commandStreamCSR, dispatchFlags, device.getHardwareInfo()); + programComputeMode(commandStreamCSR, dispatchFlags, hwInfo); programPipelineSelect(commandStreamCSR, dispatchFlags.pipelineSelectArgs); programL3(commandStreamCSR, newL3Config); programPreamble(commandStreamCSR, device, newL3Config); @@ -355,7 +357,7 @@ CompletionStamp CommandStreamReceiverHw::flushTask( auto isStateBaseAddressDirty = dshDirty || iohDirty || sshDirty || stateBaseAddressDirty; auto mocsIndex = latestSentStatelessMocsConfig; - auto &hwHelper = HwHelper::get(peekHwInfo().platform.eRenderCoreFamily); + auto &hwHelper = HwHelper::get(hwInfo.platform.eRenderCoreFamily); if (dispatchFlags.l3CacheSettings != L3CachingSettings::NotApplicable) { auto l3On = dispatchFlags.l3CacheSettings != L3CachingSettings::l3CacheOff; @@ -678,7 +680,8 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { ResidencyContainer surfacesForSubmit; ResourcePackage resourcePackage; - auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo()); + const auto &hwInfo = peekHwInfo(); + auto pipeControlLocationSize = MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo); void *currentPipeControlForNooping = nullptr; void *epiloguePipeControlLocation = nullptr; @@ -705,7 +708,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { //noop pipe control if (currentPipeControlForNooping) { if (DebugManager.flags.AddPatchInfoCommentsForAUBDump.get()) { - flatBatchBufferHelper->removePipeControlData(pipeControlLocationSize, currentPipeControlForNooping, peekHwInfo()); + flatBatchBufferHelper->removePipeControlData(pipeControlLocationSize, currentPipeControlForNooping, hwInfo); } memset(currentPipeControlForNooping, 0, pipeControlLocationSize); } @@ -745,7 +748,7 @@ inline bool CommandStreamReceiverHw::flushBatchedSubmissions() { //make sure we flush DC if needed if (epiloguePipeControlLocation) { bool flushDcInEpilogue = MemorySynchronizationCommands::isDcFlushAllowed( - !DebugManager.flags.DisableDcFlushInEpilogue.get()); + !DebugManager.flags.DisableDcFlushInEpilogue.get(), hwInfo); ((PIPE_CONTROL *)epiloguePipeControlLocation)->setDcFlushEnable(flushDcInEpilogue); } @@ -1184,18 +1187,19 @@ void CommandStreamReceiverHw::flushPipeControl() { auto lock = obtainUniqueOwnership(); - auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(peekHwInfo())); + const auto &hwInfo = peekHwInfo(); + auto &commandStream = getCS(MemorySynchronizationCommands::getSizeForPipeControlWithPostSyncOperation(hwInfo)); auto commandStreamStart = commandStream.getUsed(); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); args.notifyEnable = isUsedNotifyEnableForPostSync(); args.workloadPartitionOffset = isMultiTileOperationEnabled(); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, getTagAllocation()->getGpuAddress(), taskCount + 1, - peekHwInfo(), + hwInfo, args); makeResident(*tagAllocation); @@ -1214,12 +1218,13 @@ void CommandStreamReceiverHw::flushPipeControl(GraphicsAllocation *ev programHardwareContext(commandStream); + const auto &hwInfo = peekHwInfo(); if (eventAlloc) { MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation(commandStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, immediateGpuAddress, immediateData, - peekHwInfo(), + hwInfo, args); makeResident(*eventAlloc); } else { diff --git a/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl index 2e7123e9b1..be11592f17 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_bdw_and_later.inl @@ -101,7 +101,7 @@ inline void CommandStreamReceiverHw::addPipeControlPriorToNonPipeline template inline void CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, peekHwInfo()); args.textureCacheInvalidationEnable = true; addPipeControlPriorToNonPipelinedStateCommand(commandStream, args); @@ -174,14 +174,15 @@ inline void CommandStreamReceiverHw::programStallingNoPostSyncCommand template inline void CommandStreamReceiverHw::programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode) { auto barrierTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(tagNode); + const auto &hwInfo = peekHwInfo(); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdStream, PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, barrierTimestampPacketGpuAddress, 0, - peekHwInfo(), + hwInfo, args); } diff --git a/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl index 6d126dfcce..514b0ac914 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_dg2_and_later.inl @@ -53,7 +53,7 @@ inline void CommandStreamReceiverHw::addPipeControlBefore3dState(Line auto &hwInfo = peekHwInfo(); auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); if (hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs()) && dispatchFlags.usePerDssBackedBuffer && !isPerDssBackedBufferSent) { DEBUG_BREAK_IF(perDssBackedBuffer == nullptr); diff --git a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl index 194af31be7..0cce4242a3 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_tgllp_and_later.inl @@ -23,7 +23,7 @@ void CommandStreamReceiverHw::programComputeMode(LinearStream &stream auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); if (hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs())) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); addPipeControlPriorToNonPipelinedStateCommand(stream, args); } @@ -49,7 +49,7 @@ inline bool CommandStreamReceiverHw::isComputeModeNeeded() const { template <> inline void CommandStreamReceiverHw::addPipeControlBeforeStateBaseAddress(LinearStream &commandStream) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, peekHwInfo()); args.textureCacheInvalidationEnable = true; args.hdcPipelineFlush = true; diff --git a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl index a7d6e18b2e..ca167a645f 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_xehp_and_later.inl @@ -188,7 +188,7 @@ inline void CommandStreamReceiverHw::addPipeControlBeforeStateSip(Lin auto hwInfoConfig = HwInfoConfig::get(hwInfo.platform.eProductFamily); bool debuggingEnabled = device.getDebugger() != nullptr; PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); if (hwInfoConfig->isPipeControlPriorToNonPipelinedStateCommandsWARequired(hwInfo, isRcs()) && debuggingEnabled && !hwHelper.isSipWANeeded(hwInfo)) { @@ -220,12 +220,13 @@ inline size_t CommandStreamReceiverHw::getCmdSizeForStallingPostSyncC template inline void CommandStreamReceiverHw::programStallingNoPostSyncCommandsForBarrier(LinearStream &cmdStream) { + const auto &hwInfo = peekHwInfo(); PipeControlArgs args; if (isMultiTileOperationEnabled()) { ImplicitScalingDispatch::dispatchBarrierCommands(cmdStream, this->deviceBitfield, args, - peekHwInfo(), + hwInfo, 0, 0, false, @@ -238,14 +239,15 @@ inline void CommandStreamReceiverHw::programStallingNoPostSyncCommand template inline void CommandStreamReceiverHw::programStallingPostSyncCommandsForBarrier(LinearStream &cmdStream, TagNodeBase &tagNode) { auto barrierTimestampPacketGpuAddress = TimestampPacketHelper::getContextEndGpuAddress(tagNode); + const auto &hwInfo = peekHwInfo(); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); if (isMultiTileOperationEnabled()) { args.workloadPartitionOffset = true; ImplicitScalingDispatch::dispatchBarrierCommands(cmdStream, this->deviceBitfield, args, - peekHwInfo(), + hwInfo, barrierTimestampPacketGpuAddress, 0, false, @@ -257,7 +259,7 @@ inline void CommandStreamReceiverHw::programStallingPostSyncCommandsF PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, barrierTimestampPacketGpuAddress, 0, - peekHwInfo(), + hwInfo, args); } } diff --git a/shared/source/direct_submission/dispatchers/render_dispatcher.inl b/shared/source/direct_submission/dispatchers/render_dispatcher.inl index 15afb3006c..882baad168 100644 --- a/shared/source/direct_submission/dispatchers/render_dispatcher.inl +++ b/shared/source/direct_submission/dispatchers/render_dispatcher.inl @@ -34,7 +34,7 @@ inline void RenderDispatcher::dispatchMonitorFence(LinearStream &cmdB bool partitionedWorkload) { using POST_SYNC_OPERATION = typename GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION; PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); args.workloadPartitionOffset = partitionedWorkload; MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdBuffer, @@ -53,7 +53,7 @@ inline size_t RenderDispatcher::getSizeMonitorFence(const HardwareInf template inline void RenderDispatcher::dispatchCacheFlush(LinearStream &cmdBuffer, const HardwareInfo &hwInfo, uint64_t address) { - MemorySynchronizationCommands::addFullCacheFlush(cmdBuffer); + MemorySynchronizationCommands::addFullCacheFlush(cmdBuffer, hwInfo); } template diff --git a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp index d8689143f4..b594d3b09b 100644 --- a/shared/source/gen11/command_stream_receiver_hw_gen11.cpp +++ b/shared/source/gen11/command_stream_receiver_hw_gen11.cpp @@ -38,11 +38,12 @@ template <> void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, DispatchFlags &dispatchFlags) { using PWR_CLK_STATE_REGISTER = Family::PWR_CLK_STATE_REGISTER; - if (HwInfoConfig::get(peekHwInfo().platform.eProductFamily)->isAdditionalMediaSamplerProgrammingRequired()) { + const auto &hwInfo = peekHwInfo(); + if (HwInfoConfig::get(hwInfo.platform.eProductFamily)->isAdditionalMediaSamplerProgrammingRequired()) { if (dispatchFlags.pipelineSelectArgs.mediaSamplerRequired) { if (!lastVmeSubslicesConfig) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); args.renderTargetCacheFlushEnable = true; args.instructionCacheInvalidateEnable = true; args.textureCacheInvalidationEnable = true; @@ -52,13 +53,13 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, args.stateCacheInvalidationEnable = true; MemorySynchronizationCommands::addPipeControl(stream, args); - uint32_t numSubslices = peekHwInfo().gtSystemInfo.SubSliceCount; + uint32_t numSubslices = hwInfo.gtSystemInfo.SubSliceCount; uint32_t numSubslicesWithVme = numSubslices / 2; // 1 VME unit per DSS uint32_t numSlicesForPowerGating = 1; // power gating supported only if #Slices = 1 PWR_CLK_STATE_REGISTER reg = Family::cmdInitPwrClkStateRegister; - reg.TheStructure.Common.EUmin = peekHwInfo().gtSystemInfo.MaxEuPerSubSlice; - reg.TheStructure.Common.EUmax = peekHwInfo().gtSystemInfo.MaxEuPerSubSlice; + reg.TheStructure.Common.EUmin = hwInfo.gtSystemInfo.MaxEuPerSubSlice; + reg.TheStructure.Common.EUmax = hwInfo.gtSystemInfo.MaxEuPerSubSlice; reg.TheStructure.Common.SSCountEn = 1; // Enable SScount reg.TheStructure.Common.SScount = numSubslicesWithVme; reg.TheStructure.Common.EnableSliceCountRequest = 1; // Enable SliceCountRequest @@ -76,7 +77,7 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, } else { if (lastVmeSubslicesConfig) { PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); args.renderTargetCacheFlushEnable = true; args.instructionCacheInvalidateEnable = true; args.textureCacheInvalidationEnable = true; @@ -93,13 +94,13 @@ void CommandStreamReceiverHw::programMediaSampler(LinearStream &stream, // In Gen11-LP, software programs this register as if GT consists of // 2 slices with 4 subslices in each slice. Hardware maps this to the // LP 1 slice/8-subslice physical layout - uint32_t numSubslices = peekHwInfo().gtSystemInfo.SubSliceCount; + uint32_t numSubslices = hwInfo.gtSystemInfo.SubSliceCount; uint32_t numSubslicesMapped = numSubslices / 2; - uint32_t numSlicesMapped = peekHwInfo().gtSystemInfo.SliceCount * 2; + uint32_t numSlicesMapped = hwInfo.gtSystemInfo.SliceCount * 2; PWR_CLK_STATE_REGISTER reg = Family::cmdInitPwrClkStateRegister; - reg.TheStructure.Common.EUmin = peekHwInfo().gtSystemInfo.MaxEuPerSubSlice; - reg.TheStructure.Common.EUmax = peekHwInfo().gtSystemInfo.MaxEuPerSubSlice; + reg.TheStructure.Common.EUmin = hwInfo.gtSystemInfo.MaxEuPerSubSlice; + reg.TheStructure.Common.EUmax = hwInfo.gtSystemInfo.MaxEuPerSubSlice; reg.TheStructure.Common.SSCountEn = 1; // Enable SScount reg.TheStructure.Common.SScount = numSubslicesMapped; reg.TheStructure.Common.EnableSliceCountRequest = 1; // Enable SliceCountRequest diff --git a/shared/source/gen8/hw_helper_gen8.cpp b/shared/source/gen8/hw_helper_gen8.cpp index c922f67a98..9e6a29aa86 100644 --- a/shared/source/gen8/hw_helper_gen8.cpp +++ b/shared/source/gen8/hw_helper_gen8.cpp @@ -59,7 +59,7 @@ bool HwHelperHw::isStatelesToStatefullWithOffsetSupported() const { template <> void MemorySynchronizationCommands::addPipeControl(LinearStream &commandStream, PipeControlArgs &args) { Family::PIPE_CONTROL cmd = Family::cmdInitPipeControl; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = true; MemorySynchronizationCommands::setPipeControl(cmd, args); Family::PIPE_CONTROL *cmdBuffer = commandStream.getSpaceForCmd(); *cmdBuffer = cmd; diff --git a/shared/source/helpers/hw_helper.h b/shared/source/helpers/hw_helper.h index c69cbbf0b6..7cb304e0ac 100644 --- a/shared/source/helpers/hw_helper.h +++ b/shared/source/helpers/hw_helper.h @@ -455,9 +455,9 @@ struct MemorySynchronizationCommands { static void addPipeControlWithCSStallOnly(LinearStream &commandStream); - static bool isDcFlushAllowed(bool isFlushPreferred); + static bool isDcFlushAllowed(bool isFlushPreferred, const HardwareInfo &hwInfo); - static void addFullCacheFlush(LinearStream &commandStream); + static void addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo); static void setCacheFlushExtraProperties(PipeControlArgs &args); static size_t getSizeForPipeControlWithPostSyncOperation(const HardwareInfo &hwInfo); diff --git a/shared/source/helpers/hw_helper_base.inl b/shared/source/helpers/hw_helper_base.inl index 102c1696fd..e78708b9aa 100644 --- a/shared/source/helpers/hw_helper_base.inl +++ b/shared/source/helpers/hw_helper_base.inl @@ -345,8 +345,12 @@ void MemorySynchronizationCommands::setPipeControl(typename GfxFamily } template -bool MemorySynchronizationCommands::isDcFlushAllowed(bool isFlushPreferred) { - return isFlushPreferred; +bool MemorySynchronizationCommands::isDcFlushAllowed(bool isFlushPreferred, const HardwareInfo &hwInfo) { + if (isFlushPreferred) { + const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily); + return hwInfoConfig.isDcFlushAllowed(); + } + return false; } template @@ -543,14 +547,14 @@ size_t MemorySynchronizationCommands::getSizeForFullCacheFlush() { } template -void MemorySynchronizationCommands::addFullCacheFlush(LinearStream &commandStream) { +void MemorySynchronizationCommands::addFullCacheFlush(LinearStream &commandStream, const HardwareInfo &hwInfo) { using PIPE_CONTROL = typename GfxFamily::PIPE_CONTROL; PIPE_CONTROL *pipeControl = commandStream.getSpaceForCmd(); PIPE_CONTROL cmd = GfxFamily::cmdInitPipeControl; PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); args.renderTargetCacheFlushEnable = true; args.instructionCacheInvalidateEnable = true; args.textureCacheInvalidationEnable = true; diff --git a/shared/source/helpers/timestamp_packet.h b/shared/source/helpers/timestamp_packet.h index af97d3667f..abee335165 100644 --- a/shared/source/helpers/timestamp_packet.h +++ b/shared/source/helpers/timestamp_packet.h @@ -174,7 +174,7 @@ struct TimestampPacketHelper { auto cacheFlushTimestampPacketGpuAddress = getContextEndGpuAddress(*timestampPacketDependencies->cacheFlushNodes.peekNodes()[0]); PipeControlArgs args; - args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true); + args.dcFlushEnable = MemorySynchronizationCommands::isDcFlushAllowed(true, hwInfo); MemorySynchronizationCommands::addPipeControlAndProgramPostSyncOperation( cmdStream, GfxFamily::PIPE_CONTROL::POST_SYNC_OPERATION::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA, cacheFlushTimestampPacketGpuAddress, 0, hwInfo, args); diff --git a/shared/source/os_interface/hw_info_config.h b/shared/source/os_interface/hw_info_config.h index a855b45897..ecb00f22bd 100644 --- a/shared/source/os_interface/hw_info_config.h +++ b/shared/source/os_interface/hw_info_config.h @@ -80,6 +80,7 @@ class HwInfoConfig { virtual bool isStorageInfoAdjustmentRequired() const = 0; virtual bool isBlitterForImagesSupported() const = 0; virtual bool isTile64With3DSurfaceOnBCSSupported(const HardwareInfo &hwInfo) const = 0; + virtual bool isDcFlushAllowed() const = 0; virtual uint32_t computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const = 0; virtual bool getUuid(Device *device, std::array &uuid) const = 0; @@ -146,6 +147,7 @@ class HwInfoConfigHw : public HwInfoConfig { bool isStorageInfoAdjustmentRequired() const override; bool isBlitterForImagesSupported() const override; bool isTile64With3DSurfaceOnBCSSupported(const HardwareInfo &hwInfo) const override; + bool isDcFlushAllowed() const override; uint32_t computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const override; bool getUuid(Device *device, std::array &uuid) const override; diff --git a/shared/source/os_interface/hw_info_config.inl b/shared/source/os_interface/hw_info_config.inl index 542c85564e..b99871451b 100644 --- a/shared/source/os_interface/hw_info_config.inl +++ b/shared/source/os_interface/hw_info_config.inl @@ -272,6 +272,11 @@ bool HwInfoConfigHw::isBlitterForImagesSupported() const { return false; } +template +bool HwInfoConfigHw::isDcFlushAllowed() const { + return true; +} + template uint32_t HwInfoConfigHw::computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const { return hwInfo.gtSystemInfo.MaxSubSlicesSupported; diff --git a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp index 4167baffa8..dfd3f2322f 100644 --- a/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/hw_helper_xe_hpc_core.cpp @@ -215,11 +215,6 @@ bool MemorySynchronizationCommands::isPipeControlWArequired(const Hardwa return false; } -template <> -bool MemorySynchronizationCommands::isDcFlushAllowed(bool isFlushPreferred) { - return false; -} - template <> size_t MemorySynchronizationCommands::getSizeForAdditonalSynchronization(const HardwareInfo &hwInfo) { return (DebugManager.flags.DisablePipeControlPrecedingPostSyncCommand.get() == 1 ? 2 : 1) * getSizeForSingleAdditionalSynchronization(hwInfo); diff --git a/shared/source/xe_hpc_core/os_agnostic_hw_info_config_pvc.inl b/shared/source/xe_hpc_core/os_agnostic_hw_info_config_pvc.inl index 409ee8991d..6c3f198fe5 100644 --- a/shared/source/xe_hpc_core/os_agnostic_hw_info_config_pvc.inl +++ b/shared/source/xe_hpc_core/os_agnostic_hw_info_config_pvc.inl @@ -110,3 +110,8 @@ bool HwInfoConfigHw::isPipeControlPriorToNonPipelinedStateCommandsWA return required; } + +template <> +bool HwInfoConfigHw::isDcFlushAllowed() const { + return false; +} diff --git a/shared/test/common/mocks/mock_hw_info_config.cpp b/shared/test/common/mocks/mock_hw_info_config.cpp index edc6219181..783176be91 100644 --- a/shared/test/common/mocks/mock_hw_info_config.cpp +++ b/shared/test/common/mocks/mock_hw_info_config.cpp @@ -261,6 +261,12 @@ template <> bool HwInfoConfigHw::isTile64With3DSurfaceOnBCSSupported(const HardwareInfo &hwInfo) const { return false; } + +template <> +bool HwInfoConfigHw::isDcFlushAllowed() const { + return true; +} + template <> uint32_t HwInfoConfigHw::computeMaxNeededSubSliceSpace(const HardwareInfo &hwInfo) const { return hwInfo.gtSystemInfo.MaxSubSlicesSupported; diff --git a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp index 9b936dbf4e..a79b08461e 100644 --- a/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_encode_dispatch_kernel_xehp_and_later.cpp @@ -309,7 +309,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, CommandEncodeStatesTest, givenEventAddressWhenEncod auto itor = find(commands.begin(), commands.end()); ASSERT_NE(itor, commands.end()); auto cmd = genCmdCast(*itor); - if (MemorySynchronizationCommands::isDcFlushAllowed(true)) { + if (MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo)) { EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED), cmd->getPostSync().getMocs()); } else { EXPECT_EQ(pDevice->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER), cmd->getPostSync().getMocs()); diff --git a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp index 1bada84fc8..8401ac86ed 100644 --- a/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp +++ b/shared/test/unit_test/encoders/test_implicit_scaling_xehp_and_later.cpp @@ -30,7 +30,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenGetSizeWhenDispatchingCm expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(2u, partitionCount); @@ -72,7 +72,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndNoPartiti expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, 0u); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, false, false, false, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(1u, partitionCount); @@ -115,7 +115,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenWorkgroupOneAndPartition expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, 0u, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(1u, partitionCount); @@ -161,7 +161,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenDi expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(2u, partitionCount); @@ -212,7 +213,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningWhenPa expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(32, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(2u, partitionCount); @@ -265,7 +267,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -315,7 +318,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -351,7 +355,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenStaticPartitioningPrefer expectedSize = ImplicitScalingDispatch::getSize(false, true, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -387,7 +392,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, GivenDynamicPartitioningPrefe expectedSize = ImplicitScalingDispatch::getSize(false, false, twoTile, Vec3(0, 0, 0), Vec3(1, 1, 1)); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -434,7 +440,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -501,7 +508,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -560,7 +568,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -619,7 +628,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -685,7 +695,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -747,7 +758,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -811,7 +823,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -878,7 +891,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, true, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); @@ -944,7 +958,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, ImplicitScalingTests, EXPECT_EQ(expectedSize, estimatedSize); uint32_t partitionCount = 0; - ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, workPartitionAllocationAddress); + ImplicitScalingDispatch::dispatchCommands(commandStream, walker, twoTile, partitionCount, true, false, false, + workPartitionAllocationAddress, *defaultHwInfo); totalBytesProgrammed = commandStream.getUsed(); EXPECT_EQ(expectedSize, totalBytesProgrammed); EXPECT_EQ(twoTile.count(), partitionCount); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp index 61e05c0737..fb4d629895 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_1.cpp @@ -6,6 +6,7 @@ */ #include "shared/source/helpers/hw_helper.h" +#include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/common/helpers/unit_test_helper.h" #include "shared/test/unit_test/encoders/walker_partition_fixture_xehp_and_later.h" @@ -46,7 +47,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst gpuVirtualAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -99,7 +101,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); @@ -164,7 +166,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed); auto parsedOffset = 0u; @@ -187,7 +190,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -243,7 +246,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(controlSectionOffset + sizeof(StaticPartitioningControlSection), totalBytesProgrammed); auto parsedOffset = 0u; @@ -285,7 +289,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -340,7 +344,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(controlSectionOffset, totalBytesProgrammed); auto parsedOffset = 0u; @@ -363,7 +368,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionAnd ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miSemaphoreWait = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -426,7 +431,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -459,7 +465,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -574,7 +580,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -607,7 +614,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -722,7 +729,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -757,7 +765,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -877,7 +885,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -912,7 +921,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWit ASSERT_NE(nullptr, pipeControl); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); } { auto miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1050,7 +1059,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti gpuVirtualAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -1103,7 +1113,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDebugModesForWalkerParti auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); @@ -1169,7 +1179,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -1207,7 +1218,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticWalkerPartitionWhe cmdBufferGpuAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); const auto expectedBytesProgrammed = WalkerPartition::estimateSpaceRequiredInCommandBuffer(testArgs); EXPECT_EQ(expectedBytesProgrammed, totalBytesProgrammed); @@ -1269,7 +1281,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr gpuVirtualAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); @@ -1319,7 +1332,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenStaticPartitionIsPreferr auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); diff --git a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp index 73c016c539..1fa2f2eb81 100644 --- a/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp +++ b/shared/test/unit_test/encoders/walker_partition_tests_xehp_and_later_2.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/test/common/helpers/default_hw_info.h" #include "shared/test/unit_test/encoders/walker_partition_fixture_xehp_and_later.h" HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenProgramRegisterCommandWhenItIsCalledThenLoadRegisterImmIsSetUnderPointer) { @@ -34,7 +35,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenWalkerPartitionWhenConst gpuVirtualAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); auto totalProgrammedSize = computeControlSectionOffset(testArgs) + sizeof(BatchBufferControlData); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); @@ -875,7 +877,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe gpuVirtualAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -936,7 +939,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupSectionWhenDe auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1076,7 +1079,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse gpuVirtualAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress; @@ -1138,7 +1142,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenSelfCleanupAndAtomicsUse auto pipeControl = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); ASSERT_NE(nullptr, pipeControl); EXPECT_TRUE(pipeControl->getCommandStreamerStallEnable()); - EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true), pipeControl->getDcFlushEnable()); + EXPECT_EQ(MemorySynchronizationCommands::isDcFlushAllowed(true, *defaultHwInfo), pipeControl->getDcFlushEnable()); parsedOffset += sizeof(WalkerPartition::PIPE_CONTROL); miAtomic = genCmdCast *>(ptrOffset(cmdBuffer, parsedOffset)); @@ -1281,7 +1285,8 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, WalkerPartitionTests, givenDynamicPartitioningWhenP gpuVirtualAddress, &walker, totalBytesProgrammed, - testArgs); + testArgs, + *defaultHwInfo); EXPECT_EQ(totalProgrammedSize, totalBytesProgrammed); auto wparidMaskProgrammingLocation = cmdBufferAddress;