From db9c0d11037486d67a70f96d8cc02a0541cbdfdc Mon Sep 17 00:00:00 2001 From: Bartosz Dunajski Date: Wed, 6 Apr 2022 14:41:45 +0000 Subject: [PATCH] Refactor and enable MI_MEM_FENCE programming for DirectSubmission dispatch Signed-off-by: Bartosz Dunajski --- .../linux/drm_command_stream_tests_1.cpp | 3 + .../windows/device_command_stream_tests.cpp | 25 +-- ...d_stream_receiver_hw_tests_xe_hpc_core.cpp | 2 +- .../hw_helper_tests_xe_hpc_core.cpp | 2 +- .../source/command_container/CMakeLists.txt | 2 +- .../command_container/command_encoder.h | 7 + .../command_container/command_encoder.inl | 9 + .../command_encoder_xe_hpc_core_and_later.inl | 31 +++ .../command_container/memory_fence_encoder.h | 28 --- .../direct_submission/direct_submission_hw.h | 5 + .../direct_submission_hw.inl | 52 ++++- ...direct_submission_xe_hp_core_and_later.inl | 21 +- shared/source/gen11/command_encoder_gen11.cpp | 1 + .../gen12lp/command_encoder_gen12lp.cpp | 1 + shared/source/gen8/command_encoder_gen8.cpp | 2 + shared/source/gen9/command_encoder_gen9.cpp | 1 + .../hw_cmds_generated_xe_hpc_core.inl | 2 +- .../xe_hp_core/command_encoder_xe_hp_core.cpp | 1 + .../command_encoder_xe_hpc_core.cpp | 2 + ...command_stream_receiver_hw_xe_hpc_core.cpp | 1 - .../command_encoder_xe_hpg_core.cpp | 1 + .../fixtures/direct_submission_fixture.h | 5 +- .../common/mocks/mock_direct_submission_hw.h | 3 + .../command_stream_receiver_tests.cpp | 1 + .../direct_submission_tests_1.cpp | 126 ++++++------ .../direct_submission_tests_2.cpp | 190 +++++++++++++++--- .../direct_submission_tests_xe_hp_core.cpp | 6 +- .../windows/wddm_direct_submission_tests.cpp | 45 +++-- .../encoders/test_command_encoder.cpp | 29 +++ .../xe_hpc_core/test_encode_xe_hpc_core.cpp | 30 --- 30 files changed, 417 insertions(+), 217 deletions(-) create mode 100644 shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl delete mode 100644 shared/source/command_container/memory_fence_encoder.h diff --git 
a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp index 00ce98812d..77463227cc 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_command_stream_tests_1.cpp @@ -632,6 +632,9 @@ struct DrmCommandStreamBlitterDirectSubmissionTest : public DrmCommandStreamDire osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); osContext->ensureContextInitialized(); + + device->allEngines.emplace_back(csr, osContext.get()); + csr->initDirectSubmission(*device.get(), *osContext.get()); } diff --git a/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index 12ace602e3..1347faf7d7 100644 --- a/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/opencl/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -1076,13 +1076,11 @@ TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionFailsThenFlushReturnsE auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true; - std::unique_ptr osContext; - osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); - osContext->setDefaultContext(true); + auto osContext = device->getDefaultEngine().osContext; + csr->callParentInitDirectSubmission = false; - bool ret = 
csr->initDirectSubmission(*device.get(), *osContext.get()); + bool ret = csr->initDirectSubmission(*device.get(), *osContext); EXPECT_TRUE(ret); EXPECT_TRUE(csr->isDirectSubmissionEnabled()); EXPECT_FALSE(csr->isBlitterDirectSubmissionEnabled()); @@ -1094,7 +1092,7 @@ TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionFailsThenFlushReturnsE nullptr, false, false, QueueThrottle::MEDIUM, QueueSliceCount::defaultSliceCount, cs.getUsed(), &cs, commandBuffer->getUnderlyingBuffer(), false}; - csr->directSubmission = std::make_unique(*device.get(), *osContext.get()); + csr->directSubmission = std::make_unique(*device.get(), *osContext); auto res = csr->flush(batchBuffer, csr->getResidencyAllocations()); EXPECT_EQ(NEO::SubmissionStatus::FAILED, res); @@ -1114,12 +1112,10 @@ TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnRcsWhenFlushi auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_RCS].engineSupported = true; - std::unique_ptr osContext; - osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); - osContext->setDefaultContext(true); + auto osContext = device->getDefaultEngine().osContext; + csr->callParentInitDirectSubmission = false; - bool ret = csr->initDirectSubmission(*device.get(), *osContext.get()); + bool ret = csr->initDirectSubmission(*device.get(), *osContext); EXPECT_TRUE(ret); EXPECT_TRUE(csr->isDirectSubmissionEnabled()); EXPECT_FALSE(csr->isBlitterDirectSubmissionEnabled()); @@ -1151,12 +1147,11 @@ TEST_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnBcsWhenFlushi auto hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); 
hwInfo->capabilityTable.directSubmissionEngines.data[aub_stream::ENGINE_BCS].engineSupported = true; - std::unique_ptr osContext; - osContext.reset(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), - 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, device->getDeviceBitfield()))); + auto osContext = device->getDefaultEngine().osContext; + csr->callParentInitDirectSubmission = false; csr->initBlitterDirectSubmission = true; - bool ret = csr->initDirectSubmission(*device.get(), *osContext.get()); + bool ret = csr->initDirectSubmission(*device.get(), *osContext); EXPECT_TRUE(ret); EXPECT_FALSE(csr->isDirectSubmissionEnabled()); EXPECT_TRUE(csr->isBlitterDirectSubmissionEnabled()); diff --git a/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp index 77271ee9c7..0ba1649fd8 100644 --- a/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/command_stream_receiver_hw_tests_xe_hpc_core.cpp @@ -304,7 +304,7 @@ XE_HPC_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTest, whenEnqueueKernel ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence); auto fenceCmd = genCmdCast(*itorMiMemFence); ASSERT_NE(nullptr, fenceCmd); - EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getAFenceType()); + EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType()); } } } diff --git a/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp b/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp index a694f90a7e..71b29daf73 100644 --- a/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp +++ b/opencl/test/unit_test/xe_hpc_core/hw_helper_tests_xe_hpc_core.cpp @@ -703,7 +703,7 @@ XE_HPC_CORETEST_F(HwHelperTestsXeHpcCore, 
givenMemorySynchronizationCommandsWhen auto fenceCmd = genCmdCast(*hwParser.cmdList.begin()); ASSERT_NE(nullptr, fenceCmd); - EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getAFenceType()); + EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType()); } } } diff --git a/shared/source/command_container/CMakeLists.txt b/shared/source/command_container/CMakeLists.txt index 9cd4d3e540..8aaefb80ac 100644 --- a/shared/source/command_container/CMakeLists.txt +++ b/shared/source/command_container/CMakeLists.txt @@ -39,7 +39,7 @@ endif() if(SUPPORT_PVC_AND_LATER) list(APPEND NEO_CORE_COMMAND_CONTAINER - ${CMAKE_CURRENT_SOURCE_DIR}/memory_fence_encoder.h + ${CMAKE_CURRENT_SOURCE_DIR}/command_encoder_xe_hpc_core_and_later.inl ) endif() diff --git a/shared/source/command_container/command_encoder.h b/shared/source/command_container/command_encoder.h index 061ad5d5b2..923df43efb 100644 --- a/shared/source/command_container/command_encoder.h +++ b/shared/source/command_container/command_encoder.h @@ -436,4 +436,11 @@ struct EncodeStoreMemory { } }; +template +struct EncodeMemoryFence { + static size_t getSystemMemoryFenceSize(); + + static void encodeSystemMemoryFence(LinearStream &commandStream, const GraphicsAllocation *globalFenceAllocation); +}; + } // namespace NEO diff --git a/shared/source/command_container/command_encoder.inl b/shared/source/command_container/command_encoder.inl index 28c8c86a28..cead2c2e9d 100644 --- a/shared/source/command_container/command_encoder.inl +++ b/shared/source/command_container/command_encoder.inl @@ -875,4 +875,13 @@ void EncodeEnableRayTracing::append3dStateBtd(void *ptr3dStateBtd) {} template inline void EncodeWA::setAdditionalPipeControlFlagsForNonPipelineStateCommand(PipeControlArgs &args) {} +template +size_t EncodeMemoryFence::getSystemMemoryFenceSize() { + return 0; +} + +template +void EncodeMemoryFence::encodeSystemMemoryFence(LinearStream &commandStream, const GraphicsAllocation 
*globalFenceAllocation) { +} + } // namespace NEO diff --git a/shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl b/shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl new file mode 100644 index 0000000000..ae4c7f3294 --- /dev/null +++ b/shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl @@ -0,0 +1,31 @@ +/* + * Copyright (C) 2021-2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/command_container/command_encoder.h" +#include "shared/source/command_stream/linear_stream.h" +#include "shared/source/memory_manager/graphics_allocation.h" + +#include "hw_cmds.h" + +namespace NEO { + +template <> +size_t EncodeMemoryFence::getSystemMemoryFenceSize() { + return sizeof(typename Family::STATE_SYSTEM_MEM_FENCE_ADDRESS); +} + +template <> +void EncodeMemoryFence::encodeSystemMemoryFence(LinearStream &commandStream, const GraphicsAllocation *globalFenceAllocation) { + using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename Family::STATE_SYSTEM_MEM_FENCE_ADDRESS; + + auto stateSystemFenceAddressSpace = commandStream.getSpaceForCmd(); + STATE_SYSTEM_MEM_FENCE_ADDRESS stateSystemFenceAddress = Family::cmdInitStateSystemMemFenceAddress; + stateSystemFenceAddress.setSystemMemoryFenceAddress(globalFenceAllocation->getGpuAddress()); + *stateSystemFenceAddressSpace = stateSystemFenceAddress; +} + +} // namespace NEO \ No newline at end of file diff --git a/shared/source/command_container/memory_fence_encoder.h b/shared/source/command_container/memory_fence_encoder.h deleted file mode 100644 index 6d234098e7..0000000000 --- a/shared/source/command_container/memory_fence_encoder.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Copyright (C) 2021 Intel Corporation - * - * SPDX-License-Identifier: MIT - * - */ - -#pragma once - -#include "shared/source/command_stream/linear_stream.h" -#include "shared/source/memory_manager/graphics_allocation.h" - -namespace NEO { -template -struct 
EncodeMemoryFence { - using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename GfxFamily::STATE_SYSTEM_MEM_FENCE_ADDRESS; - - static size_t getSystemMemoryFenceSize() { - return sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS); - } - static void encodeSystemMemoryFence(LinearStream &commandStream, const GraphicsAllocation *globalFenceAllocation) { - auto stateSystemFenceAddressSpace = commandStream.getSpaceForCmd(); - STATE_SYSTEM_MEM_FENCE_ADDRESS stateSystemFenceAddress = GfxFamily::cmdInitStateSystemMemFenceAddress; - stateSystemFenceAddress.setSystemMemoryFenceAddress(globalFenceAllocation->getGpuAddress()); - *stateSystemFenceAddressSpace = stateSystemFenceAddress; - } -}; -} // namespace NEO \ No newline at end of file diff --git a/shared/source/direct_submission/direct_submission_hw.h b/shared/source/direct_submission/direct_submission_hw.h index a20f007cbd..9ee74920bd 100644 --- a/shared/source/direct_submission/direct_submission_hw.h +++ b/shared/source/direct_submission/direct_submission_hw.h @@ -122,6 +122,9 @@ class DirectSubmissionHw { void dispatchPartitionRegisterConfiguration(); size_t getSizePartitionRegisterConfigurationSection(); + void dispatchSystemMemoryFenceAddress(); + size_t getSizeSystemMemoryFenceAddress(); + void createDiagnostic(); void initDiagnostic(bool &submitOnInit); MOCKABLE_VIRTUAL void performDiagnosticMode(); @@ -169,5 +172,7 @@ class DirectSubmissionHw { bool partitionedMode = false; bool partitionConfigSet = true; bool useNotifyForPostSync = false; + bool miMemFenceRequired = false; + bool systemMemoryFenceAddressSet = false; }; } // namespace NEO diff --git a/shared/source/direct_submission/direct_submission_hw.inl b/shared/source/direct_submission/direct_submission_hw.inl index 5165f8b3e9..e64765b348 100644 --- a/shared/source/direct_submission/direct_submission_hw.inl +++ b/shared/source/direct_submission/direct_submission_hw.inl @@ -6,6 +6,7 @@ */ #include "shared/source/command_container/command_encoder.h" +#include 
"shared/source/command_stream/command_stream_receiver.h" #include "shared/source/command_stream/submissions_aggregator.h" #include "shared/source/debug_settings/debug_settings_manager.h" #include "shared/source/device/device.h" @@ -17,6 +18,7 @@ #include "shared/source/memory_manager/graphics_allocation.h" #include "shared/source/memory_manager/memory_manager.h" #include "shared/source/memory_manager/memory_operations_handler.h" +#include "shared/source/os_interface/hw_info_config.h" #include "shared/source/os_interface/os_context.h" #include "shared/source/utilities/cpu_info.h" #include "shared/source/utilities/cpuintrinsics.h" @@ -31,6 +33,9 @@ template DirectSubmissionHw::DirectSubmissionHw(Device &device, OsContext &osContext) : device(device), osContext(osContext) { + hwInfo = &device.getHardwareInfo(); + + auto hwInfoConfig = HwInfoConfig::get(hwInfo->platform.eProductFamily); disableCacheFlush = UllsDefaults::defaultDisableCacheFlush; disableMonitorFence = UllsDefaults::defaultDisableMonitorFence; @@ -39,6 +44,10 @@ DirectSubmissionHw::DirectSubmissionHw(Device &device, disableCacheFlush = !!DebugManager.flags.DirectSubmissionDisableCacheFlush.get(); } + miMemFenceRequired = hwInfoConfig->isGlobalFenceInCommandStreamRequired(*hwInfo); + if (DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 0) { + miMemFenceRequired = false; + } if (DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get() != -1) { sfenceMode = static_cast(DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.get()); } @@ -50,7 +59,6 @@ DirectSubmissionHw::DirectSubmissionHw(Device &device, UNRECOVERABLE_IF(!CpuInfo::getInstance().isFeatureSupported(CpuInfo::featureClflush) && !disableCpuCacheFlush); - hwInfo = &device.getHardwareInfo(); createDiagnostic(); setPostSyncOffset(); } @@ -174,6 +182,12 @@ bool DirectSubmissionHw::initialize(bool submitOnInit, bo this->partitionConfigSet = true; } + if (this->miMemFenceRequired) 
{ + startBufferSize += getSizeSystemMemoryFenceAddress(); + dispatchSystemMemoryFenceAddress(); + + this->systemMemoryFenceAddressSet = true; + } if (workloadMode == 1) { dispatchDiagnosticModeSection(); startBufferSize += getDiagnosticModeSection(); @@ -197,6 +211,10 @@ bool DirectSubmissionHw::startRingBuffer() { if (!this->partitionConfigSet) { startSize += getSizePartitionRegisterConfigurationSection(); } + if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) { + startSize += getSizeSystemMemoryFenceAddress(); + } + size_t requiredSize = startSize + getSizeDispatch() + getSizeEnd(); if (ringCommandStream.getAvailableSpace() < requiredSize) { switchRingBuffers(); @@ -208,6 +226,11 @@ bool DirectSubmissionHw::startRingBuffer() { this->partitionConfigSet = true; } + if (this->miMemFenceRequired && !this->systemMemoryFenceAddressSet) { + dispatchSystemMemoryFenceAddress(); + this->systemMemoryFenceAddressSet = true; + } + currentQueueWorkCount++; dispatchSemaphoreSection(currentQueueWorkCount); @@ -257,11 +280,8 @@ inline void DirectSubmissionHw::dispatchSemaphoreSection( value, COMPARE_OPERATION::COMPARE_OPERATION_SAD_GREATER_THAN_OR_EQUAL_SDD); - if constexpr (GfxFamily::isUsingMiMemFence) { - if (DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 1) { - - MemorySynchronizationCommands::addAdditionalSynchronization(ringCommandStream, 0, true, this->device.getHardwareInfo()); - } + if (miMemFenceRequired) { + MemorySynchronizationCommands::addAdditionalSynchronization(ringCommandStream, 0, true, this->device.getHardwareInfo()); } dispatchPrefetchMitigation(); @@ -274,10 +294,8 @@ inline size_t DirectSubmissionHw::getSizeSemaphoreSection semaphoreSize += getSizePrefetchMitigation(); semaphoreSize += 2 * getSizeDisablePrefetcher(); - if constexpr (GfxFamily::isUsingMiMemFence) { - if (DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 1) { - semaphoreSize += 
MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(this->device.getHardwareInfo()); - } + if (miMemFenceRequired) { + semaphoreSize += MemorySynchronizationCommands::getSizeForSingleAdditionalSynchronization(this->device.getHardwareInfo()); } return semaphoreSize; @@ -598,4 +616,18 @@ size_t DirectSubmissionHw::getDiagnosticModeSection() { return Dispatcher::getSizeStoreDwordCommand(); } +template +void DirectSubmissionHw::dispatchSystemMemoryFenceAddress() { + auto &engineControl = device.getEngine(this->osContext.getEngineType(), this->osContext.getEngineUsage()); + + UNRECOVERABLE_IF(engineControl.osContext->getContextId() != this->osContext.getContextId()); /* the engine resolved by type/usage must be the one backing this submission's OsContext */ + + EncodeMemoryFence::encodeSystemMemoryFence(ringCommandStream, engineControl.commandStreamReceiver->getGlobalFenceAllocation()); +} + +template +size_t DirectSubmissionHw::getSizeSystemMemoryFenceAddress() { + return EncodeMemoryFence::getSystemMemoryFenceSize(); +} + } // namespace NEO diff --git a/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl b/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl index f637b6f5bb..7ca1e6f838 100644 --- a/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl +++ b/shared/source/direct_submission/direct_submission_xe_hp_core_and_later.inl @@ -6,7 +6,6 @@ */ #include "shared/source/command_container/implicit_scaling.h" -#include "shared/source/command_container/memory_fence_encoder.h" #include "shared/source/command_stream/command_stream_receiver.h" #include "shared/source/direct_submission/direct_submission_hw.h" @@ -17,29 +16,11 @@ inline void DirectSubmissionHw::dispatchPartitionRegister ImplicitScalingDispatch::dispatchRegisterConfiguration(ringCommandStream, this->workPartitionAllocation->getGpuAddress(), this->postSyncOffset); - - if constexpr (GfxFamily::isUsingMiMemFence) { - if (DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 1) { - auto 
&engineControl = device.getEngine(this->osContext.getEngineType(), this->osContext.getEngineUsage()); - - UNRECOVERABLE_IF(engineControl.osContext->getContextId() != engineControl.osContext->getContextId()); - - EncodeMemoryFence::encodeSystemMemoryFence(ringCommandStream, engineControl.commandStreamReceiver->getGlobalFenceAllocation()); - } - } } template inline size_t DirectSubmissionHw::getSizePartitionRegisterConfigurationSection() { - auto size = ImplicitScalingDispatch::getRegisterConfigurationSize(); - - if constexpr (GfxFamily::isUsingMiMemFence) { - if (DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 1) { - size += EncodeMemoryFence::getSystemMemoryFenceSize(); - } - } - - return size; + return ImplicitScalingDispatch::getRegisterConfigurationSize(); } template diff --git a/shared/source/gen11/command_encoder_gen11.cpp b/shared/source/gen11/command_encoder_gen11.cpp index 203f453b1b..727911eeea 100644 --- a/shared/source/gen11/command_encoder_gen11.cpp +++ b/shared/source/gen11/command_encoder_gen11.cpp @@ -92,4 +92,5 @@ template struct EncodeComputeMode; template struct EncodeEnableRayTracing; template struct EncodeNoop; template struct EncodeStoreMemory; +template struct EncodeMemoryFence; } // namespace NEO diff --git a/shared/source/gen12lp/command_encoder_gen12lp.cpp b/shared/source/gen12lp/command_encoder_gen12lp.cpp index 2e88f17f4c..3bd209ef18 100644 --- a/shared/source/gen12lp/command_encoder_gen12lp.cpp +++ b/shared/source/gen12lp/command_encoder_gen12lp.cpp @@ -117,4 +117,5 @@ template struct EncodeComputeMode; template struct EncodeEnableRayTracing; template struct EncodeNoop; template struct EncodeStoreMemory; +template struct EncodeMemoryFence; } // namespace NEO diff --git a/shared/source/gen8/command_encoder_gen8.cpp b/shared/source/gen8/command_encoder_gen8.cpp index ea9ed9a2ec..7bb7f1e787 100644 --- a/shared/source/gen8/command_encoder_gen8.cpp +++ b/shared/source/gen8/command_encoder_gen8.cpp @@ -65,4 +65,6 @@ 
template struct EncodeComputeMode; template struct EncodeEnableRayTracing; template struct EncodeNoop; template struct EncodeStoreMemory; +template struct EncodeMemoryFence; + } // namespace NEO diff --git a/shared/source/gen9/command_encoder_gen9.cpp b/shared/source/gen9/command_encoder_gen9.cpp index 7da62558c1..13adc9a353 100644 --- a/shared/source/gen9/command_encoder_gen9.cpp +++ b/shared/source/gen9/command_encoder_gen9.cpp @@ -77,4 +77,5 @@ template struct EncodeComputeMode; template struct EncodeEnableRayTracing; template struct EncodeNoop; template struct EncodeStoreMemory; +template struct EncodeMemoryFence; } // namespace NEO diff --git a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl index c4956dae4e..e51849c918 100644 --- a/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl +++ b/shared/source/generated/xe_hpc_core/hw_cmds_generated_xe_hpc_core.inl @@ -7085,7 +7085,7 @@ struct MI_MEM_FENCE { inline void setFenceType(const FENCE_TYPE value) { TheStructure.Common.FenceType = value; } - inline FENCE_TYPE getAFenceType() const { + inline FENCE_TYPE getFenceType() const { return static_cast(TheStructure.Common.FenceType); } }; diff --git a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp index 5b01283f33..d914e7e8c0 100644 --- a/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp +++ b/shared/source/xe_hp_core/command_encoder_xe_hp_core.cpp @@ -94,4 +94,5 @@ template struct EncodeWA; template struct EncodeEnableRayTracing; template struct EncodeNoop; template struct EncodeStoreMemory; +template struct EncodeMemoryFence; } // namespace NEO diff --git a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp index 108c883c20..df3ddebde0 100644 --- a/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp +++ 
b/shared/source/xe_hpc_core/command_encoder_xe_hpc_core.cpp @@ -17,6 +17,7 @@ using Family = NEO::XE_HPC_COREFamily; #include "shared/source/command_container/command_encoder_tgllp_and_later.inl" +#include "shared/source/command_container/command_encoder_xe_hpc_core_and_later.inl" #include "shared/source/command_container/command_encoder_xe_hpg_core_and_later.inl" #include "shared/source/command_container/image_surface_state/compression_params_tgllp_and_later.inl" #include "shared/source/command_container/image_surface_state/compression_params_xehp_and_later.inl" @@ -290,4 +291,5 @@ template struct EncodeWA; template struct EncodeEnableRayTracing; template struct EncodeNoop; template struct EncodeStoreMemory; +template struct EncodeMemoryFence; } // namespace NEO diff --git a/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp b/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp index 067fbefbe8..dad644b06b 100644 --- a/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp +++ b/shared/source/xe_hpc_core/command_stream_receiver_hw_xe_hpc_core.cpp @@ -5,7 +5,6 @@ * */ -#include "shared/source/command_container/memory_fence_encoder.h" #include "shared/source/xe_hpc_core/hw_cmds.h" #include "shared/source/xe_hpc_core/hw_info.h" diff --git a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp index 5be0c108fe..2c4bfb3dee 100644 --- a/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp +++ b/shared/source/xe_hpg_core/command_encoder_xe_hpg_core.cpp @@ -189,4 +189,5 @@ template struct EncodeWA; template struct EncodeEnableRayTracing; template struct EncodeNoop; template struct EncodeStoreMemory; +template struct EncodeMemoryFence; } // namespace NEO diff --git a/shared/test/common/fixtures/direct_submission_fixture.h b/shared/test/common/fixtures/direct_submission_fixture.h index f314c089c5..0235e772ed 100644 --- 
a/shared/test/common/fixtures/direct_submission_fixture.h +++ b/shared/test/common/fixtures/direct_submission_fixture.h @@ -24,11 +24,10 @@ struct DirectSubmissionFixture : public DeviceFixture { DeviceFixture::SetUp(); DeviceFactory::prepareDeviceEnvironments(*pDevice->getExecutionEnvironment()); - osContext.reset(OsContext::create(nullptr, 0u, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield()))); + osContext = pDevice->getDefaultEngine().osContext; } - std::unique_ptr osContext; + OsContext *osContext = nullptr; }; struct DirectSubmissionDispatchBufferFixture : public DirectSubmissionFixture { diff --git a/shared/test/common/mocks/mock_direct_submission_hw.h b/shared/test/common/mocks/mock_direct_submission_hw.h index 0a609bd88a..ddb50a99b4 100644 --- a/shared/test/common/mocks/mock_direct_submission_hw.h +++ b/shared/test/common/mocks/mock_direct_submission_hw.h @@ -45,7 +45,9 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::getSizeSemaphoreSection; using BaseClass::getSizeStartSection; using BaseClass::getSizeSwitchRingBufferSection; + using BaseClass::getSizeSystemMemoryFenceAddress; using BaseClass::hwInfo; + using BaseClass::miMemFenceRequired; using BaseClass::osContext; using BaseClass::partitionConfigSet; using BaseClass::partitionedMode; @@ -64,6 +66,7 @@ struct MockDirectSubmissionHw : public DirectSubmissionHw using BaseClass::startRingBuffer; using BaseClass::stopRingBuffer; using BaseClass::switchRingBuffersAllocations; + using BaseClass::systemMemoryFenceAddressSet; using BaseClass::useNotifyForPostSync; using BaseClass::workloadMode; using BaseClass::workloadModeOneExpectedValue; diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index 037092bceb..c0196bae52 100644 --- 
a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -971,6 +971,7 @@ HWTEST_F(InitDirectSubmissionTest, givenNonDefaultContextContextWhenDirectSubmis HWTEST_F(InitDirectSubmissionTest, GivenBlitterOverrideEnabledWhenBlitterIsNonDefaultContextThenExpectDirectSubmissionStarted) { DebugManager.flags.DirectSubmissionOverrideBlitterSupport.set(1); DebugManager.flags.DirectSubmissionDisableMonitorFence.set(0); + DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0); auto csr = std::make_unique>(*device->executionEnvironment, device->getRootDeviceIndex(), device->getDeviceBitfield()); std::unique_ptr osContext(OsContext::create(device->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp index da056f1879..6478042a61 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp @@ -31,7 +31,7 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledSetThenExpectNoCpuCach DebugManager.flags.DirectSubmissionDisableCpuCacheFlush.set(1); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(directSubmission.disableCpuCacheFlush); uintptr_t expectedPtrVal = 0; @@ -51,7 +51,7 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCac DebugManager.flags.DirectSubmissionDisableCpuCacheFlush.set(0); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_FALSE(directSubmission.disableCpuCacheFlush); uintptr_t expectedPtrVal = 0xABCD00u; @@ -64,7 +64,7 @@ HWTEST_F(DirectSubmissionTest, whenDebugCacheFlushDisabledNotSetThenExpectCpuCac HWTEST_F(DirectSubmissionTest, 
givenDirectSubmissionWhenStopThenRingIsNotStarted) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); auto &csr = pDevice->getUltCommandStreamReceiver(); csr.directSubmission.reset(&directSubmission); @@ -80,13 +80,13 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenStopThenRingIsNotStarted HWTEST_F(DirectSubmissionTest, givenBlitterDirectSubmissionWhenStopThenRingIsNotStarted) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); auto &csr = pDevice->getUltCommandStreamReceiver(); std::unique_ptr osContext(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0, EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_BCS, EngineUsage::Regular}, PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield()))); csr.blitterDirectSubmission.reset(&directSubmission); - csr.setupContext(*osContext.get()); + csr.setupContext(*osContext); bool ret = directSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -99,21 +99,11 @@ HWTEST_F(DirectSubmissionTest, givenBlitterDirectSubmissionWhenStopThenRingIsNot } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenMakingResourcesResidentThenCorrectContextIsUsed) { - - auto &csr = pDevice->getUltCommandStreamReceiver(); - auto mockMemoryOperations = std::make_unique(); pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.reset(mockMemoryOperations.get()); - std::unique_ptr osContext2(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 2, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, - PreemptionMode::ThreadGroup, pDevice->getDeviceBitfield()))); - - MockDirectSubmissionHw> directSubmission(*pDevice, *osContext2.get()); - - csr.directSubmission.reset(&directSubmission); - csr.setupContext(*osContext2.get()); + MockDirectSubmissionHw> directSubmission(*pDevice, *osContext); bool ret = 
directSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -127,15 +117,14 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenMakingResourcesResidentT directSubmission.makeResourcesResident(allocs); - EXPECT_EQ(2u, mockMemoryOperations->makeResidentContextId); + EXPECT_EQ(osContext->getContextId(), mockMemoryOperations->makeResidentContextId); pDevice->getRootDeviceEnvironmentRef().memoryOperationsInterface.release(); - csr.directSubmission.release(); } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStartedThenExpectAllocationsCreatedAndCommandsDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(directSubmission.disableCpuCacheFlush); bool ret = directSubmission.initialize(true, false); @@ -151,7 +140,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsStarted HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsNotStartedThenExpectAllocationsCreatedAndCommandsNotDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -167,7 +156,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionInitializedWhenRingIsNotStar HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsPrimaryThenExpectNextSecondary) { using RingBufferUse = typename MockDirectSubmissionHw>::RingBufferUse; MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -181,7 +170,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsPr HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsSecondaryThenExpectNextPrimary) { using RingBufferUse = typename MockDirectSubmissionHw>::RingBufferUse; MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + 
*osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -197,7 +186,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSwitchBuffersWhenCurrentIsSe } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAllocateFailWhenRingIsStartedThenExpectRingNotStarted) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(directSubmission.disableCpuCacheFlush); directSubmission.allocateOsResourcesReturn = false; @@ -210,7 +199,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionAllocateFailWhenRingIsStarte HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSubmitFailWhenRingIsStartedThenExpectRingNotStartedCommandsDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.submitReturn = false; bool ret = directSubmission.initialize(true, false); @@ -222,7 +211,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionSubmitFailWhenRingIsStartedT HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsStartedThenExpectNoStartCommandsDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -235,7 +224,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsStartedThenEx HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedThenExpectStartCommandsDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -249,7 +238,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedThe HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedSubmitFailThenExpectStartCommandsDispatchedRingNotStarted) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = 
directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -264,14 +253,19 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedSub HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedAndSwitchBufferIsNeededThenExpectRingAllocationChangedStartCommandsDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); auto expectedRingBuffer = directSubmission.currentRingBuffer; GraphicsAllocation *oldRingBuffer = directSubmission.ringCommandStream.getGraphicsAllocation(); - directSubmission.ringCommandStream.getSpace(directSubmission.ringCommandStream.getAvailableSpace() - directSubmission.getSizeSemaphoreSection()); + auto requiredSize = directSubmission.getSizeSemaphoreSection(); + if (directSubmission.miMemFenceRequired) { + requiredSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } + + directSubmission.ringCommandStream.getSpace(directSubmission.ringCommandStream.getAvailableSpace() - requiredSize); ret = directSubmission.startRingBuffer(); auto actualRingBuffer = directSubmission.currentRingBuffer; @@ -279,14 +273,14 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStartWhenRingIsNotStartedAnd EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.ringStart); EXPECT_NE(oldRingBuffer, directSubmission.ringCommandStream.getGraphicsAllocation()); - EXPECT_EQ(directSubmission.getSizeSemaphoreSection(), directSubmission.ringCommandStream.getUsed()); + EXPECT_EQ(requiredSize, directSubmission.ringCommandStream.getUsed()); EXPECT_NE(expectedRingBuffer, actualRingBuffer); } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionStopWhenStopRingIsCalledThenExpectStopCommandDispatched) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -308,11 +302,11 @@ HWTEST_F(DirectSubmissionTest, using 
Dispatcher = RenderDispatcher; MockDirectSubmissionHw regularDirectSubmission(*pDevice, - *osContext.get()); + *osContext); size_t regularSizeEnd = regularDirectSubmission.getSizeEnd(); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.allocateResources(); directSubmission.disableMonitorFence = true; @@ -354,7 +348,7 @@ HWTEST_F(DirectSubmissionTest, HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSemaphoreThenExpectCorrectSizeUsed) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -365,7 +359,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSemaphoreThenExp HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchStartSectionThenExpectCorrectSizeUsed) { MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -375,7 +369,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchStartSectionThen } HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSwitchRingBufferSectionThenExpectCorrectSizeUsed) { - MockDirectSubmissionHw> directSubmission(*pDevice, *osContext.get()); + MockDirectSubmissionHw> directSubmission(*pDevice, *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -387,7 +381,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchSwitchRingBuffer HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchFlushSectionThenExpectCorrectSizeUsed) { using Dispatcher = RenderDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -399,7 +393,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchFlushSectionThen 
HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchTagUpdateSectionThenExpectCorrectSizeUsed) { using Dispatcher = RenderDispatcher; MockDirectSubmissionHw - directSubmission(*pDevice, *osContext.get()); + directSubmission(*pDevice, *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -411,7 +405,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchTagUpdateSection HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenDispatchEndingSectionThenExpectCorrectSizeUsed) { using Dispatcher = RenderDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -426,7 +420,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetDispatchSizeThenExpec DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); size_t expectedSize = directSubmission.getSizeStartSection() + Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) + @@ -442,7 +436,7 @@ HWTEST_F(DirectSubmissionTest, DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.workloadMode = 1; size_t expectedSize = Dispatcher::getSizeStoreDwordCommand() + @@ -460,7 +454,7 @@ HWTEST_F(DirectSubmissionTest, using Dispatcher = RenderDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.workloadMode = 2; size_t expectedSize = Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) + Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) + @@ -474,7 +468,7 @@ HWTEST_F(DirectSubmissionTest, using Dispatcher = RenderDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); 
directSubmission.disableCacheFlush = true; size_t expectedSize = directSubmission.getSizeStartSection() + Dispatcher::getSizeMonitorFence(*directSubmission.hwInfo) + @@ -489,7 +483,7 @@ HWTEST_F(DirectSubmissionTest, DebugManagerStateRestore restorer; DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.disableMonitorFence = true; size_t expectedSize = directSubmission.getSizeStartSection() + Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) + @@ -502,7 +496,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenGetEndSizeThenExpectCorr using Dispatcher = RenderDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); size_t expectedSize = Dispatcher::getSizeStopCommandBuffer() + Dispatcher::getSizeCacheFlush(*directSubmission.hwInfo) + @@ -516,7 +510,7 @@ HWTEST_F(DirectSubmissionTest, givenDirectSubmissionWhenSettingAddressInReturnCo using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -533,7 +527,7 @@ HWTEST_F(DirectSubmissionTest, whenDirectSubmissionInitializedThenExpectCreatedA std::unique_ptr>> directSubmission = std::make_unique>>(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission->initialize(false, false); EXPECT_TRUE(ret); @@ -544,7 +538,7 @@ HWTEST_F(DirectSubmissionTest, whenDirectSubmissionInitializedThenExpectCreatedA directSubmission = std::make_unique< MockDirectSubmissionHw>>(*pDevice, - *osContext.get()); + *osContext); ret = directSubmission->initialize(false, false); EXPECT_TRUE(ret); @@ -555,7 +549,7 @@ HWTEST_F(DirectSubmissionTest, whenDirectSubmissionInitializedThenExpectCreatedA directSubmission = std::make_unique< MockDirectSubmissionHw>>(*pDevice, - *osContext.get()); + 
*osContext); ret = directSubmission->initialize(false, false); EXPECT_TRUE(ret); nulledAllocation = directSubmission->semaphores; @@ -683,7 +677,7 @@ HWTEST_F(DirectSubmissionTest, NEO::IoFunctions::mockFcloseCalled = 0u; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush); EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence); EXPECT_TRUE(directSubmission.disableCacheFlush); @@ -718,7 +712,7 @@ HWTEST_F(DirectSubmissionTest, NEO::IoFunctions::mockFcloseCalled = 0u; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush); EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence); EXPECT_TRUE(directSubmission.disableCacheFlush); @@ -734,6 +728,9 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(0u, NEO::IoFunctions::mockFcloseCalled); size_t expectedSize = Dispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(); + if (directSubmission.miMemFenceRequired) { + expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } EXPECT_EQ(expectedSize, directSubmission.ringCommandStream.getUsed()); } @@ -753,7 +750,7 @@ HWTEST_F(DirectSubmissionTest, NEO::IoFunctions::mockVfptrinfCalled = 0u; NEO::IoFunctions::mockFcloseCalled = 0u; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush); EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence); EXPECT_TRUE(directSubmission.disableCacheFlush); @@ -793,7 +790,7 @@ HWTEST_F(DirectSubmissionTest, NEO::IoFunctions::mockVfptrinfCalled = 0u; NEO::IoFunctions::mockFcloseCalled = 0u; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); uint32_t expectedSemaphoreValue = directSubmission.currentQueueWorkCount; EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush); EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence); @@ -809,6 +806,10 @@ 
HWTEST_F(DirectSubmissionTest, directSubmission.getDiagnosticModeSection(); expectedSize += expectedExecCount * directSubmission.getSizeDispatch(); + if (directSubmission.miMemFenceRequired) { + expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } + bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.ringStart); @@ -844,7 +845,12 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(expectedStoreAddress, storeCmd->getAddress()); } - uint8_t *cmdBufferPosition = static_cast(directSubmission.ringCommandStream.getCpuBase()) + Dispatcher::getSizePreemption(); + size_t sysMemFenceOffset = 0; + if (directSubmission.miMemFenceRequired) { + sysMemFenceOffset = directSubmission.getSizeSystemMemoryFenceAddress(); + } + + uint8_t *cmdBufferPosition = static_cast(directSubmission.ringCommandStream.getCpuBase()) + Dispatcher::getSizePreemption() + sysMemFenceOffset; MI_STORE_DATA_IMM *storeDataCmdAtPosition = genCmdCast(cmdBufferPosition); ASSERT_NE(nullptr, storeDataCmdAtPosition); EXPECT_EQ(1u, storeDataCmdAtPosition->getDataDword0()); @@ -880,7 +886,7 @@ HWTEST_F(DirectSubmissionTest, NEO::IoFunctions::mockVfptrinfCalled = 0u; NEO::IoFunctions::mockFcloseCalled = 0u; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); uint32_t expectedSemaphoreValue = directSubmission.currentQueueWorkCount; EXPECT_TRUE(UllsDefaults::defaultDisableCacheFlush); EXPECT_FALSE(UllsDefaults::defaultDisableMonitorFence); @@ -897,6 +903,10 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch()); expectedSize += expectedExecCount * expectedDispatch; + if (directSubmission.miMemFenceRequired) { + expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } + bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); EXPECT_TRUE(directSubmission.ringStart); @@ -939,7 +949,7 @@ HWTEST_F(DirectSubmissionTest, NEO::IoFunctions::mockFcloseCalled = 0u; 
MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_NE(nullptr, directSubmission.diagnostic.get()); EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled); @@ -990,7 +1000,7 @@ HWCMDTEST_F(IGFX_GEN8_CORE, DirectSubmissionTest, using Dispatcher = RenderDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -1012,7 +1022,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionTest, givenDebugFlagSetWhenDispatch using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK; using Dispatcher = BlitterDispatcher; - MockDirectSubmissionHw directSubmission(*pDevice, *osContext.get()); + MockDirectSubmissionHw directSubmission(*pDevice, *osContext); bool ret = directSubmission.allocateResources(); EXPECT_TRUE(ret); diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index 44c56d7ca1..20bf093f5b 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -33,6 +33,128 @@ using DirectSubmissionTest = Test; using DirectSubmissionDispatchBufferTest = Test; +struct DirectSubmissionDispatchMiMemFenceTest : public DirectSubmissionDispatchBufferTest { + void SetUp() override { + DirectSubmissionDispatchBufferTest::SetUp(); + + auto hwInfoConfig = HwInfoConfig::get(pDevice->getHardwareInfo().platform.eProductFamily); + miMemFenceSupported = hwInfoConfig->isGlobalFenceInCommandStreamRequired(pDevice->getHardwareInfo()); + } + + template + void validateFenceProgramming(MockDirectSubmissionHw> &directSubmission, uint32_t expectedFenceCount, uint32_t expectedSysMemFenceCount) { + int32_t systemMemoryFenceId = -1; + uint32_t fenceCount = 0; + uint32_t sysMemFenceCount = 0; + + if constexpr (FamilyType::isUsingMiMemFence) { + using 
STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; + using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; + + HardwareParse hwParse; + hwParse.parseCommands(directSubmission.ringCommandStream, 0); + hwParse.findHardwareCommands(); + + int32_t id = 0; + + for (auto &it : hwParse.cmdList) { + if (auto sysFenceAddress = genCmdCast(it)) { + EXPECT_EQ(-1, systemMemoryFenceId); + systemMemoryFenceId = id; + sysMemFenceCount++; + + EXPECT_NE(0u, sysFenceAddress->getSystemMemoryFenceAddress()); + } else if (auto miMemFence = genCmdCast(it)) { + if (miMemFence->getFenceType() == MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_ACQUIRE) { + EXPECT_TRUE(id > systemMemoryFenceId); + + fenceCount++; + } + } + + id++; + } + } + + if (miMemFenceSupported) { + if (expectedSysMemFenceCount > 0) { + EXPECT_NE(-1, systemMemoryFenceId); + } else { + EXPECT_EQ(-1, systemMemoryFenceId); + } + EXPECT_EQ(expectedFenceCount, fenceCount); + EXPECT_EQ(expectedSysMemFenceCount, sysMemFenceCount); + } else { + EXPECT_EQ(-1, systemMemoryFenceId); + EXPECT_EQ(0u, fenceCount); + EXPECT_EQ(0u, sysMemFenceCount); + } + } + + bool miMemFenceSupported = false; +}; + +HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenMiMemFenceSupportedWhenInitializingDirectSubmissionThenEnableMiMemFenceProgramming) { + MockDirectSubmissionHw> directSubmission(*pDevice, *osContext); + + EXPECT_EQ(miMemFenceSupported, directSubmission.miMemFenceRequired); + EXPECT_FALSE(directSubmission.systemMemoryFenceAddressSet); + + EXPECT_TRUE(directSubmission.initialize(true, false)); + + EXPECT_EQ(miMemFenceSupported, directSubmission.systemMemoryFenceAddressSet); + + validateFenceProgramming(directSubmission, 1, 1); +} + +HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenMiMemFenceSupportedWhenDispatchingWithoutInitThenEnableMiMemFenceProgramming) { + MockDirectSubmissionHw> directSubmission(*pDevice, *osContext); + FlushStampTracker flushStamp(true); + + EXPECT_EQ(miMemFenceSupported, 
directSubmission.miMemFenceRequired); + EXPECT_FALSE(directSubmission.systemMemoryFenceAddressSet); + + EXPECT_TRUE(directSubmission.initialize(false, false)); + + EXPECT_FALSE(directSubmission.systemMemoryFenceAddressSet); + + EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); + + validateFenceProgramming(directSubmission, 2, 1); + + EXPECT_EQ(miMemFenceSupported, directSubmission.systemMemoryFenceAddressSet); +} + +HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenMiMemFenceSupportedWhenSysMemFenceIsAlreadySentThenDontReprogram) { + MockDirectSubmissionHw> directSubmission(*pDevice, *osContext); + FlushStampTracker flushStamp(true); + + EXPECT_EQ(miMemFenceSupported, directSubmission.miMemFenceRequired); + directSubmission.systemMemoryFenceAddressSet = true; + + EXPECT_TRUE(directSubmission.initialize(false, false)); + + EXPECT_TRUE(directSubmission.dispatchCommandBuffer(batchBuffer, flushStamp)); + + validateFenceProgramming(directSubmission, 2, 0); + + EXPECT_TRUE(directSubmission.systemMemoryFenceAddressSet); +} + +HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenDebugFlagSetWhenCreatingDirectSubmissionThenDontEnableMiMemFenceProgramming) { + DebugManagerStateRestore restorer; + DebugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0); + + MockDirectSubmissionHw> directSubmission(*pDevice, *osContext); + + EXPECT_FALSE(directSubmission.miMemFenceRequired); + EXPECT_FALSE(directSubmission.systemMemoryFenceAddressSet); + + EXPECT_TRUE(directSubmission.initialize(true, false)); + + EXPECT_FALSE(directSubmission.systemMemoryFenceAddressSet); +} + HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, givenDirectSubmissionInPartitionModeWhenDispatchingCommandBufferThenExpectDispatchPartitionedPipeControlInCommandBuffer) { using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL; @@ -50,7 +172,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, 
ultCsr->createWorkPartitionAllocation(*pDevice); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(directSubmission.partitionConfigSet); directSubmission.partitionConfigSet = false; directSubmission.disableMonitorFence = false; @@ -70,6 +192,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, directSubmission.getSizeSemaphoreSection() + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM); + if (directSubmission.miMemFenceRequired) { + submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } EXPECT_EQ(submitSize, directSubmission.submitSize); EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); EXPECT_EQ(1u, directSubmission.handleResidencyCount); @@ -120,11 +245,11 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0); MockDirectSubmissionHw regularDirectSubmission(*pDevice, - *osContext.get()); + *osContext); size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.disableMonitorFence = true; bool ret = directSubmission.allocateResources(); @@ -171,11 +296,11 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, DebugManager.flags.DirectSubmissionDisableCacheFlush.set(0); MockDirectSubmissionHw regularDirectSubmission(*pDevice, - *osContext.get()); + *osContext); size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.disableCacheFlush = true; bool ret = directSubmission.allocateResources(); @@ -220,12 +345,12 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, using Dispatcher = RenderDispatcher; MockDirectSubmissionHw regularDirectSubmission(*pDevice, - *osContext.get()); + *osContext); size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(); 
MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.workloadMode = 1; bool ret = directSubmission.allocateResources(); @@ -267,11 +392,11 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; MockDirectSubmissionHw> regularDirectSubmission(*pDevice, - *osContext.get()); + *osContext); size_t regularSizeDispatch = regularDirectSubmission.getSizeDispatch(); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.workloadMode = 2; bool ret = directSubmission.allocateResources(); @@ -307,7 +432,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -319,6 +444,9 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = RenderDispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(); + if (directSubmission.miMemFenceRequired) { + submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } EXPECT_EQ(submitSize, directSubmission.submitSize); EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); EXPECT_EQ(1u, directSubmission.handleResidencyCount); @@ -346,7 +474,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -365,11 +493,16 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(3u, directSubmission.currentQueueWorkCount); EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = directSubmission.getSizeSemaphoreSection(); + if (directSubmission.miMemFenceRequired) { + submitSize += 
directSubmission.getSizeSystemMemoryFenceAddress(); + } EXPECT_EQ(submitSize, directSubmission.submitSize); EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); EXPECT_EQ(2u, directSubmission.handleResidencyCount); - EXPECT_EQ(directSubmission.getSizeDispatch() + directSubmission.getSizeSemaphoreSection(), directSubmission.ringCommandStream.getUsed()); + size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(); + + EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed()); EXPECT_TRUE(directSubmission.ringStart); } @@ -378,7 +511,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -390,6 +523,9 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = RenderDispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(); + if (directSubmission.miMemFenceRequired) { + submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } EXPECT_EQ(submitSize, directSubmission.submitSize); EXPECT_EQ(oldRingAllocation->getGpuAddress(), directSubmission.submitGpuAddress); EXPECT_EQ(1u, directSubmission.handleResidencyCount); @@ -414,7 +550,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -434,10 +570,15 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(3u, directSubmission.currentQueueWorkCount); EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = directSubmission.getSizeSemaphoreSection(); + if (directSubmission.miMemFenceRequired) { + submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } EXPECT_EQ(submitSize, 
directSubmission.submitSize); EXPECT_EQ(2u, directSubmission.handleResidencyCount); - EXPECT_EQ(directSubmission.getSizeDispatch() + directSubmission.getSizeSemaphoreSection(), directSubmission.ringCommandStream.getUsed()); + size_t dispatchSize = submitSize + directSubmission.getSizeDispatch(); + + EXPECT_EQ(dispatchSize, directSubmission.ringCommandStream.getUsed()); EXPECT_TRUE(directSubmission.ringStart); } @@ -447,7 +588,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDirectSubmissionPrintBuffersWh FlushStampTracker flushStamp(true); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); testing::internal::CaptureStdout(); @@ -481,7 +622,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(directSubmission.partitionConfigSet); directSubmission.activeTiles = 2; directSubmission.partitionedMode = true; @@ -497,6 +638,9 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, directSubmission.getSizeSemaphoreSection() + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM); + if (directSubmission.miMemFenceRequired) { + submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); + } EXPECT_EQ(submitSize, directSubmission.submitSize); EXPECT_EQ(1u, directSubmission.handleResidencyCount); EXPECT_EQ(4u, directSubmission.makeResourcesResidentVectorSize); @@ -542,7 +686,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw> directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_TRUE(directSubmission.partitionConfigSet); directSubmission.activeTiles = 2; directSubmission.partitionedMode = true; @@ -594,7 +738,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + 
*osContext); directSubmission.disableMonitorFence = false; bool ret = directSubmission.initialize(true, true); @@ -630,7 +774,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); directSubmission.disableMonitorFence = false; bool ret = directSubmission.initialize(true, true); @@ -669,7 +813,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(true, true); EXPECT_TRUE(ret); @@ -694,7 +838,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, FlushStampTracker flushStamp(true); MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); bool ret = directSubmission.initialize(true, true); EXPECT_TRUE(ret); @@ -716,7 +860,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, givenDebugFlagSetWhenDispatchingWor for (int32_t debugFlag : {-1, 0, 1, 2}) { DebugManager.flags.DirectSubmissionInsertSfenceInstructionPriorToSubmission.set(debugFlag); - MockDirectSubmissionHw directSubmission(*pDevice, *osContext.get()); + MockDirectSubmissionHw directSubmission(*pDevice, *osContext); EXPECT_TRUE(directSubmission.initialize(true, true)); auto initialCounterValue = CpuIntrinsicsTests::sfenceCounter.load(); diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hp_core.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hp_core.cpp index a1b64a9401..31658c8c8a 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hp_core.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_xe_hp_core.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2021 Intel Corporation + * Copyright (C) 2021-2022 Intel Corporation * * SPDX-License-Identifier: MIT * @@ -20,7 +20,7 @@ XE_HP_CORE_TEST_F(DirectSubmissionTestXE_HP_CORE, 
givenBlitterUsedWhenDispatchin using Dispatcher = BlitterDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_EQ(sizeof(MI_BATCH_BUFFER_START), directSubmission.getSizePrefetchMitigation()); @@ -45,7 +45,7 @@ XE_HP_CORE_TEST_F(DirectSubmissionTestXE_HP_CORE, givenBlitterUsedWhenDispatchin using Dispatcher = BlitterDispatcher; MockDirectSubmissionHw directSubmission(*pDevice, - *osContext.get()); + *osContext); EXPECT_EQ(sizeof(MI_ARB_CHECK), directSubmission.getSizeDisablePrefetcher()); diff --git a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp index 15f27ddc2a..cb2019688f 100644 --- a/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp +++ b/shared/test/unit_test/direct_submission/windows/wddm_direct_submission_tests.cpp @@ -34,15 +34,16 @@ struct WddmDirectSubmissionFixture : public WddmFixture { executionEnvironment->memoryManager.reset(new WddmMemoryManager{*executionEnvironment}); device.reset(MockDevice::create(executionEnvironment.get(), 0u)); - osContext = std::make_unique(*wddm, 0u, - EngineDescriptorHelper::getDefaultDescriptor({aub_stream::ENGINE_RCS, EngineUsage::Regular}, - PreemptionMode::ThreadGroup, device->getDeviceBitfield())); - osContext->ensureContextInitialized(); + + osContext = static_cast(device->getDefaultEngine().osContext); + + wddmMockInterface->createMonitoredFence(*osContext); + device->setPreemptionMode(PreemptionMode::ThreadGroup); } WddmMockInterface20 *wddmMockInterface; - std::unique_ptr osContext; + OsContextWin *osContext = nullptr; std::unique_ptr device; }; @@ -51,7 +52,7 @@ using WddmDirectSubmissionTest = WddmDirectSubmissionFixture; HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThenExpectProperCommandsDispatched) { std::unique_ptr>> wddmDirectSubmission = std::make_unique>>(*device.get(), - 
*osContext.get()); + *osContext); EXPECT_EQ(1u, wddmDirectSubmission->commandBufferHeader->NeedsMidBatchPreEmptionSupport); @@ -83,7 +84,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndNotStarted device->setPreemptionMode(PreemptionMode::Disabled); std::unique_ptr>> wddmDirectSubmission = std::make_unique>>(*device.get(), - *osContext.get()); + *osContext); EXPECT_EQ(0u, wddmDirectSubmission->commandBufferHeader->NeedsMidBatchPreEmptionSupport); @@ -110,7 +111,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndNotStarted HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSubmitingCmdBufferThenExpectPassWddmContextAndProperHeader) { MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); bool ret = wddmDirectSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -129,7 +130,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSubmitingCmdBufferThenExpectPass HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesThenExpectRingMonitorFenceCreatedAndAllocationsResident) { MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); bool ret = wddmDirectSubmission.allocateResources(); EXPECT_TRUE(ret); @@ -148,7 +149,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesFenceCreation ASSERT_NE(nullptr, ringBuffer); MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); DirectSubmissionAllocations allocations; allocations.push_back(ringBuffer); @@ -167,7 +168,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesFenceCreation HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenAllocateOsResourcesResidencyFailsThenExpectRingMonitorFenceCreatedAndAllocationsNotResident) { MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); wddm->callBaseMakeResident = false; wddm->makeResidentStatus = false; @@ -189,7 +190,7 @@ 
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenGettingTagDataThenExpectContextM contextFence.currentFenceValue = value; MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); TagData tagData; wddmDirectSubmission.getTagAddressValue(tagData); @@ -200,7 +201,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenGettingTagDataThenExpectContextM HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenHandleResidencyThenExpectWddmWaitOnPaginfFenceFromCpuCalled) { MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); wddmDirectSubmission.handleResidency(); @@ -215,7 +216,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenHandlingRingBufferCompletionThen contextFence.currentFenceValue = value; MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); uint64_t completionValue = 0x12345679ull; wddmDirectSubmission.handleCompletionRingBuffer(completionValue, contextFence); @@ -229,7 +230,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenHandlingRingBufferCompletionThen HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenExpectDispatchSwitchCommandsLinearStreamUpdated) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); bool ret = wddmDirectSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -255,7 +256,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedThenEx HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferNotStartedThenExpectNoSwitchCommandsLinearStreamUpdated) { using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); bool ret = wddmDirectSubmission.initialize(false, false); EXPECT_TRUE(ret); @@ -282,7 +283,7 @@ 
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenSwitchingRingBufferStartedAndWai using RingBufferUse = typename MockWddmDirectSubmission>::RingBufferUse; using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START; MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); bool ret = wddmDirectSubmission.initialize(true, false); EXPECT_TRUE(ret); @@ -318,7 +319,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenUpdatingTagValueThenExpectComple contextFence.currentFenceValue = value; MockWddmDirectSubmission> wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); uint64_t actualTagValue = wddmDirectSubmission.updateTagValue(); EXPECT_EQ(value, actualTagValue); @@ -347,7 +348,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmResidencyEnabledWhenCreatingDestroyi std::unique_ptr> wddmSubmission = std::make_unique>(*device.get(), - *osContext.get()); + *osContext); EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled); EXPECT_EQ(2u, NEO::IoFunctions::mockVfptrinfCalled); @@ -374,7 +375,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmResidencyEnabledWhenAllocatingResour NEO::IoFunctions::mockFcloseCalled = 0u; MockWddmDirectSubmission wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); wddm->callBaseMakeResident = true; wddm->createPagingFenceLogger(); @@ -405,7 +406,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmResidencyEnabledWhenHandleResidencyT NEO::IoFunctions::mockFcloseCalled = 0u; MockWddmDirectSubmission wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); wddm->createPagingFenceLogger(); EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled); @@ -434,7 +435,7 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmResidencyEnabledWhenSubmitToGpuThenS NEO::IoFunctions::mockFcloseCalled = 0u; MockWddmDirectSubmission wddmDirectSubmission(*device.get(), - *osContext.get()); + *osContext); wddm->createPagingFenceLogger(); EXPECT_EQ(1u, NEO::IoFunctions::mockFopenCalled); 
diff --git a/shared/test/unit_test/encoders/test_command_encoder.cpp b/shared/test/unit_test/encoders/test_command_encoder.cpp index 6dcb0b39ea..a412a6e111 100644 --- a/shared/test/unit_test/encoders/test_command_encoder.cpp +++ b/shared/test/unit_test/encoders/test_command_encoder.cpp @@ -125,3 +125,32 @@ HWTEST_F(CommandEncoderTest, GivenQwordStoreWhenAddingStoreDataImmThenExpectQwor EXPECT_TRUE(storeDataImm->getStoreQword()); EXPECT_EQ(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_QWORD, storeDataImm->getDwordLength()); } + +HWTEST_F(CommandEncoderTest, givenPlatformSupportingMiMemFenceWhenEncodingThenProgramSystemMemoryFence) { + uint64_t gpuAddress = 0x12340000; + constexpr size_t bufferSize = 64; + + NEO::MockGraphicsAllocation allocation(reinterpret_cast(0x1234000), gpuAddress, 0x123); + + uint8_t buffer[bufferSize] = {}; + LinearStream cmdStream(buffer, bufferSize); + + size_t size = EncodeMemoryFence::getSystemMemoryFenceSize(); + + EncodeMemoryFence::encodeSystemMemoryFence(cmdStream, &allocation); + + if constexpr (FamilyType::isUsingMiMemFence) { + using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; + + STATE_SYSTEM_MEM_FENCE_ADDRESS expectedCmd = FamilyType::cmdInitStateSystemMemFenceAddress; + expectedCmd.setSystemMemoryFenceAddress(gpuAddress); + + EXPECT_EQ(sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS), size); + EXPECT_EQ(sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS), cmdStream.getUsed()); + + EXPECT_EQ(0, memcmp(buffer, &expectedCmd, sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS))); + } else { + EXPECT_EQ(0u, size); + EXPECT_EQ(0u, cmdStream.getUsed()); + } +} \ No newline at end of file diff --git a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp index a79df2d6b5..35ab7f3e17 100644 --- a/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp +++ b/shared/test/unit_test/xe_hpc_core/test_encode_xe_hpc_core.cpp @@ -6,7 +6,6 @@ */ #include 
"shared/source/command_container/command_encoder.h" -#include "shared/source/command_container/memory_fence_encoder.h" #include "shared/source/command_stream/stream_properties.h" #include "shared/source/gmm_helper/gmm.h" #include "shared/source/gmm_helper/gmm_helper.h" @@ -401,32 +400,3 @@ XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenCleanHeapsAndSlmNotChangedAndU EXPECT_EQ(cmdSba->getStatelessDataPortAccessMemoryObjectControlState(), (gmmHelper->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED))); } - -XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, givenStreamWhenEncodingSystemMemoryFenceThenCorrectFenceAddressIsSet) { - using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; - - const GraphicsAllocation allocation(0, AllocationType::UNKNOWN, - nullptr, 1234, 0, 4096, MemoryPool::System4KBPages, MemoryManager::maxOsContextCount); - - auto before = cmdContainer->getCommandStream()->getUsed(); - auto cmd = reinterpret_cast(cmdContainer->getCommandStream()->getSpace(0)); - EncodeMemoryFence::encodeSystemMemoryFence(*cmdContainer->getCommandStream(), &allocation); - auto after = cmdContainer->getCommandStream()->getUsed(); - - EXPECT_EQ(sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS), after - before); - - STATE_SYSTEM_MEM_FENCE_ADDRESS expectedCmd = FamilyType::cmdInitStateSystemMemFenceAddress; - expectedCmd.setSystemMemoryFenceAddress(allocation.getGpuAddress()); - - EXPECT_EQ(expectedCmd.getSystemMemoryFenceAddress(), cmd->getSystemMemoryFenceAddress()); - EXPECT_EQ(expectedCmd.TheStructure.RawData[0], cmd->TheStructure.RawData[0]); - EXPECT_EQ(expectedCmd.TheStructure.RawData[1], cmd->TheStructure.RawData[1]); - EXPECT_EQ(expectedCmd.TheStructure.RawData[2], cmd->TheStructure.RawData[2]); -} - -XE_HPC_CORETEST_F(EncodeKernelXeHpcCoreTest, whenSizeForEncodeSystemMemoryFenceQueriedThenCorrectValueIsReturned) { - using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS; - - auto size = 
EncodeMemoryFence::getSystemMemoryFenceSize(); - EXPECT_EQ(sizeof(STATE_SYSTEM_MEM_FENCE_ADDRESS), size); -}