From 304fba1eba46b4ec248a4282a309a652f713a305 Mon Sep 17 00:00:00 2001 From: Lukasz Jobczyk Date: Wed, 30 Apr 2025 14:08:12 +0000 Subject: [PATCH] performance: Remove global fence from command stream on BMG Related-To: NEO-14642 Signed-off-by: Lukasz Jobczyk --- ...s_agnostic_product_helper_xe2_hpg_core.inl | 2 +- .../command_stream_receiver_tests.cpp | 2 +- .../direct_submission_tests_1.cpp | 8 +++--- .../direct_submission_tests_2.cpp | 27 ++++++++++++------- .../windows/device_command_stream_tests.cpp | 2 +- .../gfx_core_helper_tests_xe2_hpg_core.cpp | 11 +++----- 6 files changed, 29 insertions(+), 23 deletions(-) diff --git a/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl b/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl index a9b3cc9dfb..0c0912a113 100644 --- a/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl +++ b/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl @@ -20,7 +20,7 @@ bool ProductHelperHw::isBlitterForImagesSupported() const { template <> bool ProductHelperHw::isGlobalFenceInCommandStreamRequired(const HardwareInfo &hwInfo) const { - return !hwInfo.capabilityTable.isIntegratedDevice; + return false; } template <> diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index c4cb87756c..ba96ac25c2 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -3075,7 +3075,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT fenceExpected &= getHelper().isDeviceToHostCopySignalingFenceRequired(); size_t expectedFenceCount = fenceExpected ? 1 : 0; - if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) { + if (getHelper().isGlobalFenceInCommandStreamRequired(pDevice->getHardwareInfo())) { expectedFenceCount += 2; } diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp index a9a2c2c37f..c59178419b 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp @@ -828,7 +828,7 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(0u, NEO::IoFunctions::mockFcloseCalled); size_t expectedSize = Dispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(false); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -910,7 +910,7 @@ HWTEST_F(DirectSubmissionTest, directSubmission.getDiagnosticModeSection(); expectedSize += expectedExecCount * (directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler()); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -953,7 +953,7 @@ HWTEST_F(DirectSubmissionTest, } size_t cmdOffset = 0; - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { cmdOffset = directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -1019,7 +1019,7 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler()); expectedSize += expectedExecCount * expectedDispatch; - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index 3c4852562a..ed9843afc5 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -49,7 +49,7 @@ struct DirectSubmissionDispatchMiMemFenceTest : public DirectSubmissionDispatchB DirectSubmissionDispatchBufferTest::SetUp(); auto &productHelper = pDevice->getProductHelper(); - miMemFenceSupported = pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice ? false : productHelper.isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()); + miMemFenceSupported = productHelper.isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()); auto &compilerProductHelper = pDevice->getCompilerProductHelper(); heaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo)); @@ -109,9 +109,18 @@ struct DirectSubmissionDispatchMiMemFenceTest : public DirectSubmissionDispatchB EXPECT_EQ(expectedFenceCount, fenceCount); EXPECT_EQ(expectedSysMemFenceCount, sysMemFenceCount); } else { - EXPECT_EQ(-1, systemMemoryFenceId); + if (directSubmission.globalFenceAllocation) { + if (expectedSysMemFenceCount > 0) { + EXPECT_NE(-1, systemMemoryFenceId); + } else { + EXPECT_EQ(-1, systemMemoryFenceId); + } + EXPECT_EQ(expectedSysMemFenceCount, sysMemFenceCount); + } else { + EXPECT_EQ(-1, systemMemoryFenceId); + EXPECT_EQ(0u, sysMemFenceCount); + } EXPECT_EQ(0u, fenceCount); - EXPECT_EQ(0u, sysMemFenceCount); } } @@ -261,7 +270,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, directSubmission.getSizeSemaphoreSection(false) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -640,7 +649,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = RenderDispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(false); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -694,7 +703,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.handleResidencyCount); size_t submitSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -720,7 +729,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = RenderDispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(false); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -771,7 +780,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.handleResidencyCount); size_t submitSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -864,7 +873,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, directSubmission.getSizeSemaphoreSection(false) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM); - if (directSubmission.miMemFenceRequired && !heaplessStateInit) { + if (directSubmission.globalFenceAllocation && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } diff --git a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index b9bfa52b17..6839732d59 100644 --- a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -1368,7 +1368,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB auto &compilerProductHelper = device->getCompilerProductHelper(); auto heaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo)); - if (directSubmission->miMemFenceRequired && !heaplessStateInit) { + if (directSubmission->globalFenceAllocation && !heaplessStateInit) { expectedSize += directSubmission->getSizeSystemMemoryFenceAddress(); } if (directSubmission->isRelaxedOrderingEnabled()) { diff --git a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp index d8734e002b..84b2f2e25c 100644 --- a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp +++ b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp @@ -520,17 +520,14 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe } XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { - using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; - - EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); + EXPECT_EQ(0u, MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); } XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDebugMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { DebugManagerStateRestore restorer; debugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); - using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; - EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * 2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); + EXPECT_EQ(0u, MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); } XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { @@ -680,9 +677,9 @@ XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenCallUseGem EXPECT_TRUE(productHelper.useGemCreateExtInAllocateMemoryByKMD()); } -XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenAskingForGlobalFenceSupportThenReturnTrue) { +XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenAskingForGlobalFenceSupportThenReturnFalse) { const auto &productHelper = getHelper(); - EXPECT_EQ(productHelper.isGlobalFenceInCommandStreamRequired(*defaultHwInfo), !defaultHwInfo->capabilityTable.isIntegratedDevice); + EXPECT_FALSE(productHelper.isGlobalFenceInCommandStreamRequired(*defaultHwInfo)); } XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenAskingForCooperativeEngineSupportThenReturnTrue) {