From 9595526a52e193a19cb83eb008ec7757ebfb3876 Mon Sep 17 00:00:00 2001 From: Compute-Runtime-Validation Date: Thu, 1 May 2025 05:46:43 +0200 Subject: [PATCH] Revert "performance: Remove global fence from command stream on BMG" This reverts commit 304fba1eba46b4ec248a4282a309a652f713a305. Signed-off-by: Compute-Runtime-Validation --- ...s_agnostic_product_helper_xe2_hpg_core.inl | 2 +- .../command_stream_receiver_tests.cpp | 2 +- .../direct_submission_tests_1.cpp | 8 +++--- .../direct_submission_tests_2.cpp | 27 +++++++------------ .../windows/device_command_stream_tests.cpp | 2 +- .../gfx_core_helper_tests_xe2_hpg_core.cpp | 11 +++++--- 6 files changed, 23 insertions(+), 29 deletions(-) diff --git a/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl b/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl index 0c0912a113..a9b3cc9dfb 100644 --- a/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl +++ b/shared/source/xe2_hpg_core/os_agnostic_product_helper_xe2_hpg_core.inl @@ -20,7 +20,7 @@ bool ProductHelperHw::isBlitterForImagesSupported() const { template <> bool ProductHelperHw::isGlobalFenceInCommandStreamRequired(const HardwareInfo &hwInfo) const { - return false; + return !hwInfo.capabilityTable.isIntegratedDevice; } template <> diff --git a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp index ba96ac25c2..c4cb87756c 100644 --- a/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp +++ b/shared/test/unit_test/command_stream/command_stream_receiver_tests.cpp @@ -3075,7 +3075,7 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT fenceExpected &= getHelper().isDeviceToHostCopySignalingFenceRequired(); size_t expectedFenceCount = fenceExpected ? 1 : 0; - if (getHelper().isGlobalFenceInCommandStreamRequired(pDevice->getHardwareInfo())) { + if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) { expectedFenceCount += 2; } diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp index c59178419b..a9a2c2c37f 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_1.cpp @@ -828,7 +828,7 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(0u, NEO::IoFunctions::mockFcloseCalled); size_t expectedSize = Dispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(false); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -910,7 +910,7 @@ HWTEST_F(DirectSubmissionTest, directSubmission.getDiagnosticModeSection(); expectedSize += expectedExecCount * (directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler()); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -953,7 +953,7 @@ HWTEST_F(DirectSubmissionTest, } size_t cmdOffset = 0; - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { cmdOffset = directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -1019,7 +1019,7 @@ HWTEST_F(DirectSubmissionTest, EXPECT_EQ(expectedDispatch, directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler()); expectedSize += expectedExecCount * expectedDispatch; - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { expectedSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { diff --git a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp index ed9843afc5..3c4852562a 100644 --- a/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp +++ b/shared/test/unit_test/direct_submission/direct_submission_tests_2.cpp @@ -49,7 +49,7 @@ struct DirectSubmissionDispatchMiMemFenceTest : public DirectSubmissionDispatchB DirectSubmissionDispatchBufferTest::SetUp(); auto &productHelper = pDevice->getProductHelper(); - miMemFenceSupported = productHelper.isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()); + miMemFenceSupported = pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice ? false : productHelper.isGlobalFenceInDirectSubmissionRequired(pDevice->getHardwareInfo()); auto &compilerProductHelper = pDevice->getCompilerProductHelper(); heaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo)); @@ -109,18 +109,9 @@ struct DirectSubmissionDispatchMiMemFenceTest : public DirectSubmissionDispatchB EXPECT_EQ(expectedFenceCount, fenceCount); EXPECT_EQ(expectedSysMemFenceCount, sysMemFenceCount); } else { - if (directSubmission.globalFenceAllocation) { - if (expectedSysMemFenceCount > 0) { - EXPECT_NE(-1, systemMemoryFenceId); - } else { - EXPECT_EQ(-1, systemMemoryFenceId); - } - EXPECT_EQ(expectedSysMemFenceCount, sysMemFenceCount); - } else { - EXPECT_EQ(-1, systemMemoryFenceId); - EXPECT_EQ(0u, sysMemFenceCount); - } + EXPECT_EQ(-1, systemMemoryFenceId); EXPECT_EQ(0u, fenceCount); + EXPECT_EQ(0u, sysMemFenceCount); } } @@ -270,7 +261,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, directSubmission.getSizeSemaphoreSection(false) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -649,7 +640,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = RenderDispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(false); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -703,7 +694,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.handleResidencyCount); size_t submitSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -729,7 +720,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.submitCount); size_t submitSize = RenderDispatcher::getSizePreemption() + directSubmission.getSizeSemaphoreSection(false); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -780,7 +771,7 @@ HWTEST_F(DirectSubmissionDispatchBufferTest, EXPECT_EQ(1u, directSubmission.handleResidencyCount); size_t submitSize = directSubmission.getSizeDispatch(false, false, directSubmission.dispatchMonitorFenceRequired(false)) - directSubmission.getSizeNewResourceHandler(); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } if (directSubmission.isRelaxedOrderingEnabled()) { @@ -873,7 +864,7 @@ HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest, directSubmission.getSizeSemaphoreSection(false) + sizeof(MI_LOAD_REGISTER_IMM) + sizeof(MI_LOAD_REGISTER_MEM); - if (directSubmission.globalFenceAllocation && !heaplessStateInit) { + if (directSubmission.miMemFenceRequired && !heaplessStateInit) { submitSize += directSubmission.getSizeSystemMemoryFenceAddress(); } diff --git a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp index 6839732d59..b9bfa52b17 100644 --- a/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp +++ b/shared/test/unit_test/os_interface/windows/device_command_stream_tests.cpp @@ -1368,7 +1368,7 @@ HWTEST_TEMPLATED_F(WddmCommandStreamMockGdiTest, givenDirectSubmissionEnabledOnB auto &compilerProductHelper = device->getCompilerProductHelper(); auto heaplessStateInit = compilerProductHelper.isHeaplessStateInitEnabled(compilerProductHelper.isHeaplessModeEnabled(*defaultHwInfo)); - if (directSubmission->globalFenceAllocation && !heaplessStateInit) { + if (directSubmission->miMemFenceRequired && !heaplessStateInit) { expectedSize += directSubmission->getSizeSystemMemoryFenceAddress(); } if (directSubmission->isRelaxedOrderingEnabled()) { diff --git a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp index 84b2f2e25c..d8734e002b 100644 --- a/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp +++ b/shared/test/unit_test/xe2_hpg_core/gfx_core_helper_tests_xe2_hpg_core.cpp @@ -520,14 +520,17 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe } XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { - EXPECT_EQ(0u, MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); + using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; + + EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); } XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDebugMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { DebugManagerStateRestore restorer; debugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1); + using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE; - EXPECT_EQ(0u, MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); + EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * 2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment())); } XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) { @@ -677,9 +680,9 @@ XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenCallUseGem EXPECT_TRUE(productHelper.useGemCreateExtInAllocateMemoryByKMD()); } -XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenAskingForGlobalFenceSupportThenReturnFalse) { +XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenAskingForGlobalFenceSupportThenReturnTrue) { const auto &productHelper = getHelper(); - EXPECT_FALSE(productHelper.isGlobalFenceInCommandStreamRequired(*defaultHwInfo)); + EXPECT_EQ(productHelper.isGlobalFenceInCommandStreamRequired(*defaultHwInfo), !defaultHwInfo->capabilityTable.isIntegratedDevice); } XE2_HPG_CORETEST_F(ProductHelperTestXe2HpgCore, givenProductHelperWhenAskingForCooperativeEngineSupportThenReturnTrue) {