performance: Do not create global fence allocation on integrated

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-03-31 09:33:56 +00:00
committed by Compute-Runtime-Automation
parent 424b23eb24
commit ecf8a07d26
17 changed files with 139 additions and 56 deletions

View File

@@ -847,15 +847,14 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenStateBaseAddressNotChangedWhe
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
configureCSRHeapStatesToNonDirty<FamilyType>();
auto usedBefore = commandStreamReceiver.commandStream.getUsed();
flushTaskFlags.l3CacheSettings = L3CachingSettings::notApplicable;
flushTask(commandStreamReceiver);
auto base = commandStreamReceiver.commandStream.getCpuBase();
auto stateBaseAddress = base
? genCmdCast<typename FamilyType::STATE_BASE_ADDRESS *>(base)
: nullptr;
EXPECT_EQ(nullptr, stateBaseAddress);
parseCommands<FamilyType>(commandStreamReceiver.commandStream, usedBefore);
auto stateBaseAddressItor = find<typename FamilyType::STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), stateBaseAddressItor);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyCqsWhenFlushingTaskThenCommandNotAdded) {
@@ -937,7 +936,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleWh
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
flushTask(commandStreamReceiver);
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
EXPECT_GE(sizeNeeded, csrCS.getUsed());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
@@ -977,7 +976,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
flushTask(commandStreamReceiver);
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
EXPECT_GE(sizeNeeded, csrCS.getUsed());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaAndPipeControlWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
@@ -1026,7 +1025,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
*mockDevice);
// Verify that we didn't grab a new CS buffer
EXPECT_EQ(expectedUsed, csrCS.getUsed());
EXPECT_GE(expectedUsed, csrCS.getUsed());
EXPECT_EQ(expectedBase, csrCS.getCpuBase());
}

View File

@@ -42,6 +42,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -161,6 +162,7 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -247,13 +249,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaMiMemFenceTestXe2HpgCore, givenSystemMemo
commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, *pDevice);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct SystemMemoryFenceViaComputeWalkerTest : public UltCommandStreamReceiverTest {
@@ -283,13 +287,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemM
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemMemoryFenceGeneratedAsPostSyncOperationInComputeWalkerWhenDispatchWalkerIsCalledThenSystemMemoryFenceRequestInPostSyncDataIsProgrammed) {
@@ -347,13 +353,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe2HpgCore, givenSys
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct SystemMemoryFenceInDefaultConfigurationTest : public UltCommandStreamReceiverTest {
@@ -374,6 +382,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -411,6 +422,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -449,6 +463,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -490,6 +507,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -533,6 +553,10 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
MockCommandQueueHw<FamilyType> commandQueue(&context, pClDevice, nullptr);

View File

@@ -42,6 +42,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -222,13 +223,15 @@ XE3_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe3Core, givenSystemMemoryFe
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct SystemMemoryFenceViaKernelInstructionTest : public UltCommandStreamReceiverTest {
@@ -259,13 +262,15 @@ XE3_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe3Core, givenSystemMemo
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct Xe3MidThreadCommandStreamReceiverTest : public UltCommandStreamReceiverTest {

View File

@@ -29,6 +29,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -58,6 +59,7 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -599,26 +599,37 @@ XE_HPC_CORETEST_F(GfxCoreHelperTestsXeHpcCore, givenGfxCoreHelperWhenAskedIfFenc
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
}