Revert "performance: Do not create global fence allocation on integrated"

This reverts commit ecf8a07d26.

Signed-off-by: Compute-Runtime-Validation <compute-runtime-validation@intel.com>
This commit is contained in:
Compute-Runtime-Validation
2025-04-04 14:59:34 +02:00
committed by Compute-Runtime-Automation
parent bd516b3552
commit f332571d96
17 changed files with 53 additions and 136 deletions

View File

@@ -847,14 +847,15 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenStateBaseAddressNotChangedWhe
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
configureCSRHeapStatesToNonDirty<FamilyType>();
auto usedBefore = commandStreamReceiver.commandStream.getUsed();
flushTaskFlags.l3CacheSettings = L3CachingSettings::notApplicable;
flushTask(commandStreamReceiver);
parseCommands<FamilyType>(commandStreamReceiver.commandStream, usedBefore);
auto stateBaseAddressItor = find<typename FamilyType::STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), stateBaseAddressItor);
auto base = commandStreamReceiver.commandStream.getCpuBase();
auto stateBaseAddress = base
? genCmdCast<typename FamilyType::STATE_BASE_ADDRESS *>(base)
: nullptr;
EXPECT_EQ(nullptr, stateBaseAddress);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyCqsWhenFlushingTaskThenCommandNotAdded) {
@@ -936,7 +937,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleWh
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
flushTask(commandStreamReceiver);
EXPECT_GE(sizeNeeded, csrCS.getUsed());
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
@@ -976,7 +977,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
flushTask(commandStreamReceiver);
EXPECT_GE(sizeNeeded, csrCS.getUsed());
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaAndPipeControlWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
@@ -1025,7 +1026,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
*mockDevice);
// Verify that we didn't grab a new CS buffer
EXPECT_GE(expectedUsed, csrCS.getUsed());
EXPECT_EQ(expectedUsed, csrCS.getUsed());
EXPECT_EQ(expectedBase, csrCS.getCpuBase());
}

View File

@@ -42,7 +42,6 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -162,7 +161,6 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -249,15 +247,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaMiMemFenceTestXe2HpgCore, givenSystemMemo
commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, *pDevice);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
struct SystemMemoryFenceViaComputeWalkerTest : public UltCommandStreamReceiverTest {
@@ -287,15 +283,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemM
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemMemoryFenceGeneratedAsPostSyncOperationInComputeWalkerWhenDispatchWalkerIsCalledThenSystemMemoryFenceRequestInPostSyncDataIsProgrammed) {
@@ -353,15 +347,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe2HpgCore, givenSys
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
struct SystemMemoryFenceInDefaultConfigurationTest : public UltCommandStreamReceiverTest {
@@ -382,9 +374,6 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -422,9 +411,6 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -463,9 +449,6 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -507,9 +490,6 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -553,10 +533,6 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
MockCommandQueueHw<FamilyType> commandQueue(&context, pClDevice, nullptr);

View File

@@ -42,7 +42,6 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -223,15 +222,13 @@ XE3_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe3Core, givenSystemMemoryFe
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
struct SystemMemoryFenceViaKernelInstructionTest : public UltCommandStreamReceiverTest {
@@ -262,15 +259,13 @@ XE3_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe3Core, givenSystemMemo
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
struct Xe3MidThreadCommandStreamReceiverTest : public UltCommandStreamReceiverTest {

View File

@@ -29,7 +29,6 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -59,7 +58,6 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;

View File

@@ -599,37 +599,26 @@ XE_HPC_CORETEST_F(GfxCoreHelperTestsXeHpcCore, givenGfxCoreHelperWhenAskedIfFenc
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
}