mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 14:58:08 +08:00
performance: Do not create global fence allocation on integrated
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
424b23eb24
commit
ecf8a07d26
@@ -393,7 +393,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe2HpgCore,
|
||||
|
||||
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !device->getHwInfo().capabilityTable.isIntegratedDevice);
|
||||
|
||||
result = context->freeMem(ptr);
|
||||
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Intel Corporation
|
||||
* Copyright (C) 2024-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -22,6 +22,10 @@ namespace ult {
|
||||
using CommandQueueCommandsXe2HpgCore = Test<DeviceFixture>;
|
||||
|
||||
HWTEST2_F(CommandQueueCommandsXe2HpgCore, givenCommandQueueWhenExecutingCommandListsThenStateSystemMemFenceAddressCmdIsGenerated, IsXe2HpgCore) {
|
||||
if (neoDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
ze_command_queue_desc_t desc = {};
|
||||
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
@@ -478,7 +478,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
|
||||
|
||||
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !device->getHwInfo().capabilityTable.isIntegratedDevice);
|
||||
|
||||
result = context->freeMem(ptr);
|
||||
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
|
||||
|
||||
@@ -847,15 +847,14 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenStateBaseAddressNotChangedWhe
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.isPreambleSent = true;
|
||||
configureCSRHeapStatesToNonDirty<FamilyType>();
|
||||
auto usedBefore = commandStreamReceiver.commandStream.getUsed();
|
||||
flushTaskFlags.l3CacheSettings = L3CachingSettings::notApplicable;
|
||||
|
||||
flushTask(commandStreamReceiver);
|
||||
|
||||
auto base = commandStreamReceiver.commandStream.getCpuBase();
|
||||
|
||||
auto stateBaseAddress = base
|
||||
? genCmdCast<typename FamilyType::STATE_BASE_ADDRESS *>(base)
|
||||
: nullptr;
|
||||
EXPECT_EQ(nullptr, stateBaseAddress);
|
||||
parseCommands<FamilyType>(commandStreamReceiver.commandStream, usedBefore);
|
||||
auto stateBaseAddressItor = find<typename FamilyType::STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(cmdList.end(), stateBaseAddressItor);
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyCqsWhenFlushingTaskThenCommandNotAdded) {
|
||||
@@ -937,7 +936,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleWh
|
||||
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
|
||||
flushTask(commandStreamReceiver);
|
||||
|
||||
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
|
||||
EXPECT_GE(sizeNeeded, csrCS.getUsed());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
|
||||
@@ -977,7 +976,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
|
||||
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
|
||||
flushTask(commandStreamReceiver);
|
||||
|
||||
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
|
||||
EXPECT_GE(sizeNeeded, csrCS.getUsed());
|
||||
}
|
||||
|
||||
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaAndPipeControlWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
|
||||
@@ -1026,7 +1025,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
|
||||
*mockDevice);
|
||||
|
||||
// Verify that we didn't grab a new CS buffer
|
||||
EXPECT_EQ(expectedUsed, csrCS.getUsed());
|
||||
EXPECT_GE(expectedUsed, csrCS.getUsed());
|
||||
EXPECT_EQ(expectedBase, csrCS.getCpuBase());
|
||||
}
|
||||
|
||||
|
||||
@@ -42,6 +42,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
UltCommandStreamReceiverTest::SetUp();
|
||||
}
|
||||
DebugManagerStateRestore restore;
|
||||
@@ -161,6 +162,7 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
UltCommandStreamReceiverTest::SetUp();
|
||||
}
|
||||
DebugManagerStateRestore restore;
|
||||
@@ -247,13 +249,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaMiMemFenceTestXe2HpgCore, givenSystemMemo
|
||||
commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, *pDevice);
|
||||
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
}
|
||||
}
|
||||
|
||||
struct SystemMemoryFenceViaComputeWalkerTest : public UltCommandStreamReceiverTest {
|
||||
@@ -283,13 +287,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemM
|
||||
commandStreamReceiver.programEnginePrologue(cmdStream);
|
||||
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
}
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemMemoryFenceGeneratedAsPostSyncOperationInComputeWalkerWhenDispatchWalkerIsCalledThenSystemMemoryFenceRequestInPostSyncDataIsProgrammed) {
|
||||
@@ -347,13 +353,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe2HpgCore, givenSys
|
||||
commandStreamReceiver.programEnginePrologue(cmdStream);
|
||||
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
}
|
||||
}
|
||||
|
||||
struct SystemMemoryFenceInDefaultConfigurationTest : public UltCommandStreamReceiverTest {
|
||||
@@ -374,6 +382,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
@@ -411,6 +422,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
@@ -449,6 +463,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
@@ -490,6 +507,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
@@ -533,6 +553,10 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
|
||||
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
MockCommandQueueHw<FamilyType> commandQueue(&context, pClDevice, nullptr);
|
||||
|
||||
@@ -42,6 +42,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
UltCommandStreamReceiverTest::SetUp();
|
||||
}
|
||||
DebugManagerStateRestore restore;
|
||||
@@ -222,13 +223,15 @@ XE3_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe3Core, givenSystemMemoryFe
|
||||
commandStreamReceiver.programEnginePrologue(cmdStream);
|
||||
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
}
|
||||
}
|
||||
|
||||
struct SystemMemoryFenceViaKernelInstructionTest : public UltCommandStreamReceiverTest {
|
||||
@@ -259,13 +262,15 @@ XE3_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe3Core, givenSystemMemo
|
||||
commandStreamReceiver.programEnginePrologue(cmdStream);
|
||||
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(cmdStream);
|
||||
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
}
|
||||
}
|
||||
|
||||
struct Xe3MidThreadCommandStreamReceiverTest : public UltCommandStreamReceiverTest {
|
||||
|
||||
@@ -29,6 +29,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
UltCommandStreamReceiverTest::SetUp();
|
||||
}
|
||||
DebugManagerStateRestore restore;
|
||||
@@ -58,6 +59,7 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
UltCommandStreamReceiverTest::SetUp();
|
||||
}
|
||||
DebugManagerStateRestore restore;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -599,26 +599,37 @@ XE_HPC_CORETEST_F(GfxCoreHelperTestsXeHpcCore, givenGfxCoreHelperWhenAskedIfFenc
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
|
||||
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
}
|
||||
|
||||
|
||||
@@ -415,7 +415,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
|
||||
.requiredDispatchWalkOrder = args.requiredDispatchWalkOrder,
|
||||
.localRegionSize = args.localRegionSize,
|
||||
.maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads,
|
||||
.requiredSystemFence = args.requiresSystemMemoryFence(),
|
||||
.requiredSystemFence = args.requiresSystemMemoryFence() && args.device->getGfxCoreHelper().isFenceAllocationRequired(hwInfo),
|
||||
.hasSample = kernelDescriptor.kernelAttributes.flags.hasSample};
|
||||
|
||||
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2021-2024 Intel Corporation
|
||||
* Copyright (C) 2021-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -12,12 +12,19 @@ namespace NEO {
|
||||
|
||||
template <typename Family>
|
||||
bool GfxCoreHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
|
||||
if ((debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() == 1) ||
|
||||
(debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() == 1) ||
|
||||
(debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.get() == 1) ||
|
||||
(debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 1)) {
|
||||
return true;
|
||||
}
|
||||
if ((debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() == 0) &&
|
||||
(debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() == 0) &&
|
||||
(debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.get() == 0)) {
|
||||
(debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.get() == 0) &&
|
||||
(debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 0)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return !hwInfo.capabilityTable.isIntegratedDevice;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
||||
@@ -4472,6 +4472,9 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
||||
givenImmediateFlushTaskWhenOneTimeContextSystemFenceRequiredThenExpectOneTimeSystemFenceCommand,
|
||||
IsHeapfulSupportedAndAtLeastXeHpcCore) {
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
if (pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
commandStreamReceiver.storeMakeResidentAllocations = true;
|
||||
|
||||
@@ -199,7 +199,7 @@ HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenDebugFlagSetToTrueWhenCrea
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
|
||||
|
||||
if (heaplessStateInit) {
|
||||
if (heaplessStateInit || pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
#include "shared/source/os_interface/windows/wddm_residency_controller.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/debug_manager_state_restore.h"
|
||||
#include "shared/test/common/helpers/memory_management.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/helpers/variable_backup.h"
|
||||
#include "shared/test/common/mocks/mock_io_functions.h"
|
||||
@@ -86,7 +87,6 @@ using WddmDirectSubmissionWithMockGdiDllTest = Test<WddmDirectSubmissionWithMock
|
||||
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThenExpectProperCommandsDispatched) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
|
||||
std::unique_ptr<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>> wddmDirectSubmission =
|
||||
std::make_unique<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>>(*device->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
@@ -120,9 +120,15 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe
|
||||
EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled);
|
||||
}
|
||||
|
||||
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedWithMiMemFenceSupportedThenMakeGlobalFenceResident) {
|
||||
struct WddmDirectSubmissionGlobalFenceTest : public WddmDirectSubmissionTest {
|
||||
void SetUp() override {
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
|
||||
WddmDirectSubmissionTest::SetUp();
|
||||
}
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
|
||||
};
|
||||
|
||||
HWTEST_F(WddmDirectSubmissionGlobalFenceTest, givenWddmWhenDirectIsInitializedWithMiMemFenceSupportedThenMakeGlobalFenceResident) {
|
||||
std::unique_ptr<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>> wddmDirectSubmission =
|
||||
std::make_unique<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>>(*device->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
|
||||
@@ -484,26 +484,37 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
|
||||
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
}
|
||||
|
||||
|
||||
@@ -438,7 +438,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenKe
|
||||
|
||||
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice);
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) {
|
||||
|
||||
@@ -486,26 +486,37 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenGfxCoreHelperWhenAskedIfFenceAllo
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
|
||||
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
|
||||
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
|
||||
}
|
||||
|
||||
|
||||
@@ -379,7 +379,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse
|
||||
|
||||
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice);
|
||||
}
|
||||
|
||||
XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagSetWhenSetPropertiesAllCalledThenDisablePipelinedThreadArbitrationPolicy) {
|
||||
|
||||
Reference in New Issue
Block a user