performance: Do not create global fence allocation on integrated devices

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2025-03-31 09:33:56 +00:00
committed by Compute-Runtime-Automation
parent 424b23eb24
commit ecf8a07d26
17 changed files with 139 additions and 56 deletions

View File

@@ -393,7 +393,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe2HpgCore,
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !device->getHwInfo().capabilityTable.isIntegratedDevice);
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2024 Intel Corporation
* Copyright (C) 2024-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -22,6 +22,10 @@ namespace ult {
using CommandQueueCommandsXe2HpgCore = Test<DeviceFixture>;
HWTEST2_F(CommandQueueCommandsXe2HpgCore, givenCommandQueueWhenExecutingCommandListsThenStateSystemMemFenceAddressCmdIsGenerated, IsXe2HpgCore) {
if (neoDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
ze_command_queue_desc_t desc = {};
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;

View File

@@ -478,7 +478,7 @@ HWTEST2_F(CommandListAppendLaunchKernelXe3Core,
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !device->getHwInfo().capabilityTable.isIntegratedDevice);
result = context->freeMem(ptr);
ASSERT_EQ(result, ZE_RESULT_SUCCESS);

View File

@@ -847,15 +847,14 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenStateBaseAddressNotChangedWhe
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.isPreambleSent = true;
configureCSRHeapStatesToNonDirty<FamilyType>();
auto usedBefore = commandStreamReceiver.commandStream.getUsed();
flushTaskFlags.l3CacheSettings = L3CachingSettings::notApplicable;
flushTask(commandStreamReceiver);
auto base = commandStreamReceiver.commandStream.getCpuBase();
auto stateBaseAddress = base
? genCmdCast<typename FamilyType::STATE_BASE_ADDRESS *>(base)
: nullptr;
EXPECT_EQ(nullptr, stateBaseAddress);
parseCommands<FamilyType>(commandStreamReceiver.commandStream, usedBefore);
auto stateBaseAddressItor = find<typename FamilyType::STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(cmdList.end(), stateBaseAddressItor);
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEmptyCqsWhenFlushingTaskThenCommandNotAdded) {
@@ -937,7 +936,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleWh
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
flushTask(commandStreamReceiver);
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
EXPECT_GE(sizeNeeded, csrCS.getUsed());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
@@ -977,7 +976,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
flushTaskFlags.threadArbitrationPolicy, PreemptionMode::Disabled);
flushTask(commandStreamReceiver);
EXPECT_EQ(sizeNeeded, csrCS.getUsed());
EXPECT_GE(sizeNeeded, csrCS.getUsed());
}
HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAndSbaAndPipeControlWhenFlushingTaskThenOnlyAvailableMemoryIsUsed) {
@@ -1026,7 +1025,7 @@ HWTEST_F(CommandStreamReceiverFlushTaskTests, GivenEnoughMemoryOnlyForPreambleAn
*mockDevice);
// Verify that we didn't grab a new CS buffer
EXPECT_EQ(expectedUsed, csrCS.getUsed());
EXPECT_GE(expectedUsed, csrCS.getUsed());
EXPECT_EQ(expectedBase, csrCS.getCpuBase());
}

View File

@@ -42,6 +42,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -161,6 +162,7 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -247,13 +249,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaMiMemFenceTestXe2HpgCore, givenSystemMemo
commandStreamReceiver.flushBcsTask(blitPropertiesContainer, false, *pDevice);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct SystemMemoryFenceViaComputeWalkerTest : public UltCommandStreamReceiverTest {
@@ -283,13 +287,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemM
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
XE2_HPG_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe2HpgCore, givenSystemMemoryFenceGeneratedAsPostSyncOperationInComputeWalkerWhenDispatchWalkerIsCalledThenSystemMemoryFenceRequestInPostSyncDataIsProgrammed) {
@@ -347,13 +353,15 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe2HpgCore, givenSys
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct SystemMemoryFenceInDefaultConfigurationTest : public UltCommandStreamReceiverTest {
@@ -374,6 +382,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -411,6 +422,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -449,6 +463,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -490,6 +507,9 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
@@ -533,6 +553,10 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
MockKernelWithInternals kernel(*pClDevice);
MockContext context(pClDevice);
MockCommandQueueHw<FamilyType> commandQueue(&context, pClDevice, nullptr);

View File

@@ -42,6 +42,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -222,13 +223,15 @@ XE3_CORETEST_F(SystemMemoryFenceViaComputeWalkerTestXe3Core, givenSystemMemoryFe
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct SystemMemoryFenceViaKernelInstructionTest : public UltCommandStreamReceiverTest {
@@ -259,13 +262,15 @@ XE3_CORETEST_F(SystemMemoryFenceViaKernelInstructionTestXe3Core, givenSystemMemo
commandStreamReceiver.programEnginePrologue(cmdStream);
EXPECT_TRUE(commandStreamReceiver.isEnginePrologueSent);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(cmdStream);
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
}
}
struct Xe3MidThreadCommandStreamReceiverTest : public UltCommandStreamReceiverTest {

View File

@@ -29,6 +29,7 @@ struct MemorySynchronizationViaMiSemaphoreWaitTest : public UltCommandStreamRece
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;
@@ -58,6 +59,7 @@ struct SystemMemoryFenceInDisabledConfigurationTest : public UltCommandStreamRec
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
UltCommandStreamReceiverTest::SetUp();
}
DebugManagerStateRestore restore;

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -599,26 +599,37 @@ XE_HPC_CORETEST_F(GfxCoreHelperTestsXeHpcCore, givenGfxCoreHelperWhenAskedIfFenc
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
}

View File

@@ -415,7 +415,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container, EncodeDis
.requiredDispatchWalkOrder = args.requiredDispatchWalkOrder,
.localRegionSize = args.localRegionSize,
.maxFrontEndThreads = args.device->getDeviceInfo().maxFrontEndThreads,
.requiredSystemFence = args.requiresSystemMemoryFence(),
.requiredSystemFence = args.requiresSystemMemoryFence() && args.device->getGfxCoreHelper().isFenceAllocationRequired(hwInfo),
.hasSample = kernelDescriptor.kernelAttributes.flags.hasSample};
EncodeDispatchKernel<Family>::encodeAdditionalWalkerFields(rootDeviceEnvironment, walkerCmd, walkerArgs);

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -12,12 +12,19 @@ namespace NEO {
template <typename Family>
bool GfxCoreHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwInfo) const {
if ((debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() == 1) ||
(debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() == 1) ||
(debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.get() == 1) ||
(debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 1)) {
return true;
}
if ((debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() == 0) &&
(debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.get() == 0) &&
(debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.get() == 0)) {
(debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.get() == 0) &&
(debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 0)) {
return false;
}
return true;
return !hwInfo.capabilityTable.isIntegratedDevice;
}
template <typename Family>

View File

@@ -4472,6 +4472,9 @@ HWTEST2_F(CommandStreamReceiverHwTest,
givenImmediateFlushTaskWhenOneTimeContextSystemFenceRequiredThenExpectOneTimeSystemFenceCommand,
IsHeapfulSupportedAndAtLeastXeHpcCore) {
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
if (pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}
auto &commandStreamReceiver = pDevice->getUltCommandStreamReceiver<FamilyType>();
commandStreamReceiver.storeMakeResidentAllocations = true;

View File

@@ -199,7 +199,7 @@ HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenDebugFlagSetToTrueWhenCrea
DebugManagerStateRestore restorer;
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
if (heaplessStateInit) {
if (heaplessStateInit || pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
GTEST_SKIP();
}

View File

@@ -17,6 +17,7 @@
#include "shared/source/os_interface/windows/wddm_residency_controller.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/memory_management.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_io_functions.h"
@@ -86,7 +87,6 @@ using WddmDirectSubmissionWithMockGdiDllTest = Test<WddmDirectSubmissionWithMock
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThenExpectProperCommandsDispatched) {
DebugManagerStateRestore restorer;
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
std::unique_ptr<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>> wddmDirectSubmission =
std::make_unique<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>>(*device->getDefaultEngine().commandStreamReceiver);
@@ -120,9 +120,15 @@ HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedAndStartedThe
EXPECT_EQ(1u, wddmMockInterface->destroyMonitorFenceCalled);
}
HWTEST_F(WddmDirectSubmissionTest, givenWddmWhenDirectIsInitializedWithMiMemFenceSupportedThenMakeGlobalFenceResident) {
struct WddmDirectSubmissionGlobalFenceTest : public WddmDirectSubmissionTest {
void SetUp() override {
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
WddmDirectSubmissionTest::SetUp();
}
DebugManagerStateRestore restorer;
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
};
HWTEST_F(WddmDirectSubmissionGlobalFenceTest, givenWddmWhenDirectIsInitializedWithMiMemFenceSupportedThenMakeGlobalFenceResident) {
std::unique_ptr<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>> wddmDirectSubmission =
std::make_unique<MockWddmDirectSubmission<FamilyType, RenderDispatcher<FamilyType>>>(*device->getDefaultEngine().commandStreamReceiver);

View File

@@ -484,26 +484,37 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
}

View File

@@ -438,7 +438,7 @@ XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenDefaultSettingForFenceWhenKe
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice);
}
XE2_HPG_CORETEST_F(EncodeKernelXe2HpgCoreTest, givenCleanHeapsAndSlmNotChangedAndUncachedMocsRequestedThenSBAIsProgrammedAndMocsAreSet) {

View File

@@ -486,26 +486,37 @@ XE3_CORETEST_F(GfxCoreHelperTestsXe3Core, givenGfxCoreHelperWhenAskedIfFenceAllo
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(-1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo), !hwInfo.capabilityTable.isIntegratedDevice);
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(1);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(1);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(1);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(0);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(0);
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
EXPECT_TRUE(gfxCoreHelper.isFenceAllocationRequired(hwInfo));
}

View File

@@ -379,7 +379,7 @@ XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDefaultSettingForFenceWhenKernelUse
auto walkerCmd = genCmdCast<DefaultWalkerType *>(*itor);
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
EXPECT_EQ(postSyncData.getSystemMemoryFenceRequest(), !pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice);
}
XE3_CORETEST_F(EncodeKernelXe3CoreTest, givenDebugFlagSetWhenSetPropertiesAllCalledThenDisablePipelinedThreadArbitrationPolicy) {