performance: Do not program additional synchronization on integrated devices

Related-To: NEO-12324

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk
2024-09-25 15:28:26 +00:00
committed by Compute-Runtime-Automation
parent c98b0c346d
commit c93998bcb9
6 changed files with 65 additions and 44 deletions

View File

@@ -1255,7 +1255,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
EXPECT_NE(nullptr, miSemaphoreWaitCmd);
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
} else {
} else if (MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(pDevice->getRootDeviceEnvironment()) > 0) {
cmdIterator++;
}
}
@@ -1286,7 +1286,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
EXPECT_NE(nullptr, miSemaphoreWaitCmd);
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
} else {
} else if (MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(pDevice->getRootDeviceEnvironment()) > 0) {
cmdIterator++;
}
}

View File

@@ -396,11 +396,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
}
}
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
@@ -432,11 +434,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
}
}
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
@@ -468,11 +472,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
}
auto event = castToObject<Event>(kernelEvent);
event->release();
@@ -508,11 +514,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
}
auto event = castToObject<Event>(kernelEvent);
event->release();
@@ -549,11 +557,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
auto &postSyncData = walkerCmd->getPostSync();
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
}
auto event = castToObject<Event>(kernelEvent);
event->release();

View File

@@ -483,6 +483,11 @@ struct MemorySynchronizationCommands {
static void setBarrierWaFlags(void *barrierCmd);
// Selects which command, if any, is programmed as the additional
// synchronization. The numeric values are significant: the
// ProgramGlobalFenceAsMiMemFenceCommandInCommandStream debug flag is
// static_cast directly to this type, so they are spelled out explicitly.
enum class AdditionalSynchronizationType : uint32_t {
    semaphore = 0, // sized via EncodeSemaphore<Family>::getSizeMiSemaphoreWait()
    fence = 1,     // sized as sizeof(MI_MEM_FENCE)
    none = 2       // no additional synchronization; the size query returns 0
};
static void addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);
static void setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);

View File

@@ -166,27 +166,28 @@ aub_stream::MMIOList GfxCoreHelperHw<Family>::getExtraMmioList(const HardwareInf
// Returns the size in bytes of one additional synchronization command for the
// given root device environment: sizeof(MI_MEM_FENCE) for the fence type, the
// MI_SEMAPHORE_WAIT size reported by EncodeSemaphore for the semaphore type,
// and 0 otherwise.
// NOTE(review): this is a diff hunk whose +/- markers appear to have been lost;
// the duplicated statements below are presumably the removed line followed by
// its replacement - confirm against the real file before reusing this text.
template <>
size_t MemorySynchronizationCommands<Family>::getSizeForSingleAdditionalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment) {
// Presumably the removed default: a boolean that was always true.
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = true;
// Default now depends on the device: none when isIntegratedDevice is set, fence otherwise.
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = rootDeviceEnvironment.getHardwareInfo()->capabilityTable.isIntegratedDevice ? AdditionalSynchronizationType::none : AdditionalSynchronizationType::fence;
// A debug-flag value other than -1 overrides the default.
if (debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
// The raw flag value is cast straight to the enum, with no range check visible here.
programGlobalFenceAsMiMemFenceCommandInCommandStream = static_cast<AdditionalSynchronizationType>(debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get());
}
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::fence) {
return sizeof(Family::MI_MEM_FENCE);
} else {
} else if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::semaphore) {
return EncodeSemaphore<Family>::getSizeMiSemaphoreWait();
}
// Neither fence nor semaphore selected: nothing is programmed, so the size is zero.
return 0;
}
template <>
void MemorySynchronizationCommands<Family>::setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment) {
using MI_MEM_FENCE = typename Family::MI_MEM_FENCE;
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = true;
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = rootDeviceEnvironment.getHardwareInfo()->capabilityTable.isIntegratedDevice ? AdditionalSynchronizationType::none : AdditionalSynchronizationType::fence;
if (debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
programGlobalFenceAsMiMemFenceCommandInCommandStream = static_cast<AdditionalSynchronizationType>(debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get());
}
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::fence) {
MI_MEM_FENCE miMemFence = Family::cmdInitMemFence;
if (acquire) {
miMemFence.setFenceType(Family::MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_ACQUIRE);
@@ -195,7 +196,7 @@ void MemorySynchronizationCommands<Family>::setAdditionalSynchronization(void *&
}
*reinterpret_cast<MI_MEM_FENCE *>(commandsBuffer) = miMemFence;
commandsBuffer = ptrOffset(commandsBuffer, sizeof(MI_MEM_FENCE));
} else {
} else if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::semaphore) {
EncodeSemaphore<Family>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandsBuffer),
gpuAddress,
EncodeSemaphore<Family>::invalidHardwareTag,

View File

@@ -3033,7 +3033,10 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT
auto miMemFence = genCmdCast<typename FamilyType::MI_MEM_FENCE *>(*++cmdIterator);
fenceExpected &= getHelper<ProductHelper>().isDeviceToHostCopySignalingFenceRequired();
size_t expectedFenceCount = fenceExpected ? 3 : 2;
size_t expectedFenceCount = fenceExpected ? 1 : 0;
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
expectedFenceCount += 2;
}
auto fences = findAll<typename FamilyType::MI_MEM_FENCE *>(cmdIterator, cmdList.end());
EXPECT_EQ(expectedFenceCount, fences.size());

View File

@@ -417,7 +417,7 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe
// Checks the default value returned by getSizeForAdditonalSynchronization for the test device.
// NOTE(review): the two EXPECT_EQ lines are presumably the removed and the added
// version of the same check (diff markers appear to be missing) - confirm.
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
EXPECT_EQ(sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
// The !isIntegratedDevice factor makes the expected size 0 on integrated devices
// and sizeof(MI_MEM_FENCE) otherwise.
EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
}
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDebugMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
@@ -425,7 +425,7 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDebugMemorySynchronization
debugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1);
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
EXPECT_EQ(2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * 2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
}
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
@@ -456,12 +456,14 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizati
MemorySynchronizationCommands<FamilyType>::addAdditionalSynchronization(commandStream, 0x0, false, rootDeviceEnvironment);
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream);
EXPECT_EQ(1u, hwParser.cmdList.size());
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*hwParser.cmdList.begin());
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(rootDeviceEnvironment) > 0) {
HardwareParse hwParser;
hwParser.parseCommands<FamilyType>(commandStream);
EXPECT_EQ(1u, hwParser.cmdList.size());
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*hwParser.cmdList.begin());
ASSERT_NE(nullptr, fenceCmd);
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
}
}
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenAddingAdditionalSynchronizationThenSemaphoreWaitIsCalled) {