mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 06:24:51 +08:00
performance: Do not program additional synchronization on integrated
Related-To: NEO-12324
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c98b0c346d
commit
c93998bcb9
@@ -1255,7 +1255,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
|
||||
auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
|
||||
EXPECT_NE(nullptr, miSemaphoreWaitCmd);
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
|
||||
} else {
|
||||
} else if (MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(pDevice->getRootDeviceEnvironment()) > 0) {
|
||||
cmdIterator++;
|
||||
}
|
||||
}
|
||||
@@ -1286,7 +1286,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC
|
||||
auto miSemaphoreWaitCmd = genCmdCast<MI_SEMAPHORE_WAIT *>(*(cmdIterator++));
|
||||
EXPECT_NE(nullptr, miSemaphoreWaitCmd);
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd));
|
||||
} else {
|
||||
} else if (MemorySynchronizationCommands<FamilyType>::getSizeForSingleAdditionalSynchronization(pDevice->getRootDeviceEnvironment()) > 0) {
|
||||
cmdIterator++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -396,11 +396,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
}
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
@@ -432,11 +434,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
}
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
@@ -468,11 +472,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
}
|
||||
|
||||
auto event = castToObject<Event>(kernelEvent);
|
||||
event->release();
|
||||
@@ -508,11 +514,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
}
|
||||
|
||||
auto event = castToObject<Event>(kernelEvent);
|
||||
event->release();
|
||||
@@ -549,11 +557,13 @@ XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
}
|
||||
|
||||
auto event = castToObject<Event>(kernelEvent);
|
||||
event->release();
|
||||
|
||||
@@ -483,6 +483,11 @@ struct MemorySynchronizationCommands {
|
||||
|
||||
static void setBarrierWaFlags(void *barrierCmd);
|
||||
|
||||
// Selects which command, if any, is used as the "additional synchronization".
// The numeric values are significant: the specializations shown further down in this
// diff static_cast the ProgramGlobalFenceAsMiMemFenceCommandInCommandStream debug flag
// straight to this enum, so a flag value of 0 selects `semaphore` and 1 selects `fence`.
enum class AdditionalSynchronizationType : uint32_t {
|
||||
// Sized via EncodeSemaphore<Family>::getSizeMiSemaphoreWait() in the specialization below.
semaphore = 0,
|
||||
// Sized as sizeof(Family::MI_MEM_FENCE) in the specialization below.
fence,
|
||||
// No additional command; the size query below returns 0 for this value.
none
|
||||
};
|
||||
static void addAdditionalSynchronizationForDirectSubmission(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void addAdditionalSynchronization(LinearStream &commandStream, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
static void setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment);
|
||||
|
||||
@@ -166,27 +166,28 @@ aub_stream::MMIOList GfxCoreHelperHw<Family>::getExtraMmioList(const HardwareInf
|
||||
|
||||
// Returns the size in bytes of one additional synchronization command for this Family:
// sizeof(MI_MEM_FENCE) for `fence`, the MI_SEMAPHORE_WAIT size for `semaphore`, and 0
// for anything else.
// NOTE(review): this span is a rendered diff without +/- markers. Each pre-change line
// (bool-based selection) appears to be immediately followed by its replacement
// (AdditionalSynchronizationType-based selection); the final `return 0;` is new.
template <>
|
||||
size_t MemorySynchronizationCommands<Family>::getSizeForSingleAdditionalSynchronization(const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = true;
|
||||
// Default selection: nothing on integrated devices, MI_MEM_FENCE otherwise.
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = rootDeviceEnvironment.getHardwareInfo()->capabilityTable.isIntegratedDevice ? AdditionalSynchronizationType::none : AdditionalSynchronizationType::fence;
|
||||
// Any debug-flag value other than -1 overrides the default.
if (debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
|
||||
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
|
||||
// The raw flag value is cast directly to the enum (0 -> semaphore, 1 -> fence, 2 -> none).
programGlobalFenceAsMiMemFenceCommandInCommandStream = static_cast<AdditionalSynchronizationType>(debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get());
|
||||
}
|
||||
|
||||
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
|
||||
if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::fence) {
|
||||
return sizeof(Family::MI_MEM_FENCE);
|
||||
} else {
|
||||
} else if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::semaphore) {
|
||||
return EncodeSemaphore<Family>::getSizeMiSemaphoreWait();
|
||||
}
|
||||
// Neither fence nor semaphore was selected, so no command space is needed.
return 0;
|
||||
}
|
||||
|
||||
template <>
|
||||
void MemorySynchronizationCommands<Family>::setAdditionalSynchronization(void *&commandsBuffer, uint64_t gpuAddress, bool acquire, const RootDeviceEnvironment &rootDeviceEnvironment) {
|
||||
using MI_MEM_FENCE = typename Family::MI_MEM_FENCE;
|
||||
using MI_SEMAPHORE_WAIT = typename Family::MI_SEMAPHORE_WAIT;
|
||||
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = true;
|
||||
auto programGlobalFenceAsMiMemFenceCommandInCommandStream = rootDeviceEnvironment.getHardwareInfo()->capabilityTable.isIntegratedDevice ? AdditionalSynchronizationType::none : AdditionalSynchronizationType::fence;
|
||||
if (debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get() != -1) {
|
||||
programGlobalFenceAsMiMemFenceCommandInCommandStream = !!debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get();
|
||||
programGlobalFenceAsMiMemFenceCommandInCommandStream = static_cast<AdditionalSynchronizationType>(debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.get());
|
||||
}
|
||||
if (programGlobalFenceAsMiMemFenceCommandInCommandStream) {
|
||||
if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::fence) {
|
||||
MI_MEM_FENCE miMemFence = Family::cmdInitMemFence;
|
||||
if (acquire) {
|
||||
miMemFence.setFenceType(Family::MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_ACQUIRE);
|
||||
@@ -195,7 +196,7 @@ void MemorySynchronizationCommands<Family>::setAdditionalSynchronization(void *&
|
||||
}
|
||||
*reinterpret_cast<MI_MEM_FENCE *>(commandsBuffer) = miMemFence;
|
||||
commandsBuffer = ptrOffset(commandsBuffer, sizeof(MI_MEM_FENCE));
|
||||
} else {
|
||||
} else if (programGlobalFenceAsMiMemFenceCommandInCommandStream == AdditionalSynchronizationType::semaphore) {
|
||||
EncodeSemaphore<Family>::programMiSemaphoreWait(reinterpret_cast<MI_SEMAPHORE_WAIT *>(commandsBuffer),
|
||||
gpuAddress,
|
||||
EncodeSemaphore<Family>::invalidHardwareTag,
|
||||
|
||||
@@ -3033,7 +3033,10 @@ HWTEST2_F(CommandStreamReceiverHwTest, givenDeviceToHostCopyWhenFenceIsRequiredT
|
||||
auto miMemFence = genCmdCast<typename FamilyType::MI_MEM_FENCE *>(*++cmdIterator);
|
||||
|
||||
fenceExpected &= getHelper<ProductHelper>().isDeviceToHostCopySignalingFenceRequired();
|
||||
size_t expectedFenceCount = fenceExpected ? 3 : 2;
|
||||
size_t expectedFenceCount = fenceExpected ? 1 : 0;
|
||||
if (!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
expectedFenceCount += 2;
|
||||
}
|
||||
|
||||
auto fences = findAll<typename FamilyType::MI_MEM_FENCE *>(cmdIterator, cmdList.end());
|
||||
EXPECT_EQ(expectedFenceCount, fences.size());
|
||||
|
||||
@@ -417,7 +417,7 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe
|
||||
// Checks the value returned by getSizeForAdditonalSynchronization for the test device.
// NOTE(review): this span is a rendered diff without +/- markers; the first EXPECT_EQ
// appears to be the pre-change assertion and the second its replacement.
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
|
||||
EXPECT_EQ(sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
|
||||
// !isIntegratedDevice is 0 or 1, so the expected size is 0 on integrated devices
// and sizeof(MI_MEM_FENCE) otherwise.
EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDebugMemorySynchronizationCommandsWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
|
||||
@@ -425,7 +425,7 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDebugMemorySynchronization
|
||||
debugManager.flags.DisablePipeControlPrecedingPostSyncCommand.set(1);
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
|
||||
EXPECT_EQ(2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
|
||||
EXPECT_EQ(!pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice * 2 * sizeof(MI_MEM_FENCE), MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pDevice->getRootDeviceEnvironment()));
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenGettingSizeForAdditionalSynchronizationThenCorrectValueIsReturned) {
|
||||
@@ -456,12 +456,14 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDefaultMemorySynchronizati
|
||||
|
||||
MemorySynchronizationCommands<FamilyType>::addAdditionalSynchronization(commandStream, 0x0, false, rootDeviceEnvironment);
|
||||
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream);
|
||||
EXPECT_EQ(1u, hwParser.cmdList.size());
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*hwParser.cmdList.begin());
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(rootDeviceEnvironment) > 0) {
|
||||
HardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandStream);
|
||||
EXPECT_EQ(1u, hwParser.cmdList.size());
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*hwParser.cmdList.begin());
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE, fenceCmd->getFenceType());
|
||||
}
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenDontProgramGlobalFenceAsMiMemFenceCommandInCommandStreamWhenAddingAdditionalSynchronizationThenSemaphoreWaitIsCalled) {
|
||||
|
||||
Reference in New Issue
Block a user