performance: Remove global fence allocation from BMG
Resolves: NEO-14642
Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
parent
304fba1eba
commit
c9a21c158e
|
@ -21,41 +21,6 @@ namespace ult {
|
|||
|
||||
// Fixture alias for the command-queue tests in this group; it is Test<DeviceFixture> under a group-specific name.
using CommandQueueCommandsXe2HpgCore = Test<DeviceFixture>;
|
||||
|
||||
// Checks that executing a command list on a newly initialized queue writes a
// STATE_SYSTEM_MEM_FENCE_ADDRESS command into the queue's command stream, and that
// the address programmed in it equals the GPU address of the command stream
// receiver's global fence allocation.
// NOTE(review): the IsXe2HpgCore argument presumably restricts the test to Xe2 HPG core hardware - confirm.
HWTEST2_F(CommandQueueCommandsXe2HpgCore, givenCommandQueueWhenExecutingCommandListsThenStateSystemMemFenceAddressCmdIsGenerated, IsXe2HpgCore) {
|
||||
// Skipped when the capability table reports an integrated device.
if (neoDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
ze_command_queue_desc_t desc = {};
|
||||
auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;
|
||||
|
||||
// The queue is created with new and released through destroy() at the end of the test.
// NOTE(review): a failing ASSERT_* below would return before destroy() is reached - confirm whether that matters here.
auto commandQueue = new MockCommandQueueHw<FamilyType::gfxCoreFamily>(device, csr, &desc);
|
||||
commandQueue->initialize(false, false, false);
|
||||
|
||||
ze_result_t returnValue;
|
||||
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false));
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
// The list is closed without anything being appended to it in this test, then submitted.
commandList->close();
|
||||
|
||||
commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false, nullptr, nullptr);
|
||||
|
||||
auto globalFence = csr->getGlobalFenceAllocation();
|
||||
|
||||
// Parse only the bytes the queue has used in its own command stream.
auto used = commandQueue->commandStream.getUsed();
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, commandQueue->commandStream.getCpuBase(), used));
|
||||
|
||||
// The fence-address command must be present in the parsed stream.
auto itor = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itor);
|
||||
EXPECT_EQ(globalFence->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandQueueCommandsXe2HpgCore, givenCommandQueueWhenExecutingCommandListsForTheSecondTimeThenStateSystemMemFenceAddressCmdIsNotGenerated, IsXe2HpgCore) {
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
ze_command_queue_desc_t desc = {};
|
||||
|
|
|
@ -375,133 +375,6 @@ struct SystemMemoryFenceInDefaultConfigurationTest : public UltCommandStreamRece
|
|||
DebugManagerStateRestore restore;
|
||||
};
|
||||
|
||||
// Group-specific name for the SystemMemoryFenceInDefaultConfigurationTest fixture, used by the XE2_HPG_CORETEST_F cases below.
using SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore = SystemMemoryFenceInDefaultConfigurationTest;
|
||||
|
||||
// Enqueues a kernel with no output event and without marking any argument as using
// system memory, then inspects the parsed command stream. Expectations:
//  - a STATE_SYSTEM_MEM_FENCE_ADDRESS command carrying the GPU address of the CSR's global fence allocation,
//  - a COMPUTE_WALKER whose post-sync data does not request a system memory fence,
//  - an MI_MEM_FENCE of release type, checked only when additional synchronization has a non-zero size.
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
givenNoEventProvidedWhenEnqueueKernelNotUsingSystemMemoryThenNoPostSyncFenceRequestDispatched) {
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
// Skipped when the capability table reports an integrated device.
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
MockCommandQueueHw<FamilyType> commandQueue(&context, pClDevice, nullptr);
|
||||
auto &commandStreamReceiver = pClDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
size_t globalWorkSize[3] = {1, 1, 1};
|
||||
// The last argument (output event) is nullptr: no event is requested.
commandQueue.enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
ClHardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandQueue);
|
||||
|
||||
// The fence address must be programmed and must point at the global fence allocation.
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
|
||||
// The walker's post-sync must not carry a system memory fence request.
auto itorComputeWalker = find<COMPUTE_WALKER *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorComputeWalker);
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*itorComputeWalker);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
// MI_MEM_FENCE is only checked when the helper reports a non-zero additional-synchronization size.
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE_FENCE, fenceCmd->getFenceType());
|
||||
}
|
||||
}
|
||||
|
||||
// Same scenario as the "not using system memory" case, except the mock kernel is
// flagged as having an argument that uses system memory before it is enqueued.
// No output event is passed. Expectations:
//  - a STATE_SYSTEM_MEM_FENCE_ADDRESS command carrying the GPU address of the CSR's global fence allocation,
//  - a COMPUTE_WALKER whose post-sync data still does not request a system memory fence,
//  - an MI_MEM_FENCE of release type, checked only when additional synchronization has a non-zero size.
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
givenNoEventProvidedWhenEnqueueKernelUsingSystemMemoryThenPostSyncFenceRequestNotDispatched) {
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
// Skipped when the capability table reports an integrated device.
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
MockCommandQueueHw<FamilyType> commandQueue(&context, pClDevice, nullptr);
|
||||
auto &commandStreamReceiver = pClDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
size_t globalWorkSize[3] = {1, 1, 1};
|
||||
// This flag is the only setup difference from the "not using system memory" case.
kernel.mockKernel->anyKernelArgumentUsingSystemMemory = true;
|
||||
commandQueue.enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, nullptr);
|
||||
|
||||
ClHardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandQueue);
|
||||
|
||||
// The fence address must be programmed and must point at the global fence allocation.
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
|
||||
// Even with the system-memory flag set, no post-sync fence request is expected when no event is provided.
auto itorComputeWalker = find<COMPUTE_WALKER *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorComputeWalker);
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*itorComputeWalker);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
// MI_MEM_FENCE is only checked when the helper reports a non-zero additional-synchronization size.
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE_FENCE, fenceCmd->getFenceType());
|
||||
}
|
||||
}
|
||||
|
||||
// Enqueues a kernel while requesting an output event, without marking any kernel
// argument as using system memory. Expectations:
//  - a STATE_SYSTEM_MEM_FENCE_ADDRESS command carrying the GPU address of the CSR's global fence allocation,
//  - a COMPUTE_WALKER whose post-sync data does not request a system memory fence,
//  - an MI_MEM_FENCE of release type, checked only when additional synchronization has a non-zero size.
// The event returned by the enqueue is released at the end of the test.
XE2_HPG_CORETEST_F(SystemMemoryFenceInDefaultConfigurationTestXe2HpgCore,
|
||||
givenEventProvidedWhenEnqueueKernelNotUsingSystemMemoryThenPostSyncFenceRequestNotDispatched) {
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
using COMPUTE_WALKER = typename FamilyType::COMPUTE_WALKER;
|
||||
using MI_MEM_FENCE = typename FamilyType::MI_MEM_FENCE;
|
||||
// Skipped when the capability table reports an integrated device.
if (pClDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
MockKernelWithInternals kernel(*pClDevice);
|
||||
MockContext context(pClDevice);
|
||||
MockCommandQueueHw<FamilyType> commandQueue(&context, pClDevice, nullptr);
|
||||
auto &commandStreamReceiver = pClDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||
|
||||
size_t globalWorkSize[3] = {1, 1, 1};
|
||||
// Passing &kernelEvent as the last argument is what distinguishes this case from the no-event cases.
cl_event kernelEvent{};
|
||||
commandQueue.enqueueKernel(kernel, 1, nullptr, globalWorkSize, nullptr, 0, nullptr, &kernelEvent);
|
||||
|
||||
ClHardwareParse hwParser;
|
||||
hwParser.parseCommands<FamilyType>(commandQueue);
|
||||
|
||||
// The fence address must be programmed and must point at the global fence allocation.
auto itorSystemMemFenceAddress = find<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorSystemMemFenceAddress);
|
||||
auto systemMemFenceAddressCmd = genCmdCast<STATE_SYSTEM_MEM_FENCE_ADDRESS *>(*itorSystemMemFenceAddress);
|
||||
EXPECT_EQ(commandStreamReceiver.globalFenceAllocation->getGpuAddress(), systemMemFenceAddressCmd->getSystemMemoryFenceAddress());
|
||||
|
||||
// Even with an event requested, the walker's post-sync must not carry a system memory fence request.
auto itorComputeWalker = find<COMPUTE_WALKER *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorComputeWalker);
|
||||
auto walkerCmd = genCmdCast<COMPUTE_WALKER *>(*itorComputeWalker);
|
||||
auto &postSyncData = walkerCmd->getPostSync();
|
||||
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
|
||||
|
||||
// MI_MEM_FENCE is only checked when the helper reports a non-zero additional-synchronization size.
if (MemorySynchronizationCommands<FamilyType>::getSizeForAdditonalSynchronization(pClDevice->getRootDeviceEnvironment()) > 0) {
|
||||
auto itorMiMemFence = find<MI_MEM_FENCE *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
|
||||
ASSERT_NE(hwParser.cmdList.end(), itorMiMemFence);
|
||||
auto fenceCmd = genCmdCast<MI_MEM_FENCE *>(*itorMiMemFence);
|
||||
ASSERT_NE(nullptr, fenceCmd);
|
||||
EXPECT_EQ(MI_MEM_FENCE::FENCE_TYPE::FENCE_TYPE_RELEASE_FENCE, fenceCmd->getFenceType());
|
||||
}
|
||||
|
||||
// Release the event obtained from the enqueue.
// NOTE(review): the castToObject result is dereferenced without a null check, and a failing ASSERT_* above would skip this release - confirm both are acceptable.
auto event = castToObject<Event>(kernelEvent);
|
||||
event->release();
|
||||
}
|
||||
|
||||
struct Xe2MidThreadCommandStreamReceiverTest : public UltCommandStreamReceiverTest {
|
||||
void SetUp() override {
|
||||
debugManager.flags.ForcePreemptionMode.set(static_cast<int32_t>(PreemptionMode::MidThread));
|
||||
|
|
|
@ -24,7 +24,9 @@ bool GfxCoreHelperHw<Family>::isFenceAllocationRequired(const HardwareInfo &hwIn
|
|||
(debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.get() == 0)) {
|
||||
return false;
|
||||
}
|
||||
return !hwInfo.capabilityTable.isIntegratedDevice;
|
||||
return productHelper.isGlobalFenceInCommandStreamRequired(hwInfo) ||
|
||||
productHelper.isGlobalFenceInDirectSubmissionRequired(hwInfo) ||
|
||||
productHelper.isGlobalFenceInPostSyncRequired(hwInfo);
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
|
|
|
@ -4472,7 +4472,7 @@ HWTEST2_F(CommandStreamReceiverHwTest,
|
|||
givenImmediateFlushTaskWhenOneTimeContextSystemFenceRequiredThenExpectOneTimeSystemFenceCommand,
|
||||
IsHeapfulSupportedAndAtLeastXeHpcCore) {
|
||||
using STATE_SYSTEM_MEM_FENCE_ADDRESS = typename FamilyType::STATE_SYSTEM_MEM_FENCE_ADDRESS;
|
||||
if (pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
if (!pDevice->getGfxCoreHelper().isFenceAllocationRequired(pDevice->getHardwareInfo(), pDevice->getProductHelper())) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
|
|
|
@ -219,10 +219,14 @@ HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenDebugFlagSetToTrueWhenCrea
|
|||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(1);
|
||||
|
||||
if (heaplessStateInit || pDevice->getHardwareInfo().capabilityTable.isIntegratedDevice) {
|
||||
if (heaplessStateInit) {
|
||||
GTEST_SKIP();
|
||||
}
|
||||
|
||||
if (!pDevice->getDefaultEngine().commandStreamReceiver->getGlobalFenceAllocation()) {
|
||||
pDevice->getDefaultEngine().commandStreamReceiver->createGlobalFenceAllocation();
|
||||
}
|
||||
|
||||
MockDirectSubmissionHw<FamilyType, RenderDispatcher<FamilyType>> directSubmission(*pDevice->getDefaultEngine().commandStreamReceiver);
|
||||
|
||||
EXPECT_TRUE(directSubmission.miMemFenceRequired);
|
||||
|
@ -232,6 +236,8 @@ HWTEST_F(DirectSubmissionDispatchMiMemFenceTest, givenDebugFlagSetToTrueWhenCrea
|
|||
|
||||
EXPECT_EQ(directSubmission.systemMemoryFenceAddressSet, directSubmission.globalFenceAllocation != nullptr);
|
||||
EXPECT_TRUE(directSubmission.miMemFenceRequired);
|
||||
|
||||
reinterpret_cast<UltCommandStreamReceiver<FamilyType> *>(pDevice->getDefaultEngine().commandStreamReceiver)->cleanupResources();
|
||||
}
|
||||
|
||||
HWCMDTEST_F(IGFX_XE_HP_CORE, DirectSubmissionDispatchBufferTest,
|
||||
|
|
|
@ -486,7 +486,7 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenAskedIfFe
|
|||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(-1);
|
||||
debugManager.flags.ProgramGlobalFenceAsKernelInstructionInEUKernel.set(-1);
|
||||
debugManager.flags.DirectSubmissionInsertExtraMiMemFenceCommands.set(-1);
|
||||
EXPECT_EQ(gfxCoreHelper.isFenceAllocationRequired(hwInfo, productHelper), !hwInfo.capabilityTable.isIntegratedDevice);
|
||||
EXPECT_FALSE(gfxCoreHelper.isFenceAllocationRequired(hwInfo, productHelper));
|
||||
|
||||
debugManager.flags.ProgramGlobalFenceAsMiMemFenceCommandInCommandStream.set(0);
|
||||
debugManager.flags.ProgramGlobalFenceAsPostSyncOperationInComputeWalker.set(0);
|
||||
|
|
Loading…
Reference in New Issue