feature: add support for local memory in check available space

Related-To: NEO-10356

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2025-03-05 18:51:30 +00:00 committed by Compute-Runtime-Automation
parent 61fe9ebb8f
commit bb61dafd72
3 changed files with 128 additions and 6 deletions

View File

@ -304,7 +304,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
neoDevice->getMemoryManager()->isLocalMemorySupported(neoDevice->getRootDeviceIndex());
if (NEO::debugManager.flags.DirectSubmissionFlatRingBuffer.get() != -1) {
createSecondaryCmdBufferInHostMem &= !!NEO::debugManager.flags.DirectSubmissionFlatRingBuffer.get();
createSecondaryCmdBufferInHostMem = isImmediateType() && static_cast<bool>(NEO::debugManager.flags.DirectSubmissionFlatRingBuffer.get());
}
auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(),

View File

@ -53,9 +53,23 @@ template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAvailableSpace(uint32_t numEvents, bool hasRelaxedOrderingDependencies, size_t commandSize, bool requestCommandBufferInLocalMem) {
this->commandContainer.fillReusableAllocationLists();
/* Command container might have two command buffers. If it does, one is in local memory, because relaxed ordering requires it, and one is in system memory for copying into the ring buffer.
If relaxed ordering is needed in the given dispatch and the current command stream is in system memory, the command streams must be swapped to ensure local memory. The same applies in the opposite scenario. */
if (hasRelaxedOrderingDependencies == NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) {
// Command container might have two command buffers - one in local memory (mainly for relaxed ordering, but also for other specific purposes) and one in system memory for copying into the ring buffer.
// If relaxed ordering is needed in the given dispatch, or local memory usage is requested, and the current command stream is in system memory, the command streams must be swapped to ensure local memory.
// If neither relaxed ordering nor local memory is requested and the current command stream is in local memory, the command streams must be swapped as well.
bool swapStreams = false;
if (hasRelaxedOrderingDependencies) {
if (NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) {
swapStreams = true;
}
} else {
if (requestCommandBufferInLocalMem && NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) {
swapStreams = true;
} else if (!requestCommandBufferInLocalMem && !NEO::MemoryPoolHelper::isSystemMemoryPool(this->commandContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool())) {
swapStreams = true;
}
}
if (swapStreams) {
if (this->commandContainer.swapStreams()) {
this->cmdListCurrentStartOffset = this->commandContainer.getCommandStream()->getUsed();
}
@ -63,7 +77,7 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::checkAvailableSpace(uint32_t
size_t semaphoreSize = NEO::EncodeSemaphore<GfxFamily>::getSizeMiSemaphoreWait() * numEvents;
if (this->commandContainer.getCommandStream()->getAvailableSpace() < commandSize + semaphoreSize) {
bool requireSystemMemoryCommandBuffer = !hasRelaxedOrderingDependencies;
bool requireSystemMemoryCommandBuffer = !hasRelaxedOrderingDependencies && !requestCommandBufferInLocalMem;
auto alloc = this->commandContainer.reuseExistingCmdBuffer(requireSystemMemoryCommandBuffer);
this->commandContainer.addCurrentCommandBufferToReusableAllocationList();
@ -1676,7 +1690,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
ze_event_handle_t hSignalEvent, uint32_t numWaitEvents, ze_event_handle_t *phWaitEvents) {
auto ret = ZE_RESULT_SUCCESS;
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize, false);
checkAvailableSpace(numWaitEvents, false, commonImmediateCommandSize, this->dispatchCmdListBatchBufferAsPrimary);
if (numWaitEvents) {
ret = this->appendWaitOnEvents(numWaitEvents, phWaitEvents, nullptr, false, true, true, true, true, false);
}

View File

@ -945,6 +945,114 @@ HWTEST2_F(CommandListCreate, givenSecondaryCommandStreamForImmediateCmdListWhenC
EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(reinterpret_cast<CmdContainerMock *>(&commandList->getCmdContainer())->secondaryCommandStreamForImmediateCmdList->getGraphicsAllocation()->getMemoryPool()));
}
// Scenario: an immediate command list is created with local memory support and the flat ring buffer
// debug flag enabled, so a secondary command stream exists (asserted to be in system memory).
// checkAvailableSpace() is then called with requestCommandBufferInLocalMem toggled, and after each step
// the test checks which memory pool backs the current command stream, including after the stream
// that was current at creation has been nearly exhausted.
HWTEST2_F(CommandListCreate, givenSystemAndLocalCommandStreamForImmediateCmdListWhenLocalIsRequiredAtCheckAvailableSpaceThenSwapCommandStreams, IsAtLeastXeHpcCore) {
    // Environment setup; the guards restore the hardware info and debug flags when the test ends.
    auto hwInfo = device->getNEODevice()->getRootDeviceEnvironmentRef().getMutableHardwareInfo();
    VariableBackup<NEO::HardwareInfo> hwInfoBackup(hwInfo);
    hwInfo->featureTable.flags.ftrLocalMemory = true;

    DebugManagerStateRestore debugStateRestore;
    debugManager.flags.DirectSubmissionFlatRingBuffer.set(1);
    static_cast<MockMemoryManager *>(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t result;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, result));
    ASSERT_NE(nullptr, commandList);

    auto &cmdContainer = commandList->getCmdContainer();

    // The secondary stream must exist and be backed by system memory.
    auto *secondaryStream = reinterpret_cast<CmdContainerMock *>(&cmdContainer)->secondaryCommandStreamForImmediateCmdList.get();
    EXPECT_NE(secondaryStream, nullptr);
    EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(secondaryStream->getGraphicsAllocation()->getMemoryPool()));

    auto *immediateCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(commandList.get());
    auto *primaryStream = cmdContainer.getCommandStream();

    // Requests space with no wait events and no relaxed ordering; only the local memory request varies.
    const auto requestSpace = [&](bool inLocalMemory) {
        immediateCmdList->checkAvailableSpace(0u, false, commonImmediateCommandSize, inLocalMemory);
    };
    // Reports whether the stream that is current right now is backed by system memory.
    const auto currentStreamInSystemMemory = [&cmdContainer]() {
        return MemoryPoolHelper::isSystemMemoryPool(cmdContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool());
    };
    // Consumes the given stream until only 4 bytes remain.
    const auto exhaust = [](auto *stream) {
        stream->getSpace(stream->getAvailableSpace() - 4);
    };

    // Request system memory first, then request local memory.
    requestSpace(false);
    requestSpace(true);

    // The primary stream is current again and is not in system memory.
    EXPECT_EQ(cmdContainer.getCommandStream(), primaryStream);
    EXPECT_FALSE(currentStreamInSystemMemory());

    // Exhaust the primary stream and request local memory again: its space must be fully refreshed.
    exhaust(primaryStream);
    requestSpace(true);
    EXPECT_EQ(primaryStream->getAvailableSpace(), primaryStream->getMaxAvailableSpace());
    EXPECT_FALSE(currentStreamInSystemMemory());

    // Exhaust the primary stream, but this time request system memory.
    exhaust(primaryStream);
    requestSpace(false);
    EXPECT_TRUE(currentStreamInSystemMemory());

    // Requesting local memory afterwards must yield a fully available primary stream outside system memory.
    requestSpace(true);
    EXPECT_EQ(primaryStream->getAvailableSpace(), primaryStream->getMaxAvailableSpace());
    EXPECT_FALSE(currentStreamInSystemMemory());
}
// Scenario: mirror of the "local is required" case. An immediate command list is created with local
// memory support and the flat ring buffer debug flag enabled, so a secondary command stream exists
// (asserted to be in system memory). checkAvailableSpace() is called with requestCommandBufferInLocalMem
// toggled, and after each step the test checks which memory pool backs the current command stream,
// including after the secondary stream has been nearly exhausted.
HWTEST2_F(CommandListCreate, givenSystemAndLocalCommandStreamForImmediateCmdListWhenSystemIsRequiredAtCheckAvailableSpaceThenSwapCommandStreams, IsAtLeastXeHpcCore) {
    // Environment setup; the guards restore the hardware info and debug flags when the test ends.
    auto hwInfo = device->getNEODevice()->getRootDeviceEnvironmentRef().getMutableHardwareInfo();
    VariableBackup<NEO::HardwareInfo> hwInfoBackup(hwInfo);
    hwInfo->featureTable.flags.ftrLocalMemory = true;

    DebugManagerStateRestore debugStateRestore;
    debugManager.flags.DirectSubmissionFlatRingBuffer.set(1);
    static_cast<MockMemoryManager *>(device->getNEODevice()->getMemoryManager())->localMemorySupported[0] = true;

    ze_command_queue_desc_t queueDesc = {};
    queueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
    ze_result_t result;
    std::unique_ptr<L0::CommandList> commandList(CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::compute, result));
    ASSERT_NE(nullptr, commandList);

    auto &cmdContainer = commandList->getCmdContainer();

    // The secondary stream must exist and be backed by system memory.
    auto *secondaryStream = reinterpret_cast<CmdContainerMock *>(&cmdContainer)->secondaryCommandStreamForImmediateCmdList.get();
    EXPECT_NE(secondaryStream, nullptr);
    EXPECT_TRUE(MemoryPoolHelper::isSystemMemoryPool(secondaryStream->getGraphicsAllocation()->getMemoryPool()));

    auto *immediateCmdList = static_cast<CommandListCoreFamilyImmediate<gfxCoreFamily> *>(commandList.get());

    // Requests space with no wait events and no relaxed ordering; only the local memory request varies.
    const auto requestSpace = [&](bool inLocalMemory) {
        immediateCmdList->checkAvailableSpace(0u, false, commonImmediateCommandSize, inLocalMemory);
    };
    // Reports whether the stream that is current right now is backed by system memory.
    const auto currentStreamInSystemMemory = [&cmdContainer]() {
        return MemoryPoolHelper::isSystemMemoryPool(cmdContainer.getCommandStream()->getGraphicsAllocation()->getMemoryPool());
    };
    // Consumes the given stream until only 4 bytes remain.
    const auto exhaust = [](auto *stream) {
        stream->getSpace(stream->getAvailableSpace() - 4);
    };

    // Request local memory first, then request system memory.
    requestSpace(true);
    requestSpace(false);

    // The secondary stream is now current and is in system memory.
    EXPECT_EQ(cmdContainer.getCommandStream(), secondaryStream);
    EXPECT_TRUE(currentStreamInSystemMemory());

    // Exhaust the secondary stream and request system memory again: its space must be fully refreshed.
    exhaust(secondaryStream);
    requestSpace(false);
    EXPECT_EQ(secondaryStream->getAvailableSpace(), secondaryStream->getMaxAvailableSpace());
    EXPECT_TRUE(currentStreamInSystemMemory());

    // Exhaust the secondary stream, but this time request local memory.
    exhaust(secondaryStream);
    requestSpace(true);
    EXPECT_FALSE(currentStreamInSystemMemory());

    // Requesting system memory afterwards must yield a fully available secondary stream in system memory.
    requestSpace(false);
    EXPECT_EQ(secondaryStream->getAvailableSpace(), secondaryStream->getMaxAvailableSpace());
    EXPECT_TRUE(currentStreamInSystemMemory());
}
HWTEST2_F(CommandListCreate, givenNoSecondaryCommandStreamForImmediateCmdListWhenCheckAvailableSpaceThenNotSwapCommandStreams, MatchAny) {
if (!device->getHwInfo().featureTable.flags.ftrLocalMemory) {
GTEST_SKIP();