fix: enable single temporary allocations list mode 2
Related-To: NEO-14641
Signed-off-by: Bartosz Dunajski <bartosz.dunajski@intel.com>
This commit is contained in:
parent
82e2e410d6
commit
9c19e9eb96
|
@@ -121,7 +121,7 @@ struct BcsSplit {
|
|||
result = appendCall(localDstPtr, localSrcPtr, localSize, eventHandle);
|
||||
|
||||
if (cmdList->flushTaskSubmissionEnabled()) {
|
||||
cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, false, cmdQsForSplit[i], nullptr, nullptr);
|
||||
cmdList->executeCommandListImmediateWithFlushTaskImpl(performMigration, hasStallingCmds, hasRelaxedOrderingDependencies, NEO::AppendOperations::nonKernel, true, cmdQsForSplit[i], nullptr, nullptr);
|
||||
} else {
|
||||
cmdList->executeCommandListImmediateImpl(performMigration, cmdQsForSplit[i]);
|
||||
}
|
||||
|
|
|
@@ -911,6 +911,89 @@ HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsC
|
|||
context->freeMem(dstPtr);
|
||||
}
|
||||
|
||||
// Appends an 8 MB copy on an immediate copy command list with the SplitBcsCopy debug flag set,
// then checks that:
//  - the CSRs behind split queues 2 and 3 report task count 1, while those behind queues 0 and 1 stay at 0,
//  - the command stream of the command list contains MI_FLUSH_DW commands whose destination
//    address equals the tag allocation GPU address of the CSRs behind queues 2 and 3.
HWTEST2_F(CommandQueueCommandsXeHpc, givenFlushTaskSubmissionEnabledAndSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyThenUpdateTaskCount, IsXeHpcCore) {
    using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW;

    DebugManagerStateRestore restorer;
    debugManager.flags.SplitBcsCopy.set(1);

    // Dedicated device built from a copy of the default hardware info with blitter support
    // switched on and the BCS info mask set to 0b111111111.
    ze_result_t returnValue;
    auto hwInfo = *NEO::defaultHwInfo;
    hwInfo.featureTable.ftrBcsInfo = 0b111111111;
    hwInfo.capabilityTable.blitterOperationsSupported = true;
    auto testNeoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo);
    std::unique_ptr<L0::Device> testL0Device(L0::Device::create(driverHandle.get(), testNeoDevice, false, &returnValue));

    ze_command_queue_desc_t desc = {};
    desc.ordinal = static_cast<uint32_t>(testNeoDevice->getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::copy));

    std::unique_ptr<L0::CommandList> commandList0(
        CommandList::createImmediate(productFamily, testL0Device.get(), &desc, false, NEO::EngineGroupType::copy, returnValue));
    ASSERT_NE(nullptr, commandList0);
    auto whiteBoxCmdList = static_cast<CommandList *>(commandList0.get());

    // Shorthand for reaching the i-th split queue of the test device.
    auto *testDeviceImp = static_cast<DeviceImp *>(testL0Device.get());
    const auto splitQueueAt = [testDeviceImp](size_t queueIndex) {
        return static_cast<CommandQueueImp *>(testDeviceImp->bcsSplit.cmdQs[queueIndex]);
    };

    // Before the copy is appended: four split queues, none of them with a submitted task.
    EXPECT_EQ(testDeviceImp->bcsSplit.cmdQs.size(), 4u);
    for (size_t queueIndex = 0u; queueIndex < 4u; queueIndex++) {
        EXPECT_EQ(splitQueueAt(queueIndex)->getTaskCount(), 0u);
    }

    constexpr size_t alignment = 4096u;
    constexpr size_t size = 8 * MemoryConstants::megaByte;
    void *srcPtr;
    void *dstPtr;

    // Source is a device allocation, destination is a host allocation.
    ze_device_mem_alloc_desc_t deviceDesc = {};
    context->allocDeviceMem(device->toHandle(), &deviceDesc, size, alignment, &srcPtr);
    ze_host_mem_alloc_desc_t hostDesc = {};
    context->allocHostMem(&hostDesc, size, alignment, &dstPtr);

    auto ultCsr = static_cast<NEO::UltCommandStreamReceiver<FamilyType> *>(whiteBoxCmdList->getCsr(false));
    ultCsr->recordFlushedBatchBuffer = true;
    int client; // only its address is used, as the key passed to registerClient
    ultCsr->registerClient(&client);

    // Remember where the command stream stood so only commands emitted by the copy are parsed.
    auto cmdStream = commandList0->getCmdContainer().getCommandStream();
    const auto offset = cmdStream->getUsed();

    const auto result = commandList0->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, copyParams);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto csr2 = splitQueueAt(2)->getCsr();
    auto csr3 = splitQueueAt(3)->getCsr();

    EXPECT_EQ(splitQueueAt(0)->getCsr()->peekTaskCount(), 0u);
    EXPECT_EQ(splitQueueAt(1)->getCsr()->peekTaskCount(), 0u);
    EXPECT_EQ(csr2->peekTaskCount(), 1u);
    EXPECT_EQ(csr3->peekTaskCount(), 1u);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(cmdList, ptrOffset(cmdStream->getCpuBase(), offset), (cmdStream->getUsed() - offset)));

    // Look for MI_FLUSH_DW commands that target the tag allocation of each CSR.
    bool csr2TaskCountFound = false;
    bool csr3TaskCountFound = false;

    for (auto &cmd : cmdList) {
        auto miFlushCmd = genCmdCast<MI_FLUSH_DW *>(cmd);
        if (!miFlushCmd) {
            continue;
        }

        const auto destinationAddress = miFlushCmd->getDestinationAddress();
        if (destinationAddress == csr2->getTagAllocation()->getGpuAddress()) {
            csr2TaskCountFound = true;
        } else if (destinationAddress == csr3->getTagAllocation()->getGpuAddress()) {
            csr3TaskCountFound = true;
        }
    }

    EXPECT_TRUE(csr2TaskCountFound);
    EXPECT_TRUE(csr3TaskCountFound);

    context->freeMem(srcPtr);
    context->freeMem(dstPtr);
}
|
||||
|
||||
HWTEST2_F(CommandQueueCommandsXeHpc, givenSyncCmdListAndSplitBcsCopyAndImmediateCommandListWhenAppendingMemoryCopyThenSuccessIsReturned, IsXeHpcCore) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.SplitBcsCopy.set(1);
|
||||
|
|
|
@@ -92,7 +92,7 @@ MemoryManager::MemoryManager(ExecutionEnvironment &executionEnvironment) : execu
|
|||
supportsMultiStorageResources = !!debugManager.flags.EnableMultiStorageResources.get();
|
||||
}
|
||||
|
||||
if (debugManager.flags.UseSingleListForTemporaryAllocations.get() == 1) {
|
||||
if (debugManager.flags.UseSingleListForTemporaryAllocations.get() != 0) {
|
||||
singleTemporaryAllocationsList = true;
|
||||
temporaryAllocations = std::make_unique<AllocationsList>(AllocationUsage::TEMPORARY_ALLOCATION);
|
||||
}
|
||||
|
@@ -115,14 +115,16 @@ void MemoryManager::cleanTemporaryAllocations(const CommandStreamReceiver &csr,
|
|||
auto *nextAlloc = currentAlloc->next;
|
||||
bool freeAllocation = false;
|
||||
|
||||
if (currentAlloc->isUsedByOsContext(waitedOsContextId)) {
|
||||
if (currentAlloc->hostPtrTaskCountAssignment == 0 && currentAlloc->getTaskCount(waitedOsContextId) <= waitedTaskCount) {
|
||||
if (!currentAlloc->isUsedByManyOsContexts() || !allocInUse(*currentAlloc)) {
|
||||
freeAllocation = true;
|
||||
if (currentAlloc->hostPtrTaskCountAssignment == 0) {
|
||||
if (currentAlloc->isUsedByOsContext(waitedOsContextId)) {
|
||||
if (currentAlloc->getTaskCount(waitedOsContextId) <= waitedTaskCount) {
|
||||
if (!currentAlloc->isUsedByManyOsContexts() || !allocInUse(*currentAlloc)) {
|
||||
freeAllocation = true;
|
||||
}
|
||||
}
|
||||
} else if (!allocInUse(*currentAlloc)) {
|
||||
freeAllocation = true;
|
||||
}
|
||||
} else if (!allocInUse(*currentAlloc)) {
|
||||
freeAllocation = true;
|
||||
}
|
||||
|
||||
if (freeAllocation) {
|
||||
|
|
|
@@ -31,15 +31,9 @@ class MemoryAllocatorFixture : public MemoryManagementFixture {
|
|||
executionEnvironment->calculateMaxOsContextCount();
|
||||
|
||||
device.reset(MockDevice::createWithExecutionEnvironment<MockDevice>(defaultHwInfo.get(), executionEnvironment, 0u));
|
||||
memoryManager = new MockMemoryManager(false, false, *executionEnvironment);
|
||||
executionEnvironment->memoryManager.reset(memoryManager);
|
||||
csr = &device->getGpgpuCommandStreamReceiver();
|
||||
memoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
|
||||
|
||||
auto &gfxCoreHelper = device->getGfxCoreHelper();
|
||||
auto engineType = gfxCoreHelper.getGpgpuEngineInstances(device->getRootDeviceEnvironment())[0].first;
|
||||
auto osContext = memoryManager->createAndRegisterOsContext(csr, EngineDescriptorHelper::getDefaultDescriptor({engineType, EngineUsage::regular},
|
||||
PreemptionHelper::getDefaultPreemptionMode(*defaultHwInfo)));
|
||||
csr->setupContext(*osContext);
|
||||
csr = &device->getGpgpuCommandStreamReceiver();
|
||||
}
|
||||
|
||||
void tearDown() {
|
||||
|
|
|
@@ -1129,18 +1129,18 @@ HWTEST_F(HostPtrAllocationTest, givenSingleTempAllocationsListWhenAddingToStorag
|
|||
TEST_F(HostPtrAllocationTest, givenDebugFlagSetWhenCreatingMemoryManagerThenEnableSingleTempAllocationsList) {
|
||||
DebugManagerStateRestore debugRestorer;
|
||||
|
||||
{
|
||||
auto memoryManager = std::make_unique<MockMemoryManager>(executionEnvironment);
|
||||
EXPECT_FALSE(memoryManager->isSingleTemporaryAllocationsListEnabled());
|
||||
EXPECT_EQ(nullptr, memoryManager->temporaryAllocations.get());
|
||||
}
|
||||
|
||||
debugManager.flags.UseSingleListForTemporaryAllocations.set(1);
|
||||
{
|
||||
auto memoryManager = std::make_unique<MockMemoryManager>(executionEnvironment);
|
||||
EXPECT_TRUE(memoryManager->isSingleTemporaryAllocationsListEnabled());
|
||||
EXPECT_NE(nullptr, memoryManager->temporaryAllocations.get());
|
||||
}
|
||||
|
||||
debugManager.flags.UseSingleListForTemporaryAllocations.set(0);
|
||||
{
|
||||
auto memoryManager = std::make_unique<MockMemoryManager>(executionEnvironment);
|
||||
EXPECT_FALSE(memoryManager->isSingleTemporaryAllocationsListEnabled());
|
||||
EXPECT_EQ(nullptr, memoryManager->temporaryAllocations.get());
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(HostPtrAllocationTest, whenOverlappedFragmentIsBiggerThenStoredAndStoredFragmentCannotBeDestroyedThenCheckForOverlappingReturnsError) {
|
||||
|
|
|
@@ -179,6 +179,7 @@ TEST_F(TagAllocatorTest, WhenGettingAndReturningTagThenFreeAndUsedListsAreUpdate
|
|||
TEST_F(TagAllocatorTest, WhenTagAllocatorIsCreatedThenItPopulatesTagsWithProperDeviceBitfield) {
|
||||
size_t alignment = 64;
|
||||
|
||||
memoryManager->recentlyPassedDeviceBitfield = 0;
|
||||
EXPECT_NE(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield);
|
||||
MockTagAllocator<TimeStamps> tagAllocator(memoryManager, 10, alignment, deviceBitfield);
|
||||
EXPECT_EQ(deviceBitfield, memoryManager->recentlyPassedDeviceBitfield);
|
||||
|
|
Loading…
Reference in New Issue