[5/n] Optimize Indirect Allocations.

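Enable the mechanism where feasible: indirect allocations are made resident as a pack when the memory manager allows it for the root device, unless the MakeIndirectAllocationsResidentAsPack debug flag is set (any value other than -1), in which case the flag decides.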

Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
This commit is contained in:
Michal Mrozek
2022-01-26 15:25:06 +00:00
committed by Compute-Runtime-Automation
parent f17dfc9a5c
commit 3d9548cee9
3 changed files with 74 additions and 1 deletions

View File

@ -171,7 +171,12 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
bool indirectAllocationsAllowed = commandList->hasIndirectAllocationsAllowed(); bool indirectAllocationsAllowed = commandList->hasIndirectAllocationsAllowed();
if (indirectAllocationsAllowed) { if (indirectAllocationsAllowed) {
auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager(); auto svmAllocsManager = device->getDriverHandle()->getSvmAllocsManager();
if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() == 1) { auto submitAsPack = device->getDriverHandle()->getMemoryManager()->allowIndirectAllocationsAsPack(neoDevice->getRootDeviceIndex());
if (NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get() != -1) {
submitAsPack = !!NEO::DebugManager.flags.MakeIndirectAllocationsResidentAsPack.get();
}
if (submitAsPack) {
svmAllocsManager->makeIndirectAllocationsResident(*csr, csr->peekTaskCount() + 1u); svmAllocsManager->makeIndirectAllocationsResident(*csr, csr->peekTaskCount() + 1u);
} else { } else {
UnifiedMemoryControls unifiedMemoryControls = commandList->getUnifiedMemoryControls(); UnifiedMemoryControls unifiedMemoryControls = commandList->getUnifiedMemoryControls();

View File

@ -874,6 +874,66 @@ HWTEST_F(CommandQueueIndirectAllocations, givenDebugModeToTreatIndirectAllocatio
commandQueue->destroy(); commandQueue->destroy();
} }
// Verifies that when the memory manager reports that indirect allocations may be
// submitted as a pack, executing a command list whose kernel uses indirect device
// access makes a device allocation resident even though it was never added to the
// command list's residency container.
HWTEST_F(CommandQueueIndirectAllocations, givenDeviceThatSupportsSubmittingIndirectAllocationsAsPackWhenIndirectAccessIsUsedThenWholePackIsMadeResident) {
const ze_command_queue_desc_t desc = {};
// Stand-alone mock CSR bound to the default engine's OS context; the queue below submits through it.
MockCsrHw2<FamilyType> csr(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());
csr.initializeTagAllocation();
csr.setupContext(*neoDevice->getDefaultEngine().osContext);
ze_result_t returnValue;
L0::CommandQueue *commandQueue = CommandQueue::create(productFamily,
device,
&csr,
&desc,
true,
false,
returnValue);
ASSERT_NE(nullptr, commandQueue);
std::unique_ptr<L0::CommandList> commandList(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue));
// Device allocation that the kernel never references explicitly.
// NOTE(review): 16384u / 4096u are presumably size and alignment - confirm against allocDeviceMem.
void *deviceAlloc = nullptr;
ze_device_mem_alloc_desc_t deviceDesc = {};
auto result = context->allocDeviceMem(device->toHandle(), &deviceDesc, 16384u, 4096u, &deviceAlloc);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// Look up the graphics allocation backing deviceAlloc for this root device via the SVM allocations manager.
auto gpuAlloc = device->getDriverHandle()->getSvmAllocsManager()->getSVMAllocs()->get(deviceAlloc)->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
ASSERT_NE(nullptr, gpuAlloc);
// Fixture helper creates `kernel`; the kernel is then flagged as allowed to access device allocations indirectly.
createKernel(true);
kernel->unifiedMemoryControls.indirectDeviceAllocationsAllowed = true;
EXPECT_TRUE(kernel->getUnifiedMemoryControls().indirectDeviceAllocationsAllowed);
ze_group_count_t groupCount{1, 1, 1};
result = commandList->appendLaunchKernel(kernel->toHandle(),
&groupCount,
nullptr,
0,
nullptr);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// Appending the kernel must not have put the allocation into the command list's residency container.
auto itorEvent = std::find(std::begin(commandList->commandContainer.getResidencyContainer()),
std::end(commandList->commandContainer.getResidencyContainer()),
gpuAlloc);
EXPECT_EQ(itorEvent, std::end(commandList->commandContainer.getResidencyContainer()));
auto commandListHandle = commandList->toHandle();
// Precondition: the allocation is not yet resident on the mock CSR's OS context.
EXPECT_FALSE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
// Force the mock memory manager to report that submitting indirect allocations as a pack is allowed.
static_cast<MockMemoryManager *>(driverHandle.get()->getMemoryManager())->overrideAllocateAsPackReturn = 1u;
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
// After execution the allocation is resident and its residency task count is the objectAlwaysResident marker.
EXPECT_TRUE(gpuAlloc->isResident(csr.getOsContext().getContextId()));
EXPECT_EQ(GraphicsAllocation::objectAlwaysResident, gpuAlloc->getResidencyTaskCount(csr.getOsContext().getContextId()));
// Cleanup: release the device allocation and the queue created above.
device->getDriverHandle()->getSvmAllocsManager()->freeSVMAlloc(deviceAlloc);
commandQueue->destroy();
}
using DeviceCreateCommandQueueTest = Test<DeviceFixture>; using DeviceCreateCommandQueueTest = Test<DeviceFixture>;
TEST_F(DeviceCreateCommandQueueTest, givenLowPriorityDescWhenCreateCommandQueueIsCalledThenLowPriorityCsrIsAssigned) { TEST_F(DeviceCreateCommandQueueTest, givenLowPriorityDescWhenCreateCommandQueueIsCalledThenLowPriorityCsrIsAssigned) {
ze_command_queue_desc_t desc{}; ze_command_queue_desc_t desc{};

View File

@ -183,6 +183,13 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
} }
return allocateGraphicsMemoryForNonSvmHostPtrResult; return allocateGraphicsMemoryForNonSvmHostPtrResult;
} }
// Mock override: while overrideAllocateAsPackReturn holds the -1 sentinel the
// decision is delegated to MemoryManager; any other value is returned as a
// forced answer (zero -> false, non-zero -> true).
bool allowIndirectAllocationsAsPack(uint32_t rootDeviceIndex) override {
    const bool forcedByTest = (overrideAllocateAsPackReturn != -1);
    if (!forcedByTest) {
        return MemoryManager::allowIndirectAllocationsAsPack(rootDeviceIndex);
    }
    return overrideAllocateAsPackReturn != 0;
}
uint32_t copyMemoryToAllocationBanksCalled = 0u; uint32_t copyMemoryToAllocationBanksCalled = 0u;
uint32_t populateOsHandlesCalled = 0u; uint32_t populateOsHandlesCalled = 0u;
@ -191,6 +198,7 @@ class MockMemoryManager : public MemoryManagerCreate<OsAgnosticMemoryManager> {
uint32_t unlockResourceCalled = 0u; uint32_t unlockResourceCalled = 0u;
uint32_t lockResourceCalled = 0u; uint32_t lockResourceCalled = 0u;
uint32_t createGraphicsAllocationFromExistingStorageCalled = 0u; uint32_t createGraphicsAllocationFromExistingStorageCalled = 0u;
// Forced result for allowIndirectAllocationsAsPack(): -1 defers to the MemoryManager implementation, any other value is returned converted to bool.
int32_t overrideAllocateAsPackReturn = -1;
std::vector<GraphicsAllocation *> allocationsFromExistingStorage{}; std::vector<GraphicsAllocation *> allocationsFromExistingStorage{};
AllocationData alignAllocationData; AllocationData alignAllocationData;
uint32_t successAllocatedGraphicsMemoryIndex = 0u; uint32_t successAllocatedGraphicsMemoryIndex = 0u;