clEnqueueMigrateMemINTEL with memory prefetch for KMD migrated shared allocation

Related-To: NEO-6740

Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
Author: Milczarek, Slawomir
Committed by: Compute-Runtime-Automation
Date: 2022-07-13 12:10:15 +00:00
Commit: 3e22f30ed5
Parent: ae0e701f6f
5 changed files with 119 additions and 14 deletions


@@ -25,25 +25,15 @@ NEO::PipeControlArgs CommandListCoreFamily<IGFX_XE_HPC_CORE>::createBarrierFlags
 template <>
 ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const void *ptr, size_t size) {
-    auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
+    auto svmAllocMgr = device->getDriverHandle()->getSvmAllocsManager();
+    auto allocData = svmAllocMgr->getSVMAlloc(ptr);
     if (!allocData) {
         return ZE_RESULT_ERROR_INVALID_ARGUMENT;
     }
-    auto allowPrefetchingKmdMigratedSharedAllocation = false;
-    if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() != -1) {
-        allowPrefetchingKmdMigratedSharedAllocation = !!NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get();
-    }
-    if (allowPrefetchingKmdMigratedSharedAllocation) {
-        auto memoryManager = device->getDriverHandle()->getMemoryManager();
-        if (memoryManager->isKmdMigrationAvailable(device->getRootDeviceIndex()) &&
-            (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) {
-            auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
-            auto subDeviceId = static_cast<DeviceImp *>(device)->getPhysicalSubDeviceId();
-            memoryManager->setMemPrefetch(alloc, subDeviceId, device->getRootDeviceIndex());
-        }
-    }
+    if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() > 0) {
+        svmAllocMgr->prefetchMemory(*device->getNEODevice(), *allocData);
+    }
     if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
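For context, appendMemoryPrefetch is what backs zeCommandListAppendMemoryPrefetch, so with the AppendMemoryPrefetchForKmdMigratedSharedAllocations debug key enabled a prefetch hint on a shared USM allocation now routes through SVMAllocsManager::prefetchMemory. A minimal Level Zero sketch of how an application reaches this code; error handling is trimmed and the sketch is not part of this change:

#include <level_zero/ze_api.h>

int main() {
    zeInit(0);

    uint32_t count = 1;
    ze_driver_handle_t driver;
    zeDriverGet(&count, &driver);

    count = 1;
    ze_device_handle_t device;
    zeDeviceGet(driver, &count, &device);

    ze_context_desc_t contextDesc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC};
    ze_context_handle_t context;
    zeContextCreate(driver, &contextDesc, &context);

    ze_command_list_desc_t listDesc = {ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC};
    ze_command_list_handle_t cmdList;
    zeCommandListCreate(context, device, &listDesc, &cmdList);

    // Shared USM allocation, eligible for KMD migration between host and device.
    ze_device_mem_alloc_desc_t deviceDesc = {ZE_STRUCTURE_TYPE_DEVICE_MEM_ALLOC_DESC};
    ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC};
    void *shared = nullptr;
    zeMemAllocShared(context, &deviceDesc, &hostDesc, 4096, 0, device, &shared);

    // Hint that the shared allocation should be resident on the device; this is the
    // call that lands in appendMemoryPrefetch above. The list still has to be closed
    // and submitted to a command queue for the hint to take effect.
    zeCommandListAppendMemoryPrefetch(cmdList, shared, 4096);
    zeCommandListClose(cmdList);

    zeMemFree(context, shared);
    zeCommandListDestroy(cmdList);
    zeContextDestroy(context);
    return 0;
}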


@@ -4184,6 +4184,16 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL(
             auto pEvent = castToObjectOrAbort<Event>(*event);
             pEvent->setCmdType(CL_COMMAND_MIGRATEMEM_INTEL);
         }
+        if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() > 0) {
+            auto pSvmAllocMgr = pCommandQueue->getContext().getSVMAllocsManager();
+            UNRECOVERABLE_IF(pSvmAllocMgr == nullptr);
+            auto allocData = pSvmAllocMgr->getSVMAlloc(ptr);
+            if (allocData) {
+                pSvmAllocMgr->prefetchMemory(pCommandQueue->getDevice(), *allocData);
+            }
+        }
     }
     return retVal;
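On the OpenCL side, the same prefetch now piggybacks on clEnqueueMigrateMemINTEL when the flag is set. A minimal host-side sketch of the path the new block covers, assuming OpenCL headers that ship the cl_intel_unified_shared_memory definitions (the *_fn typedefs; older header packages keep them in CL/cl_ext_intel.h), with error handling trimmed:

#include <CL/cl.h>
#include <CL/cl_ext.h>

int main() {
    cl_platform_id platform;
    clGetPlatformIDs(1, &platform, nullptr);
    cl_device_id device;
    clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr);
    cl_context context = clCreateContext(nullptr, 1, &device, nullptr, nullptr, nullptr);
    cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, nullptr, nullptr);

    // Extension entry points are resolved at runtime; the typedef names come from
    // the Khronos cl_intel_unified_shared_memory headers.
    auto sharedMemAlloc = reinterpret_cast<clSharedMemAllocINTEL_fn>(
        clGetExtensionFunctionAddressForPlatform(platform, "clSharedMemAllocINTEL"));
    auto enqueueMigrateMem = reinterpret_cast<clEnqueueMigrateMemINTEL_fn>(
        clGetExtensionFunctionAddressForPlatform(platform, "clEnqueueMigrateMemINTEL"));
    auto memFree = reinterpret_cast<clMemFreeINTEL_fn>(
        clGetExtensionFunctionAddressForPlatform(platform, "clMemFreeINTEL"));

    cl_int err = CL_SUCCESS;
    void *shared = sharedMemAlloc(context, device, nullptr, 4096, 0, &err);

    // With AppendMemoryPrefetchForKmdMigratedSharedAllocations > 0, this migrate
    // call additionally prefetches the shared allocation to the (sub)device that
    // the command queue targets.
    err = enqueueMigrateMem(queue, shared, 4096, 0, 0, nullptr, nullptr);
    clFinish(queue);

    memFree(context, shared);
    clReleaseCommandQueue(queue);
    clReleaseContext(context);
    return 0;
}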


@@ -897,6 +897,90 @@ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithProperP
     EXPECT_EQ(CL_SUCCESS, retVal);
 }
+TEST(clUnifiedSharedMemoryTests, givenUseKmdMigrationAndAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheDeviceAssociatedWithCommandQueue) {
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.UseKmdMigration.set(1);
+    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
+    MockContext mockContext;
+    auto device = mockContext.getDevice(0u);
+    REQUIRE_SVM_OR_SKIP(device);
+    MockCommandQueue mockCmdQueue{mockContext};
+    cl_int retVal = CL_SUCCESS;
+    auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&mockContext, device, nullptr, 4, 0, &retVal);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    ASSERT_NE(nullptr, unifiedMemorySharedAllocation);
+    retVal = clEnqueueMigrateMemINTEL(&mockCmdQueue, unifiedMemorySharedAllocation, 10, 0, 0, nullptr, nullptr);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    auto mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
+    EXPECT_TRUE(mockMemoryManager->setMemPrefetchCalled);
+    EXPECT_EQ(0u, mockMemoryManager->memPrefetchSubDeviceId);
+    clMemFreeINTEL(&mockContext, unifiedMemorySharedAllocation);
+}
+TEST(clUnifiedSharedMemoryTests, givenContextWithMultipleSubdevicesWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheSubDeviceAssociatedWithCommandQueue) {
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.UseKmdMigration.set(1);
+    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
+    UltClDeviceFactory deviceFactory{1, 4};
+    cl_device_id allDevices[] = {deviceFactory.rootDevices[0], deviceFactory.subDevices[0], deviceFactory.subDevices[1],
+                                 deviceFactory.subDevices[2], deviceFactory.subDevices[3]};
+    MockContext multiTileContext(ClDeviceVector{allDevices, 5});
+    auto subDevice = deviceFactory.subDevices[1];
+    REQUIRE_SVM_OR_SKIP(subDevice);
+    MockCommandQueue mockCmdQueue(&multiTileContext, subDevice, 0, false);
+    cl_int retVal = CL_SUCCESS;
+    auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&multiTileContext, subDevice, nullptr, 4, 0, &retVal);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    ASSERT_NE(nullptr, unifiedMemorySharedAllocation);
+    retVal = clEnqueueMigrateMemINTEL(&mockCmdQueue, unifiedMemorySharedAllocation, 10, 0, 0, nullptr, nullptr);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    auto mockMemoryManager = static_cast<MockMemoryManager *>(subDevice->getMemoryManager());
+    EXPECT_TRUE(mockMemoryManager->setMemPrefetchCalled);
+    EXPECT_EQ(1u, mockMemoryManager->memPrefetchSubDeviceId);
+    clMemFreeINTEL(&multiTileContext, unifiedMemorySharedAllocation);
+}
+TEST(clUnifiedSharedMemoryTests, givenContextWithMultipleSubdevicesWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheRootDeviceAssociatedWithCommandQueue) {
+    DebugManagerStateRestore restorer;
+    DebugManager.flags.UseKmdMigration.set(1);
+    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);
+    UltClDeviceFactory deviceFactory{1, 4};
+    cl_device_id allDevices[] = {deviceFactory.rootDevices[0], deviceFactory.subDevices[0], deviceFactory.subDevices[1],
+                                 deviceFactory.subDevices[2], deviceFactory.subDevices[3]};
+    MockContext multiTileContext(ClDeviceVector{allDevices, 5});
+    auto device = deviceFactory.rootDevices[0];
+    REQUIRE_SVM_OR_SKIP(device);
+    MockCommandQueue mockCmdQueue(&multiTileContext, device, 0, false);
+    cl_int retVal = CL_SUCCESS;
+    auto unifiedMemorySharedAllocation = clSharedMemAllocINTEL(&multiTileContext, device, nullptr, 4, 0, &retVal);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    ASSERT_NE(nullptr, unifiedMemorySharedAllocation);
+    retVal = clEnqueueMigrateMemINTEL(&mockCmdQueue, unifiedMemorySharedAllocation, 10, 0, 0, nullptr, nullptr);
+    EXPECT_EQ(CL_SUCCESS, retVal);
+    auto mockMemoryManager = static_cast<MockMemoryManager *>(device->getMemoryManager());
+    EXPECT_TRUE(mockMemoryManager->setMemPrefetchCalled);
+    EXPECT_EQ(0u, mockMemoryManager->memPrefetchSubDeviceId);
+    clMemFreeINTEL(&multiTileContext, unifiedMemorySharedAllocation);
+}
 TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemAdviseINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) {
     auto retVal = clEnqueueMemAdviseINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr);
     EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);


@@ -662,4 +662,24 @@ AllocationType SVMAllocsManager::getGraphicsAllocationTypeAndCompressionPreferen
     return allocationType;
 }
+void SVMAllocsManager::prefetchMemory(Device &device, SvmAllocationData &svmData) {
+    auto getSubDeviceId = [](Device &device) {
+        if (!device.isSubDevice()) {
+            uint32_t deviceBitField = static_cast<uint32_t>(device.getDeviceBitfield().to_ulong());
+            if (device.getDeviceBitfield().count() > 1) {
+                deviceBitField &= ~deviceBitField + 1;
+            }
+            return Math::log2(deviceBitField);
+        }
+        return static_cast<NEO::SubDevice *>(&device)->getSubDeviceIndex();
+    };
+    if (memoryManager->isKmdMigrationAvailable(device.getRootDeviceIndex()) &&
+        (svmData.memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) {
+        auto gfxAllocation = svmData.gpuAllocations.getGraphicsAllocation(device.getRootDeviceIndex());
+        auto subDeviceId = getSubDeviceId(device);
+        memoryManager->setMemPrefetch(gfxAllocation, subDeviceId, device.getRootDeviceIndex());
+    }
+}
 } // namespace NEO
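The sub-device selection above is the part worth a closer look: for a device that is not a sub-device and whose bitfield has several tiles set, deviceBitField &= ~deviceBitField + 1 isolates the lowest set bit and Math::log2 turns it into an index, which is why the multi-tile root-device test expects sub-device 0, while a queue on subDevices[1] reports 1 via getSubDeviceIndex(). A small self-contained sketch of that bit arithmetic; the 4-bit bitset mirrors the four-tile test setup and the helper name is illustrative, not from the change:

#include <bitset>
#include <cstdint>
#include <iostream>

// Mirrors the root-device branch of the getSubDeviceId lambda: keep only the
// lowest set bit of the device bitfield and return its position.
uint32_t subDeviceIdFromBitfield(std::bitset<4> deviceBitfield) {
    auto bits = static_cast<uint32_t>(deviceBitfield.to_ulong());
    if (deviceBitfield.count() > 1) {
        bits &= ~bits + 1u; // two's complement trick: x & (~x + 1) isolates the lowest set bit
    }
    uint32_t id = 0;
    while (bits > 1u) { // log2 of a power of two
        bits >>= 1u;
        ++id;
    }
    return id;
}

int main() {
    std::cout << subDeviceIdFromBitfield(0b1111) << "\n"; // all four tiles set -> prefetch to sub-device 0
    std::cout << subDeviceIdFromBitfield(0b0010) << "\n"; // only tile 1 set -> 1
    return 0;
}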


@@ -183,6 +183,7 @@ class SVMAllocsManager {
     std::atomic<uint32_t> allocationsCounter = 0;
     void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount);
     void prepareIndirectAllocationForDestruction(SvmAllocationData *);
+    void prefetchMemory(Device &device, SvmAllocationData &svmData);
     std::map<CommandStreamReceiver *, InternalAllocationsTracker> indirectAllocationsResidency;