mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-22 10:17:01 +08:00
clEnqueueMigrateMemINTEL with memory prefetch for KMD migrated shared allocation
Related-To: NEO-6740
Signed-off-by: Milczarek, Slawomir <slawomir.milczarek@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
ae0e701f6f
commit
3e22f30ed5
@@ -25,25 +25,15 @@ NEO::PipeControlArgs CommandListCoreFamily<IGFX_XE_HPC_CORE>::createBarrierFlags
|
|||||||
|
|
||||||
template <>
|
template <>
|
||||||
ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const void *ptr, size_t size) {
|
ze_result_t CommandListCoreFamily<IGFX_XE_HPC_CORE>::appendMemoryPrefetch(const void *ptr, size_t size) {
|
||||||
auto allocData = device->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(ptr);
|
auto svmAllocMgr = device->getDriverHandle()->getSvmAllocsManager();
|
||||||
|
auto allocData = svmAllocMgr->getSVMAlloc(ptr);
|
||||||
|
|
||||||
if (!allocData) {
|
if (!allocData) {
|
||||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto allowPrefetchingKmdMigratedSharedAllocation = false;
|
if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() > 0) {
|
||||||
if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() != -1) {
|
svmAllocMgr->prefetchMemory(*device->getNEODevice(), *allocData);
|
||||||
allowPrefetchingKmdMigratedSharedAllocation = !!NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (allowPrefetchingKmdMigratedSharedAllocation) {
|
|
||||||
auto memoryManager = device->getDriverHandle()->getMemoryManager();
|
|
||||||
if (memoryManager->isKmdMigrationAvailable(device->getRootDeviceIndex()) &&
|
|
||||||
(allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) {
|
|
||||||
auto alloc = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
|
|
||||||
auto subDeviceId = static_cast<DeviceImp *>(device)->getPhysicalSubDeviceId();
|
|
||||||
memoryManager->setMemPrefetch(alloc, subDeviceId, device->getRootDeviceIndex());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
|
if (NEO::DebugManager.flags.AddStatePrefetchCmdToMemoryPrefetchAPI.get() != 1) {
|
||||||
|
|||||||
@@ -4184,6 +4184,16 @@ CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemINTEL(
|
|||||||
auto pEvent = castToObjectOrAbort<Event>(*event);
|
auto pEvent = castToObjectOrAbort<Event>(*event);
|
||||||
pEvent->setCmdType(CL_COMMAND_MIGRATEMEM_INTEL);
|
pEvent->setCmdType(CL_COMMAND_MIGRATEMEM_INTEL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (NEO::DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.get() > 0) {
|
||||||
|
auto pSvmAllocMgr = pCommandQueue->getContext().getSVMAllocsManager();
|
||||||
|
UNRECOVERABLE_IF(pSvmAllocMgr == nullptr);
|
||||||
|
|
||||||
|
auto allocData = pSvmAllocMgr->getSVMAlloc(ptr);
|
||||||
|
if (allocData) {
|
||||||
|
pSvmAllocMgr->prefetchMemory(pCommandQueue->getDevice(), *allocData);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return retVal;
|
return retVal;
|
||||||
|
|||||||
@@ -897,6 +897,90 @@ TEST(clUnifiedSharedMemoryTests, whenClEnqueueMigrateMemINTELisCalledWithProperP
|
|||||||
EXPECT_EQ(CL_SUCCESS, retVal);
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// With UseKmdMigration and AppendMemoryPrefetchForKmdMigratedSharedAllocations both set to 1,
// a clEnqueueMigrateMemINTEL call on a queue created from the default mock context is expected
// to leave setMemPrefetchCalled set on the mock memory manager, with memPrefetchSubDeviceId 0.
TEST(clUnifiedSharedMemoryTests, givenUseKmdMigrationAndAppendMemoryPrefetchForKmdMigratedSharedAllocationsWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheDeviceAssociatedWithCommandQueue) {
    // Declared before the flags are touched; presumably restores them on scope exit.
    DebugManagerStateRestore debugStateRestore;
    DebugManager.flags.UseKmdMigration.set(1);
    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);

    MockContext context;
    auto clDevice = context.getDevice(0u);
    REQUIRE_SVM_OR_SKIP(clDevice);

    MockCommandQueue commandQueue{context};

    // Allocate a 4-byte shared USM buffer on the context's device.
    cl_int allocStatus = CL_SUCCESS;
    auto sharedUsmPtr = clSharedMemAllocINTEL(&context, clDevice, nullptr, 4, 0, &allocStatus);
    EXPECT_EQ(CL_SUCCESS, allocStatus);
    ASSERT_NE(nullptr, sharedUsmPtr);

    // Call under test: no wait list and no output event.
    const cl_int migrateStatus = clEnqueueMigrateMemINTEL(&commandQueue, sharedUsmPtr, 10, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, migrateStatus);

    auto memoryManagerMock = static_cast<MockMemoryManager *>(clDevice->getMemoryManager());
    EXPECT_TRUE(memoryManagerMock->setMemPrefetchCalled);
    EXPECT_EQ(0u, memoryManagerMock->memPrefetchSubDeviceId);

    clMemFreeINTEL(&context, sharedUsmPtr);
}
|
||||||
|
|
||||||
|
// Builds a context from one root device plus its four sub-devices, creates the command queue
// on subDevices[1], and expects clEnqueueMigrateMemINTEL to leave setMemPrefetchCalled set on
// the mock memory manager with memPrefetchSubDeviceId 1.
TEST(clUnifiedSharedMemoryTests, givenContextWithMultipleSubdevicesWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheSubDeviceAssociatedWithCommandQueue) {
    // Declared before the flags are touched; presumably restores them on scope exit.
    DebugManagerStateRestore debugStateRestore;
    DebugManager.flags.UseKmdMigration.set(1);
    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);

    UltClDeviceFactory factory{1, 4};
    cl_device_id contextDevices[] = {factory.rootDevices[0],
                                     factory.subDevices[0],
                                     factory.subDevices[1],
                                     factory.subDevices[2],
                                     factory.subDevices[3]};
    MockContext context(ClDeviceVector{contextDevices, 5});

    // The queue and the allocation both target the second sub-device.
    auto queueSubDevice = factory.subDevices[1];
    REQUIRE_SVM_OR_SKIP(queueSubDevice);

    MockCommandQueue commandQueue(&context, queueSubDevice, 0, false);

    cl_int allocStatus = CL_SUCCESS;
    auto sharedUsmPtr = clSharedMemAllocINTEL(&context, queueSubDevice, nullptr, 4, 0, &allocStatus);
    EXPECT_EQ(CL_SUCCESS, allocStatus);
    ASSERT_NE(nullptr, sharedUsmPtr);

    // Call under test: no wait list and no output event.
    const cl_int migrateStatus = clEnqueueMigrateMemINTEL(&commandQueue, sharedUsmPtr, 10, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, migrateStatus);

    auto memoryManagerMock = static_cast<MockMemoryManager *>(queueSubDevice->getMemoryManager());
    EXPECT_TRUE(memoryManagerMock->setMemPrefetchCalled);
    EXPECT_EQ(1u, memoryManagerMock->memPrefetchSubDeviceId);

    clMemFreeINTEL(&context, sharedUsmPtr);
}
|
||||||
|
|
||||||
|
// Builds a context from one root device plus its four sub-devices, creates the command queue
// on the root device, and expects clEnqueueMigrateMemINTEL to leave setMemPrefetchCalled set
// on the mock memory manager with memPrefetchSubDeviceId 0.
TEST(clUnifiedSharedMemoryTests, givenContextWithMultipleSubdevicesWhenClEnqueueMigrateMemINTELisCalledThenExplicitlyMigrateMemoryToTheRootDeviceAssociatedWithCommandQueue) {
    // Declared before the flags are touched; presumably restores them on scope exit.
    DebugManagerStateRestore debugStateRestore;
    DebugManager.flags.UseKmdMigration.set(1);
    DebugManager.flags.AppendMemoryPrefetchForKmdMigratedSharedAllocations.set(1);

    UltClDeviceFactory factory{1, 4};
    cl_device_id contextDevices[] = {factory.rootDevices[0],
                                     factory.subDevices[0],
                                     factory.subDevices[1],
                                     factory.subDevices[2],
                                     factory.subDevices[3]};
    MockContext context(ClDeviceVector{contextDevices, 5});

    // The queue and the allocation both target the root device.
    auto rootDevice = factory.rootDevices[0];
    REQUIRE_SVM_OR_SKIP(rootDevice);

    MockCommandQueue commandQueue(&context, rootDevice, 0, false);

    cl_int allocStatus = CL_SUCCESS;
    auto sharedUsmPtr = clSharedMemAllocINTEL(&context, rootDevice, nullptr, 4, 0, &allocStatus);
    EXPECT_EQ(CL_SUCCESS, allocStatus);
    ASSERT_NE(nullptr, sharedUsmPtr);

    // Call under test: no wait list and no output event.
    const cl_int migrateStatus = clEnqueueMigrateMemINTEL(&commandQueue, sharedUsmPtr, 10, 0, 0, nullptr, nullptr);
    EXPECT_EQ(CL_SUCCESS, migrateStatus);

    auto memoryManagerMock = static_cast<MockMemoryManager *>(rootDevice->getMemoryManager());
    EXPECT_TRUE(memoryManagerMock->setMemPrefetchCalled);
    EXPECT_EQ(0u, memoryManagerMock->memPrefetchSubDeviceId);

    clMemFreeINTEL(&context, sharedUsmPtr);
}
|
||||||
|
|
||||||
TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemAdviseINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) {
|
TEST(clUnifiedSharedMemoryTests, whenClEnqueueMemAdviseINTELisCalledWithWrongQueueThenInvalidQueueErrorIsReturned) {
|
||||||
auto retVal = clEnqueueMemAdviseINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr);
|
auto retVal = clEnqueueMemAdviseINTEL(0, nullptr, 0, 0, 0, nullptr, nullptr);
|
||||||
EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
|
EXPECT_EQ(CL_INVALID_COMMAND_QUEUE, retVal);
|
||||||
|
|||||||
@@ -662,4 +662,24 @@ AllocationType SVMAllocsManager::getGraphicsAllocationTypeAndCompressionPreferen
|
|||||||
return allocationType;
|
return allocationType;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Forwards a prefetch request for a shared USM allocation to the memory manager.
//
// The call is a no-op unless memoryManager->isKmdMigrationAvailable() returns true for the
// device's root device index and svmData.memoryType is SHARED_UNIFIED_MEMORY.
//
// device  - root device or sub-device; supplies the root device index and the sub-device id
//           passed to setMemPrefetch.
// svmData - allocation record; its GPU allocation for the device's root device index is
//           the one handed to setMemPrefetch.
void SVMAllocsManager::prefetchMemory(Device &device, SvmAllocationData &svmData) {
    // Picks the sub-device id that is handed to setMemPrefetch.
    auto resolveSubDeviceId = [](Device &target) {
        if (target.isSubDevice()) {
            return static_cast<NEO::SubDevice *>(&target)->getSubDeviceIndex();
        }

        const auto &bitfield = target.getDeviceBitfield();
        uint32_t mask = static_cast<uint32_t>(bitfield.to_ulong());
        if (bitfield.count() > 1) {
            // x & (~x + 1) keeps only the lowest set bit of the mask.
            mask &= ~mask + 1;
        }
        return Math::log2(mask);
    };

    const auto rootDeviceIndex = device.getRootDeviceIndex();

    if (!memoryManager->isKmdMigrationAvailable(rootDeviceIndex)) {
        return;
    }
    if (svmData.memoryType != InternalMemoryType::SHARED_UNIFIED_MEMORY) {
        return;
    }

    auto graphicsAllocation = svmData.gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
    auto targetSubDeviceId = resolveSubDeviceId(device);
    memoryManager->setMemPrefetch(graphicsAllocation, targetSubDeviceId, rootDeviceIndex);
}
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -183,6 +183,7 @@ class SVMAllocsManager {
|
|||||||
std::atomic<uint32_t> allocationsCounter = 0;
|
std::atomic<uint32_t> allocationsCounter = 0;
|
||||||
void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount);
|
void makeIndirectAllocationsResident(CommandStreamReceiver &commandStreamReceiver, uint32_t taskCount);
|
||||||
void prepareIndirectAllocationForDestruction(SvmAllocationData *);
|
void prepareIndirectAllocationForDestruction(SvmAllocationData *);
|
||||||
|
void prefetchMemory(Device &device, SvmAllocationData &svmData);
|
||||||
|
|
||||||
std::map<CommandStreamReceiver *, InternalAllocationsTracker> indirectAllocationsResidency;
|
std::map<CommandStreamReceiver *, InternalAllocationsTracker> indirectAllocationsResidency;
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user