fix: chunking prefetch add USER_FENCE

Add USER_FENCE before PREFETCH call and after the BIND

Related-To: NEO-8098

Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
Signed-off-by: John Falkowski <john.falkowski@intel.com>
This commit is contained in:
John Falkowski
2023-08-14 16:31:23 +00:00
committed by Compute-Runtime-Automation
parent f43e6cc795
commit 2403212dcd
12 changed files with 203 additions and 12 deletions

View File

@@ -800,8 +800,19 @@ void SVMAllocsManager::prefetchMemory(Device &device, CommandStreamReceiver &com
return subDeviceIds;
};
if (memoryManager->isKmdMigrationAvailable(device.getRootDeviceIndex()) &&
(svmData.memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) {
// Perform prefetch for chunks if EnableBOChunkingPrefetch is enabled
// and KMD migration is available, since chunking is currently
// intended to be used only with KMD migration
bool isChunkingNeededForDeviceAllocations = false;
if (NEO::DebugManager.flags.EnableBOChunkingPrefetch.get() &&
memoryManager->isKmdMigrationAvailable(device.getRootDeviceIndex()) &&
(svmData.memoryType == InternalMemoryType::DEVICE_UNIFIED_MEMORY)) {
isChunkingNeededForDeviceAllocations = true;
}
if ((memoryManager->isKmdMigrationAvailable(device.getRootDeviceIndex()) &&
(svmData.memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY)) ||
isChunkingNeededForDeviceAllocations) {
auto gfxAllocation = svmData.gpuAllocations.getGraphicsAllocation(device.getRootDeviceIndex());
auto subDeviceIds = commandStreamReceiver.getActivePartitions() > 1 ? getSubDeviceIds(commandStreamReceiver) : SubDeviceIdsVec{getSubDeviceId(device)};
memoryManager->setMemPrefetch(gfxAllocation, subDeviceIds, device.getRootDeviceIndex());