mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-03 06:49:52 +08:00
feature: Get Peer Allocation with specified base Pointer
Related-To: LOCI-4176 - When a base pointer is passed into Get Peer Allocation, that base pointer is used to map the new allocation into virtual memory. - Enables users to use the same pointer for all devices in Peer To Peer. - Currently unsupported on reserved memory due to the mapped and exec residency of virtual addresses. Signed-off-by: Neil R Spruit <neil.r.spruit@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
f98ac7098b
commit
ded9d7bff2
@@ -589,7 +589,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
|
||||
}
|
||||
|
||||
builtinKernel->setArgBufferWithAlloc(0u, allocationStruct.alignedAllocationPtr,
|
||||
allocationStruct.alloc);
|
||||
allocationStruct.alloc,
|
||||
nullptr);
|
||||
builtinKernel->setArgRedescribedImage(1u, image->toHandle());
|
||||
builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset);
|
||||
|
||||
@@ -734,7 +735,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
|
||||
|
||||
builtinKernel->setArgRedescribedImage(0u, image->toHandle());
|
||||
builtinKernel->setArgBufferWithAlloc(1u, allocationStruct.alignedAllocationPtr,
|
||||
allocationStruct.alloc);
|
||||
allocationStruct.alloc,
|
||||
nullptr);
|
||||
|
||||
uint32_t origin[] = {
|
||||
static_cast<uint32_t>(pSrcRegion->originX),
|
||||
@@ -1033,8 +1035,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
|
||||
return ret;
|
||||
}
|
||||
|
||||
builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc);
|
||||
builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc, nullptr);
|
||||
builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc, nullptr);
|
||||
|
||||
uint64_t elems = size / elementSize;
|
||||
builtinKernel->setArgumentValue(2, sizeof(elems), &elems);
|
||||
@@ -1512,8 +1514,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
|
||||
uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)};
|
||||
uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)};
|
||||
|
||||
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc, nullptr);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc, nullptr);
|
||||
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
|
||||
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
|
||||
builtinKernel->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
|
||||
@@ -1576,8 +1578,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
|
||||
uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY)};
|
||||
uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY)};
|
||||
|
||||
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc, nullptr);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc, nullptr);
|
||||
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
|
||||
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
|
||||
builtinKernel->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
|
||||
@@ -1620,7 +1622,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool
|
||||
builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
|
||||
ze_group_count_t dispatchKernelRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
|
||||
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
|
||||
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinKernel->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
@@ -1741,7 +1743,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
|
||||
uint32_t value = 0;
|
||||
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
|
||||
builtinKernel->setArgumentValue(1, sizeof(fillArguments.mainOffset), &fillArguments.mainOffset);
|
||||
builtinKernel->setArgumentValue(2, sizeof(value), &value);
|
||||
|
||||
@@ -1784,9 +1786,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
patternAllocOffset += patternSizeToCopy;
|
||||
} while (patternAllocOffset < patternAllocationSize);
|
||||
if (fillArguments.leftRemainingBytes == 0) {
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
|
||||
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
|
||||
builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
|
||||
builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc, nullptr);
|
||||
builtinKernel->setArgumentValue(3, sizeof(fillArguments.patternSizeInEls), &fillArguments.patternSizeInEls);
|
||||
|
||||
ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(fillArguments.groups), 1u, 1u};
|
||||
@@ -1809,13 +1811,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
|
||||
builtinKernelRemainder->setArgBufferWithAlloc(0,
|
||||
dstAllocation.alignedAllocationPtr,
|
||||
dstAllocation.alloc);
|
||||
dstAllocation.alloc, nullptr);
|
||||
builtinKernelRemainder->setArgumentValue(1,
|
||||
sizeof(dstOffsetRemainder),
|
||||
&dstOffsetRemainder);
|
||||
builtinKernelRemainder->setArgBufferWithAlloc(2,
|
||||
reinterpret_cast<uintptr_t>(patternGfxAllocPtr),
|
||||
patternGfxAlloc);
|
||||
patternGfxAlloc, nullptr);
|
||||
builtinKernelRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
|
||||
|
||||
res = appendLaunchKernelSplit(builtinKernelRemainder, &dispatchKernelArgs, signalEvent, launchParams);
|
||||
@@ -1840,13 +1842,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
|
||||
|
||||
builtinKernelRemainder->setArgBufferWithAlloc(0,
|
||||
dstAllocation.alignedAllocationPtr,
|
||||
dstAllocation.alloc);
|
||||
dstAllocation.alloc, nullptr);
|
||||
builtinKernelRemainder->setArgumentValue(1,
|
||||
sizeof(dstOffsetRemainder),
|
||||
&dstOffsetRemainder);
|
||||
builtinKernelRemainder->setArgBufferWithAlloc(2,
|
||||
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
|
||||
patternGfxAlloc);
|
||||
patternGfxAlloc, nullptr);
|
||||
builtinKernelRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
|
||||
|
||||
res = appendLaunchKernelSplit(builtinKernelRemainder, &dispatchKernelArgs, signalEvent, launchParams);
|
||||
@@ -1898,7 +1900,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
|
||||
if (driverHandle->isRemoteResourceNeeded(ptr, gpuAllocation, allocData, device)) {
|
||||
if (allocData) {
|
||||
uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
|
||||
gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), nullptr);
|
||||
gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), nullptr, nullptr);
|
||||
}
|
||||
if (gpuAllocation == nullptr) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
@@ -2024,7 +2026,7 @@ inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAll
|
||||
uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
|
||||
uint64_t offset = sourcePtr - pbase;
|
||||
|
||||
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &alignedPtr);
|
||||
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &alignedPtr, nullptr);
|
||||
alignedPtr += offset;
|
||||
|
||||
if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) {
|
||||
@@ -2422,7 +2424,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
||||
auto offsetValPtr = static_cast<uintptr_t>(pOffsetAllocationStruct.alloc->getGpuAddress());
|
||||
commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc);
|
||||
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
|
||||
builtinKernel->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc, nullptr);
|
||||
builtinKernel->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
|
||||
offsetValPtr += sizeof(size_t);
|
||||
}
|
||||
@@ -2448,8 +2450,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
|
||||
|
||||
auto dstValPtr = static_cast<uintptr_t>(dstPtrAllocationStruct.alloc->getGpuAddress());
|
||||
|
||||
builtinKernel->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
|
||||
builtinKernel->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData, nullptr);
|
||||
builtinKernel->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc, nullptr);
|
||||
|
||||
auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType();
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
@@ -377,7 +377,14 @@ void ContextImp::freePeerAllocations(const void *ptr, bool blocking, Device *dev
|
||||
auto peerAllocData = &iter->second;
|
||||
auto peerAlloc = peerAllocData->gpuAllocations.getDefaultGraphicsAllocation();
|
||||
auto peerPtr = reinterpret_cast<void *>(peerAlloc->getGpuAddress());
|
||||
this->driverHandle->svmAllocsManager->freeSVMAlloc(peerPtr, blocking);
|
||||
if (peerAllocData->mappedAllocData) {
|
||||
auto gpuAllocations = peerAllocData->gpuAllocations;
|
||||
for (const auto &graphicsAllocation : gpuAllocations.getGraphicsAllocations()) {
|
||||
this->driverHandle->getMemoryManager()->freeGraphicsMemory(graphicsAllocation);
|
||||
}
|
||||
} else {
|
||||
this->driverHandle->svmAllocsManager->freeSVMAlloc(peerPtr, blocking);
|
||||
}
|
||||
deviceImp->peerAllocations.allocations.erase(iter);
|
||||
}
|
||||
|
||||
@@ -709,8 +716,8 @@ ze_result_t ContextImp::openIpcMemHandles(ze_device_handle_t hDevice,
|
||||
handles.push_back(static_cast<NEO::osHandle>(handle));
|
||||
}
|
||||
auto neoDevice = Device::fromHandle(hDevice)->getNEODevice()->getRootDevice();
|
||||
|
||||
*pptr = this->driverHandle->importFdHandles(neoDevice, flags, handles, nullptr);
|
||||
NEO::SvmAllocationData allocDataInternal(neoDevice->getRootDeviceIndex());
|
||||
*pptr = this->driverHandle->importFdHandles(neoDevice, flags, handles, nullptr, nullptr, allocDataInternal);
|
||||
if (nullptr == *pptr) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
@@ -1056,6 +1063,7 @@ ze_result_t ContextImp::mapVirtualMem(const void *ptr,
|
||||
allocData.size = size;
|
||||
allocData.pageSizeForAlignment = MemoryConstants::pageSize64k;
|
||||
allocData.setAllocId(this->driverHandle->svmAllocsManager->allocationsCounter++);
|
||||
allocData.memoryType = InternalMemoryType::RESERVED_DEVICE_MEMORY;
|
||||
NEO::MemoryMappedRange *mappedRange = new NEO::MemoryMappedRange;
|
||||
mappedRange->ptr = ptr;
|
||||
mappedRange->size = size;
|
||||
|
||||
@@ -1,10 +1,13 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/device/device.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
|
||||
#include "level_zero/core/source/context/context_imp.h"
|
||||
#include "level_zero/core/source/device/device.h"
|
||||
#include "level_zero/core/source/driver/driver_handle_imp.h"
|
||||
@@ -19,7 +22,9 @@ bool ContextImp::isShareableMemory(const void *exportDesc, bool exportableMemory
|
||||
}
|
||||
|
||||
void *ContextImp::getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, NEO::AllocationType allocationType, ze_ipc_memory_flags_t flags) {
|
||||
return this->driverHandle->importFdHandle(Device::fromHandle(hDevice)->getNEODevice(), flags, handle, allocationType, nullptr);
|
||||
auto neoDevice = Device::fromHandle(hDevice)->getNEODevice();
|
||||
NEO::SvmAllocationData allocDataInternal(neoDevice->getRootDeviceIndex());
|
||||
return this->driverHandle->importFdHandle(neoDevice, flags, handle, allocationType, nullptr, nullptr, allocDataInternal);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/helpers/driver_model_type.h"
|
||||
#include "shared/source/memory_manager/memory_manager.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/os_interface/os_interface.h"
|
||||
|
||||
#include "level_zero/core/source/context/context_imp.h"
|
||||
@@ -47,11 +48,15 @@ void *ContextImp::getMemHandlePtr(ze_device_handle_t hDevice,
|
||||
reinterpret_cast<void *>(handle),
|
||||
allocationType);
|
||||
} else if (driverType == NEO::DriverModelType::DRM) {
|
||||
return this->driverHandle->importFdHandle(Device::fromHandle(hDevice)->getNEODevice(),
|
||||
auto neoDevice = Device::fromHandle(hDevice)->getNEODevice();
|
||||
NEO::SvmAllocationData allocDataInternal(neoDevice->getRootDeviceIndex());
|
||||
return this->driverHandle->importFdHandle(neoDevice,
|
||||
flags,
|
||||
handle,
|
||||
allocationType,
|
||||
nullptr);
|
||||
nullptr,
|
||||
nullptr,
|
||||
allocDataInternal);
|
||||
} else {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -476,7 +476,9 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
|
||||
ze_ipc_memory_flags_t flags,
|
||||
uint64_t handle,
|
||||
NEO::AllocationType allocationType,
|
||||
NEO::GraphicsAllocation **pAlloc) {
|
||||
void *basePointer,
|
||||
NEO::GraphicsAllocation **pAlloc,
|
||||
NEO::SvmAllocationData &mappedPeerAllocData) {
|
||||
NEO::osHandle osHandle = static_cast<NEO::osHandle>(handle);
|
||||
NEO::AllocationProperties unifiedMemoryProperties{neoDevice->getRootDeviceIndex(),
|
||||
MemoryConstants::pageSize,
|
||||
@@ -489,29 +491,39 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
|
||||
unifiedMemoryProperties,
|
||||
false,
|
||||
isHostIpcAllocation,
|
||||
false);
|
||||
false,
|
||||
basePointer);
|
||||
if (alloc == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
NEO::SvmAllocationData allocData(neoDevice->getRootDeviceIndex());
|
||||
allocData.gpuAllocations.addAllocation(alloc);
|
||||
allocData.cpuAllocation = nullptr;
|
||||
allocData.size = alloc->getUnderlyingBufferSize();
|
||||
allocData.memoryType =
|
||||
NEO::SvmAllocationData *allocDataTmp = nullptr;
|
||||
if (basePointer) {
|
||||
allocDataTmp = &mappedPeerAllocData;
|
||||
allocDataTmp->mappedAllocData = true;
|
||||
} else {
|
||||
allocDataTmp = &allocData;
|
||||
allocDataTmp->mappedAllocData = false;
|
||||
}
|
||||
allocDataTmp->gpuAllocations.addAllocation(alloc);
|
||||
allocDataTmp->cpuAllocation = nullptr;
|
||||
allocDataTmp->size = alloc->getUnderlyingBufferSize();
|
||||
allocDataTmp->memoryType =
|
||||
isHostIpcAllocation ? InternalMemoryType::HOST_UNIFIED_MEMORY : InternalMemoryType::DEVICE_UNIFIED_MEMORY;
|
||||
allocData.device = neoDevice;
|
||||
allocData.isImportedAllocation = true;
|
||||
allocDataTmp->device = neoDevice;
|
||||
allocDataTmp->isImportedAllocation = true;
|
||||
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
|
||||
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
}
|
||||
|
||||
if (flags & ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED) {
|
||||
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
}
|
||||
|
||||
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
|
||||
|
||||
if (!basePointer) {
|
||||
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
|
||||
}
|
||||
if (pAlloc) {
|
||||
*pAlloc = alloc;
|
||||
}
|
||||
@@ -519,7 +531,7 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
|
||||
return reinterpret_cast<void *>(alloc->getGpuAddress());
|
||||
}
|
||||
|
||||
void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, NEO::GraphicsAllocation **pAlloc) {
|
||||
void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, void *basePtr, NEO::GraphicsAllocation **pAlloc, NEO::SvmAllocationData &mappedPeerAllocData) {
|
||||
NEO::AllocationProperties unifiedMemoryProperties{neoDevice->getRootDeviceIndex(),
|
||||
MemoryConstants::pageSize,
|
||||
NEO::AllocationType::BUFFER,
|
||||
@@ -531,26 +543,39 @@ void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_fla
|
||||
unifiedMemoryProperties,
|
||||
false,
|
||||
false,
|
||||
false);
|
||||
false,
|
||||
basePtr);
|
||||
if (alloc == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
NEO::SvmAllocationData *allocDataTmp = nullptr;
|
||||
NEO::SvmAllocationData allocData(neoDevice->getRootDeviceIndex());
|
||||
allocData.gpuAllocations.addAllocation(alloc);
|
||||
allocData.cpuAllocation = nullptr;
|
||||
allocData.size = alloc->getUnderlyingBufferSize();
|
||||
allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
|
||||
allocData.device = neoDevice;
|
||||
|
||||
if (basePtr) {
|
||||
allocDataTmp = &mappedPeerAllocData;
|
||||
allocDataTmp->mappedAllocData = true;
|
||||
} else {
|
||||
allocDataTmp = &allocData;
|
||||
allocDataTmp->mappedAllocData = false;
|
||||
}
|
||||
|
||||
allocDataTmp->gpuAllocations.addAllocation(alloc);
|
||||
allocDataTmp->cpuAllocation = nullptr;
|
||||
allocDataTmp->size = alloc->getUnderlyingBufferSize();
|
||||
allocDataTmp->memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
|
||||
allocDataTmp->device = neoDevice;
|
||||
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
|
||||
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
}
|
||||
|
||||
if (flags & ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED) {
|
||||
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
|
||||
}
|
||||
|
||||
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
|
||||
if (!basePtr) {
|
||||
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
|
||||
}
|
||||
|
||||
if (pAlloc) {
|
||||
*pAlloc = alloc;
|
||||
@@ -603,19 +628,21 @@ ze_result_t DriverHandleImp::getPeerImage(Device *device, Image *image, Image **
|
||||
NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
|
||||
NEO::SvmAllocationData *allocData,
|
||||
void *basePtr,
|
||||
uintptr_t *peerGpuAddress) {
|
||||
uintptr_t *peerGpuAddress,
|
||||
NEO::SvmAllocationData **peerAllocData) {
|
||||
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
|
||||
NEO::GraphicsAllocation *alloc = nullptr;
|
||||
|
||||
NEO::SvmAllocationData *peerAllocData = nullptr;
|
||||
void *peerMapAddress = basePtr;
|
||||
void *peerPtr = nullptr;
|
||||
|
||||
NEO::SvmAllocationData *peerAllocDataInternal = nullptr;
|
||||
|
||||
std::unique_lock<NEO::SpinLock> lock(deviceImp->peerAllocationsMutex);
|
||||
|
||||
auto iter = deviceImp->peerAllocations.allocations.find(basePtr);
|
||||
if (iter != deviceImp->peerAllocations.allocations.end()) {
|
||||
peerAllocData = &iter->second;
|
||||
alloc = peerAllocData->gpuAllocations.getDefaultGraphicsAllocation();
|
||||
peerAllocDataInternal = &iter->second;
|
||||
alloc = peerAllocDataInternal->gpuAllocations.getDefaultGraphicsAllocation();
|
||||
UNRECOVERABLE_IF(alloc == nullptr);
|
||||
peerPtr = reinterpret_cast<void *>(alloc->getGpuAddress());
|
||||
} else {
|
||||
@@ -624,6 +651,17 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
|
||||
ze_ipc_memory_flags_t flags = {};
|
||||
uint32_t numHandles = alloc->getNumHandles();
|
||||
|
||||
// Don't attempt to use the peerMapAddress for reserved memory due to the limitations in the address reserved.
|
||||
if (allocData->memoryType == InternalMemoryType::RESERVED_DEVICE_MEMORY) {
|
||||
peerMapAddress = nullptr;
|
||||
}
|
||||
|
||||
uint32_t peerAllocRootDeviceIndex = device->getNEODevice()->getRootDeviceIndex();
|
||||
if (numHandles > 1) {
|
||||
peerAllocRootDeviceIndex = device->getNEODevice()->getRootDevice()->getRootDeviceIndex();
|
||||
}
|
||||
NEO::SvmAllocationData allocDataInternal(peerAllocRootDeviceIndex);
|
||||
|
||||
if (numHandles > 1) {
|
||||
UNRECOVERABLE_IF(numHandles == 0);
|
||||
std::vector<NEO::osHandle> handles;
|
||||
@@ -636,7 +674,7 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
|
||||
handles.push_back(static_cast<NEO::osHandle>(handle));
|
||||
}
|
||||
auto neoDevice = device->getNEODevice()->getRootDevice();
|
||||
peerPtr = this->importFdHandles(neoDevice, flags, handles, &alloc);
|
||||
peerPtr = this->importFdHandles(neoDevice, flags, handles, peerMapAddress, &alloc, allocDataInternal);
|
||||
} else {
|
||||
uint64_t handle = 0;
|
||||
int ret = alloc->peekInternalHandle(this->getMemoryManager(), handle);
|
||||
@@ -647,15 +685,28 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
|
||||
flags,
|
||||
handle,
|
||||
NEO::AllocationType::BUFFER,
|
||||
&alloc);
|
||||
peerMapAddress,
|
||||
&alloc,
|
||||
allocDataInternal);
|
||||
}
|
||||
|
||||
if (peerPtr == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
peerAllocData = this->getSvmAllocsManager()->getSVMAlloc(peerPtr);
|
||||
deviceImp->peerAllocations.allocations.insert(std::make_pair(basePtr, *peerAllocData));
|
||||
peerAllocDataInternal = &allocDataInternal;
|
||||
if (peerMapAddress == nullptr) {
|
||||
peerAllocDataInternal = this->getSvmAllocsManager()->getSVMAlloc(peerPtr);
|
||||
}
|
||||
deviceImp->peerAllocations.allocations.insert(std::make_pair(basePtr, *peerAllocDataInternal));
|
||||
// Point to the new peer Alloc Data after it is recreated in the peer allocations map
|
||||
if (peerMapAddress) {
|
||||
peerAllocDataInternal = &deviceImp->peerAllocations.allocations.at(basePtr);
|
||||
}
|
||||
}
|
||||
|
||||
if (peerAllocData) {
|
||||
*peerAllocData = peerAllocDataInternal;
|
||||
}
|
||||
|
||||
if (peerGpuAddress) {
|
||||
|
||||
@@ -43,8 +43,8 @@ struct DriverHandleImp : public DriverHandle {
|
||||
|
||||
NEO::MemoryManager *getMemoryManager() override;
|
||||
void setMemoryManager(NEO::MemoryManager *memoryManager) override;
|
||||
MOCKABLE_VIRTUAL void *importFdHandle(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::AllocationType allocationType, NEO::GraphicsAllocation **pAlloc);
|
||||
MOCKABLE_VIRTUAL void *importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, NEO::GraphicsAllocation **pAlloc);
|
||||
MOCKABLE_VIRTUAL void *importFdHandle(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::AllocationType allocationType, void *basePointer, NEO::GraphicsAllocation **pAlloc, NEO::SvmAllocationData &mappedPeerAllocData);
|
||||
MOCKABLE_VIRTUAL void *importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, void *basePointer, NEO::GraphicsAllocation **pAlloc, NEO::SvmAllocationData &mappedPeerAllocData);
|
||||
MOCKABLE_VIRTUAL void *importNTHandle(ze_device_handle_t hDevice, void *handle, NEO::AllocationType allocationType);
|
||||
ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) override;
|
||||
NEO::SVMAllocsManager *getSvmAllocsManager() override;
|
||||
@@ -75,7 +75,8 @@ struct DriverHandleImp : public DriverHandle {
|
||||
NEO::GraphicsAllocation *getPeerAllocation(Device *device,
|
||||
NEO::SvmAllocationData *allocData,
|
||||
void *basePtr,
|
||||
uintptr_t *peerGpuAddress);
|
||||
uintptr_t *peerGpuAddress,
|
||||
NEO::SvmAllocationData **peerAllocData);
|
||||
void initializeVertexes();
|
||||
ze_result_t fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phDevices) override;
|
||||
void createHostPointerManager();
|
||||
|
||||
@@ -289,7 +289,8 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &
|
||||
unifiedMemoryProperties,
|
||||
false,
|
||||
eventPool->isHostVisibleEventPoolAllocation,
|
||||
false);
|
||||
false,
|
||||
nullptr);
|
||||
|
||||
if (alloc == nullptr) {
|
||||
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
|
||||
@@ -82,7 +82,7 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
|
||||
}
|
||||
if (lookupTable.sharedHandleType.isDMABUFHandle) {
|
||||
NEO::AllocationProperties properties(device->getRootDeviceIndex(), true, imgInfo, NEO::AllocationType::SHARED_IMAGE, device->getNEODevice()->getDeviceBitfield());
|
||||
allocation = device->getNEODevice()->getMemoryManager()->createGraphicsAllocationFromSharedHandle(lookupTable.sharedHandleType.fd, properties, false, false, true);
|
||||
allocation = device->getNEODevice()->getMemoryManager()->createGraphicsAllocationFromSharedHandle(lookupTable.sharedHandleType.fd, properties, false, false, true, nullptr);
|
||||
device->getNEODevice()->getMemoryManager()->closeSharedHandle(allocation);
|
||||
} else if (lookupTable.sharedHandleType.isNTHandle) {
|
||||
auto verifyResult = device->getNEODevice()->getMemoryManager()->verifyHandle(NEO::toOsHandle(lookupTable.sharedHandleType.ntHnadle), device->getNEODevice()->getRootDeviceIndex(), true);
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
||||
#include "shared/source/kernel/kernel_descriptor.h"
|
||||
#include "shared/source/memory_manager/graphics_allocation.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/unified_memory/unified_memory.h"
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
@@ -114,7 +115,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
|
||||
virtual ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) = 0;
|
||||
virtual void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
|
||||
|
||||
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) = 0;
|
||||
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) = 0;
|
||||
virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) = 0;
|
||||
virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
|
||||
uint32_t groupSizeZ) = 0;
|
||||
|
||||
@@ -526,7 +526,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) {
|
||||
ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) {
|
||||
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
|
||||
const auto val = argVal;
|
||||
|
||||
@@ -534,8 +534,12 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal
|
||||
if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) {
|
||||
setBufferSurfaceState(argIndex, reinterpret_cast<void *>(val), allocation);
|
||||
}
|
||||
|
||||
auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(allocation->getGpuAddress()));
|
||||
NEO::SvmAllocationData *allocData = nullptr;
|
||||
if (peerAllocData) {
|
||||
allocData = peerAllocData;
|
||||
} else {
|
||||
allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(allocation->getGpuAddress()));
|
||||
}
|
||||
if (allocData) {
|
||||
bool argWasUncacheable = isArgUncached[argIndex];
|
||||
bool argIsUncacheable = allocData->allocationFlagsProperty.flags.locallyUncachedResource;
|
||||
@@ -546,7 +550,6 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal
|
||||
}
|
||||
this->setKernelArgUncached(argIndex, argIsUncacheable);
|
||||
}
|
||||
|
||||
residencyContainer[argIndex] = allocation;
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
@@ -633,6 +636,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
|
||||
if (allocData == nullptr) {
|
||||
allocData = svmAllocsManager->getSVMAlloc(requestedAddress);
|
||||
}
|
||||
NEO::SvmAllocationData *peerAllocData = nullptr;
|
||||
if (driverHandle->isRemoteResourceNeeded(requestedAddress, alloc, allocData, device)) {
|
||||
if (allocData == nullptr) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
@@ -640,18 +644,16 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
|
||||
|
||||
uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
|
||||
uint64_t offset = (uint64_t)requestedAddress - pbase;
|
||||
|
||||
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &gpuAddress);
|
||||
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &gpuAddress, &peerAllocData);
|
||||
if (alloc == nullptr) {
|
||||
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
gpuAddress += offset;
|
||||
}
|
||||
|
||||
const uint32_t allocId = allocData ? allocData->getAllocId() : 0u;
|
||||
kernelArgInfos[argIndex] = KernelArgInfo{requestedAddress, allocId, allocationsCounter, false};
|
||||
|
||||
return setArgBufferWithAlloc(argIndex, gpuAddress, alloc);
|
||||
return setArgBufferWithAlloc(argIndex, gpuAddress, alloc, peerAllocData);
|
||||
}
|
||||
|
||||
ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) {
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "shared/source/command_stream/thread_arbitration_policy.h"
|
||||
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
|
||||
#include "shared/source/memory_manager/unified_memory_manager.h"
|
||||
#include "shared/source/unified_memory/unified_memory.h"
|
||||
|
||||
#include "level_zero/core/source/kernel/kernel.h"
|
||||
@@ -77,7 +78,7 @@ struct KernelImp : Kernel {
|
||||
|
||||
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override;
|
||||
|
||||
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override;
|
||||
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override;
|
||||
|
||||
ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user