feature: Get Peer Allocation with specified base Pointer

Related-To: LOCI-4176

- When a Base Pointer is passed into Get Peer Allocation, that base
pointer is used to map the new allocation into virtual memory.
- Enables users to use the same pointer for all devices in Peer To Peer.
- Currently unsupported on reserved memory due to mapped and exec
residency of Virtual addresses.

Signed-off-by: Neil R Spruit <neil.r.spruit@intel.com>
This commit is contained in:
Neil R Spruit
2023-05-04 01:40:52 +00:00
committed by Compute-Runtime-Automation
parent f98ac7098b
commit ded9d7bff2
65 changed files with 618 additions and 304 deletions

View File

@@ -589,7 +589,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyFromMemory(ze_i
}
builtinKernel->setArgBufferWithAlloc(0u, allocationStruct.alignedAllocationPtr,
allocationStruct.alloc);
allocationStruct.alloc,
nullptr);
builtinKernel->setArgRedescribedImage(1u, image->toHandle());
builtinKernel->setArgumentValue(2u, sizeof(size_t), &allocationStruct.offset);
@@ -734,7 +735,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendImageCopyToMemory(void *
builtinKernel->setArgRedescribedImage(0u, image->toHandle());
builtinKernel->setArgBufferWithAlloc(1u, allocationStruct.alignedAllocationPtr,
allocationStruct.alloc);
allocationStruct.alloc,
nullptr);
uint32_t origin[] = {
static_cast<uint32_t>(pSrcRegion->originX),
@@ -1033,8 +1035,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernelWithGA(v
return ret;
}
builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc);
builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc);
builtinKernel->setArgBufferWithAlloc(0u, *reinterpret_cast<uintptr_t *>(dstPtr), dstPtrAlloc, nullptr);
builtinKernel->setArgBufferWithAlloc(1u, *reinterpret_cast<uintptr_t *>(srcPtr), srcPtrAlloc, nullptr);
uint64_t elems = size / elementSize;
builtinKernel->setArgumentValue(2, sizeof(elems), &elems);
@@ -1512,8 +1514,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel3d(Align
uint32_t srcPitches[2] = {(srcPitch), (srcSlicePitch)};
uint32_t dstPitches[2] = {(dstPitch), (dstSlicePitch)};
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc, nullptr);
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc, nullptr);
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinKernel->setArgumentValue(4, sizeof(srcPitches), &srcPitches);
@@ -1576,8 +1578,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryCopyKernel2d(Align
uint32_t srcOrigin[2] = {(srcRegion->originX + static_cast<uint32_t>(srcOffset)), (srcRegion->originY)};
uint32_t dstOrigin[2] = {(dstRegion->originX + static_cast<uint32_t>(dstOffset)), (dstRegion->originY)};
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc);
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc);
builtinKernel->setArgBufferWithAlloc(0, srcAlignedAllocation->alignedAllocationPtr, srcAlignedAllocation->alloc, nullptr);
builtinKernel->setArgBufferWithAlloc(1, dstAlignedAllocation->alignedAllocationPtr, dstAlignedAllocation->alloc, nullptr);
builtinKernel->setArgumentValue(2, sizeof(srcOrigin), &srcOrigin);
builtinKernel->setArgumentValue(3, sizeof(dstOrigin), &dstOrigin);
builtinKernel->setArgumentValue(4, sizeof(srcPitch), &srcPitch);
@@ -1620,7 +1622,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendUnalignedFillKernel(bool
builtinKernel->setGroupSize(groupSizeX, groupSizeY, groupSizeZ);
ze_group_count_t dispatchKernelRemainderArgs{static_cast<uint32_t>(unalignedSize / groupSizeX), 1u, 1u};
uint32_t value = *(reinterpret_cast<const unsigned char *>(pattern));
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinKernel->setArgumentValue(2, sizeof(value), &value);
@@ -1741,7 +1743,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
uint32_t value = 0;
memset(&value, *reinterpret_cast<const unsigned char *>(pattern), 4);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
builtinKernel->setArgumentValue(1, sizeof(fillArguments.mainOffset), &fillArguments.mainOffset);
builtinKernel->setArgumentValue(2, sizeof(value), &value);
@@ -1784,9 +1786,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
patternAllocOffset += patternSizeToCopy;
} while (patternAllocOffset < patternAllocationSize);
if (fillArguments.leftRemainingBytes == 0) {
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc);
builtinKernel->setArgBufferWithAlloc(0, dstAllocation.alignedAllocationPtr, dstAllocation.alloc, nullptr);
builtinKernel->setArgumentValue(1, sizeof(dstAllocation.offset), &dstAllocation.offset);
builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc);
builtinKernel->setArgBufferWithAlloc(2, reinterpret_cast<uintptr_t>(patternGfxAllocPtr), patternGfxAlloc, nullptr);
builtinKernel->setArgumentValue(3, sizeof(fillArguments.patternSizeInEls), &fillArguments.patternSizeInEls);
ze_group_count_t dispatchKernelArgs{static_cast<uint32_t>(fillArguments.groups), 1u, 1u};
@@ -1809,13 +1811,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
builtinKernelRemainder->setArgBufferWithAlloc(0,
dstAllocation.alignedAllocationPtr,
dstAllocation.alloc);
dstAllocation.alloc, nullptr);
builtinKernelRemainder->setArgumentValue(1,
sizeof(dstOffsetRemainder),
&dstOffsetRemainder);
builtinKernelRemainder->setArgBufferWithAlloc(2,
reinterpret_cast<uintptr_t>(patternGfxAllocPtr),
patternGfxAlloc);
patternGfxAlloc, nullptr);
builtinKernelRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
res = appendLaunchKernelSplit(builtinKernelRemainder, &dispatchKernelArgs, signalEvent, launchParams);
@@ -1840,13 +1842,13 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendMemoryFill(void *ptr,
builtinKernelRemainder->setArgBufferWithAlloc(0,
dstAllocation.alignedAllocationPtr,
dstAllocation.alloc);
dstAllocation.alloc, nullptr);
builtinKernelRemainder->setArgumentValue(1,
sizeof(dstOffsetRemainder),
&dstOffsetRemainder);
builtinKernelRemainder->setArgBufferWithAlloc(2,
reinterpret_cast<uintptr_t>(patternGfxAllocPtr) + patternOffsetRemainder,
patternGfxAlloc);
patternGfxAlloc, nullptr);
builtinKernelRemainder->setArgumentValue(3, sizeof(patternAllocationSize), &patternAllocationSize);
res = appendLaunchKernelSplit(builtinKernelRemainder, &dispatchKernelArgs, signalEvent, launchParams);
@@ -1898,7 +1900,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendBlitFill(void *ptr,
if (driverHandle->isRemoteResourceNeeded(ptr, gpuAllocation, allocData, device)) {
if (allocData) {
uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), nullptr);
gpuAllocation = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), nullptr, nullptr);
}
if (gpuAllocation == nullptr) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
@@ -2024,7 +2026,7 @@ inline AlignedAllocationData CommandListCoreFamily<gfxCoreFamily>::getAlignedAll
uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
uint64_t offset = sourcePtr - pbase;
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &alignedPtr);
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &alignedPtr, nullptr);
alignedPtr += offset;
if (allocData->memoryType == InternalMemoryType::SHARED_UNIFIED_MEMORY) {
@@ -2422,7 +2424,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
auto offsetValPtr = static_cast<uintptr_t>(pOffsetAllocationStruct.alloc->getGpuAddress());
commandContainer.addToResidencyContainer(pOffsetAllocationStruct.alloc);
builtinKernel = device->getBuiltinFunctionsLib()->getFunction(Builtin::QueryKernelTimestampsWithOffsets);
builtinKernel->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc);
builtinKernel->setArgBufferWithAlloc(2, offsetValPtr, pOffsetAllocationStruct.alloc, nullptr);
builtinKernel->setArgumentValue(3u, sizeof(uint32_t), &useOnlyGlobalTimestamps);
offsetValPtr += sizeof(size_t);
}
@@ -2448,8 +2450,8 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendQueryKernelTimestamps(
auto dstValPtr = static_cast<uintptr_t>(dstPtrAllocationStruct.alloc->getGpuAddress());
builtinKernel->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData);
builtinKernel->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc);
builtinKernel->setArgBufferWithAlloc(0u, static_cast<uintptr_t>(timestampsGPUData->getGpuAddress()), timestampsGPUData, nullptr);
builtinKernel->setArgBufferWithAlloc(1, dstValPtr, dstPtrAllocationStruct.alloc, nullptr);
auto dstAllocationType = dstPtrAllocationStruct.alloc->getAllocationType();
CmdListKernelLaunchParams launchParams = {};

View File

@@ -377,7 +377,14 @@ void ContextImp::freePeerAllocations(const void *ptr, bool blocking, Device *dev
auto peerAllocData = &iter->second;
auto peerAlloc = peerAllocData->gpuAllocations.getDefaultGraphicsAllocation();
auto peerPtr = reinterpret_cast<void *>(peerAlloc->getGpuAddress());
this->driverHandle->svmAllocsManager->freeSVMAlloc(peerPtr, blocking);
if (peerAllocData->mappedAllocData) {
auto gpuAllocations = peerAllocData->gpuAllocations;
for (const auto &graphicsAllocation : gpuAllocations.getGraphicsAllocations()) {
this->driverHandle->getMemoryManager()->freeGraphicsMemory(graphicsAllocation);
}
} else {
this->driverHandle->svmAllocsManager->freeSVMAlloc(peerPtr, blocking);
}
deviceImp->peerAllocations.allocations.erase(iter);
}
@@ -709,8 +716,8 @@ ze_result_t ContextImp::openIpcMemHandles(ze_device_handle_t hDevice,
handles.push_back(static_cast<NEO::osHandle>(handle));
}
auto neoDevice = Device::fromHandle(hDevice)->getNEODevice()->getRootDevice();
*pptr = this->driverHandle->importFdHandles(neoDevice, flags, handles, nullptr);
NEO::SvmAllocationData allocDataInternal(neoDevice->getRootDeviceIndex());
*pptr = this->driverHandle->importFdHandles(neoDevice, flags, handles, nullptr, nullptr, allocDataInternal);
if (nullptr == *pptr) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
@@ -1056,6 +1063,7 @@ ze_result_t ContextImp::mapVirtualMem(const void *ptr,
allocData.size = size;
allocData.pageSizeForAlignment = MemoryConstants::pageSize64k;
allocData.setAllocId(this->driverHandle->svmAllocsManager->allocationsCounter++);
allocData.memoryType = InternalMemoryType::RESERVED_DEVICE_MEMORY;
NEO::MemoryMappedRange *mappedRange = new NEO::MemoryMappedRange;
mappedRange->ptr = ptr;
mappedRange->size = size;

View File

@@ -1,10 +1,13 @@
/*
* Copyright (C) 2022 Intel Corporation
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/device/device.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "level_zero/core/source/context/context_imp.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/driver/driver_handle_imp.h"
@@ -19,7 +22,9 @@ bool ContextImp::isShareableMemory(const void *exportDesc, bool exportableMemory
}
void *ContextImp::getMemHandlePtr(ze_device_handle_t hDevice, uint64_t handle, NEO::AllocationType allocationType, ze_ipc_memory_flags_t flags) {
return this->driverHandle->importFdHandle(Device::fromHandle(hDevice)->getNEODevice(), flags, handle, allocationType, nullptr);
auto neoDevice = Device::fromHandle(hDevice)->getNEODevice();
NEO::SvmAllocationData allocDataInternal(neoDevice->getRootDeviceIndex());
return this->driverHandle->importFdHandle(neoDevice, flags, handle, allocationType, nullptr, nullptr, allocDataInternal);
}
} // namespace L0

View File

@@ -9,6 +9,7 @@
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/helpers/driver_model_type.h"
#include "shared/source/memory_manager/memory_manager.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/os_interface/os_interface.h"
#include "level_zero/core/source/context/context_imp.h"
@@ -47,11 +48,15 @@ void *ContextImp::getMemHandlePtr(ze_device_handle_t hDevice,
reinterpret_cast<void *>(handle),
allocationType);
} else if (driverType == NEO::DriverModelType::DRM) {
return this->driverHandle->importFdHandle(Device::fromHandle(hDevice)->getNEODevice(),
auto neoDevice = Device::fromHandle(hDevice)->getNEODevice();
NEO::SvmAllocationData allocDataInternal(neoDevice->getRootDeviceIndex());
return this->driverHandle->importFdHandle(neoDevice,
flags,
handle,
allocationType,
nullptr);
nullptr,
nullptr,
allocDataInternal);
} else {
return nullptr;
}

View File

@@ -476,7 +476,9 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
ze_ipc_memory_flags_t flags,
uint64_t handle,
NEO::AllocationType allocationType,
NEO::GraphicsAllocation **pAlloc) {
void *basePointer,
NEO::GraphicsAllocation **pAlloc,
NEO::SvmAllocationData &mappedPeerAllocData) {
NEO::osHandle osHandle = static_cast<NEO::osHandle>(handle);
NEO::AllocationProperties unifiedMemoryProperties{neoDevice->getRootDeviceIndex(),
MemoryConstants::pageSize,
@@ -489,29 +491,39 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
unifiedMemoryProperties,
false,
isHostIpcAllocation,
false);
false,
basePointer);
if (alloc == nullptr) {
return nullptr;
}
NEO::SvmAllocationData allocData(neoDevice->getRootDeviceIndex());
allocData.gpuAllocations.addAllocation(alloc);
allocData.cpuAllocation = nullptr;
allocData.size = alloc->getUnderlyingBufferSize();
allocData.memoryType =
NEO::SvmAllocationData *allocDataTmp = nullptr;
if (basePointer) {
allocDataTmp = &mappedPeerAllocData;
allocDataTmp->mappedAllocData = true;
} else {
allocDataTmp = &allocData;
allocDataTmp->mappedAllocData = false;
}
allocDataTmp->gpuAllocations.addAllocation(alloc);
allocDataTmp->cpuAllocation = nullptr;
allocDataTmp->size = alloc->getUnderlyingBufferSize();
allocDataTmp->memoryType =
isHostIpcAllocation ? InternalMemoryType::HOST_UNIFIED_MEMORY : InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData.device = neoDevice;
allocData.isImportedAllocation = true;
allocDataTmp->device = neoDevice;
allocDataTmp->isImportedAllocation = true;
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
}
if (flags & ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED) {
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
}
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
if (!basePointer) {
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
}
if (pAlloc) {
*pAlloc = alloc;
}
@@ -519,7 +531,7 @@ void *DriverHandleImp::importFdHandle(NEO::Device *neoDevice,
return reinterpret_cast<void *>(alloc->getGpuAddress());
}
void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, NEO::GraphicsAllocation **pAlloc) {
void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, void *basePtr, NEO::GraphicsAllocation **pAlloc, NEO::SvmAllocationData &mappedPeerAllocData) {
NEO::AllocationProperties unifiedMemoryProperties{neoDevice->getRootDeviceIndex(),
MemoryConstants::pageSize,
NEO::AllocationType::BUFFER,
@@ -531,26 +543,39 @@ void *DriverHandleImp::importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_fla
unifiedMemoryProperties,
false,
false,
false);
false,
basePtr);
if (alloc == nullptr) {
return nullptr;
}
NEO::SvmAllocationData *allocDataTmp = nullptr;
NEO::SvmAllocationData allocData(neoDevice->getRootDeviceIndex());
allocData.gpuAllocations.addAllocation(alloc);
allocData.cpuAllocation = nullptr;
allocData.size = alloc->getUnderlyingBufferSize();
allocData.memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocData.device = neoDevice;
if (basePtr) {
allocDataTmp = &mappedPeerAllocData;
allocDataTmp->mappedAllocData = true;
} else {
allocDataTmp = &allocData;
allocDataTmp->mappedAllocData = false;
}
allocDataTmp->gpuAllocations.addAllocation(alloc);
allocDataTmp->cpuAllocation = nullptr;
allocDataTmp->size = alloc->getUnderlyingBufferSize();
allocDataTmp->memoryType = InternalMemoryType::DEVICE_UNIFIED_MEMORY;
allocDataTmp->device = neoDevice;
if (flags & ZE_DEVICE_MEM_ALLOC_FLAG_BIAS_UNCACHED) {
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
}
if (flags & ZE_IPC_MEMORY_FLAG_BIAS_UNCACHED) {
allocData.allocationFlagsProperty.flags.locallyUncachedResource = 1;
allocDataTmp->allocationFlagsProperty.flags.locallyUncachedResource = 1;
}
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
if (!basePtr) {
this->getSvmAllocsManager()->insertSVMAlloc(allocData);
}
if (pAlloc) {
*pAlloc = alloc;
@@ -603,19 +628,21 @@ ze_result_t DriverHandleImp::getPeerImage(Device *device, Image *image, Image **
NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
NEO::SvmAllocationData *allocData,
void *basePtr,
uintptr_t *peerGpuAddress) {
uintptr_t *peerGpuAddress,
NEO::SvmAllocationData **peerAllocData) {
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
NEO::GraphicsAllocation *alloc = nullptr;
NEO::SvmAllocationData *peerAllocData = nullptr;
void *peerMapAddress = basePtr;
void *peerPtr = nullptr;
NEO::SvmAllocationData *peerAllocDataInternal = nullptr;
std::unique_lock<NEO::SpinLock> lock(deviceImp->peerAllocationsMutex);
auto iter = deviceImp->peerAllocations.allocations.find(basePtr);
if (iter != deviceImp->peerAllocations.allocations.end()) {
peerAllocData = &iter->second;
alloc = peerAllocData->gpuAllocations.getDefaultGraphicsAllocation();
peerAllocDataInternal = &iter->second;
alloc = peerAllocDataInternal->gpuAllocations.getDefaultGraphicsAllocation();
UNRECOVERABLE_IF(alloc == nullptr);
peerPtr = reinterpret_cast<void *>(alloc->getGpuAddress());
} else {
@@ -624,6 +651,17 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
ze_ipc_memory_flags_t flags = {};
uint32_t numHandles = alloc->getNumHandles();
// Don't attempt to use the peerMapAddress for reserved memory due to the limitations in the address reserved.
if (allocData->memoryType == InternalMemoryType::RESERVED_DEVICE_MEMORY) {
peerMapAddress = nullptr;
}
uint32_t peerAllocRootDeviceIndex = device->getNEODevice()->getRootDeviceIndex();
if (numHandles > 1) {
peerAllocRootDeviceIndex = device->getNEODevice()->getRootDevice()->getRootDeviceIndex();
}
NEO::SvmAllocationData allocDataInternal(peerAllocRootDeviceIndex);
if (numHandles > 1) {
UNRECOVERABLE_IF(numHandles == 0);
std::vector<NEO::osHandle> handles;
@@ -636,7 +674,7 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
handles.push_back(static_cast<NEO::osHandle>(handle));
}
auto neoDevice = device->getNEODevice()->getRootDevice();
peerPtr = this->importFdHandles(neoDevice, flags, handles, &alloc);
peerPtr = this->importFdHandles(neoDevice, flags, handles, peerMapAddress, &alloc, allocDataInternal);
} else {
uint64_t handle = 0;
int ret = alloc->peekInternalHandle(this->getMemoryManager(), handle);
@@ -647,15 +685,28 @@ NEO::GraphicsAllocation *DriverHandleImp::getPeerAllocation(Device *device,
flags,
handle,
NEO::AllocationType::BUFFER,
&alloc);
peerMapAddress,
&alloc,
allocDataInternal);
}
if (peerPtr == nullptr) {
return nullptr;
}
peerAllocData = this->getSvmAllocsManager()->getSVMAlloc(peerPtr);
deviceImp->peerAllocations.allocations.insert(std::make_pair(basePtr, *peerAllocData));
peerAllocDataInternal = &allocDataInternal;
if (peerMapAddress == nullptr) {
peerAllocDataInternal = this->getSvmAllocsManager()->getSVMAlloc(peerPtr);
}
deviceImp->peerAllocations.allocations.insert(std::make_pair(basePtr, *peerAllocDataInternal));
// Point to the new peer Alloc Data after it is recreated in the peer allocations map
if (peerMapAddress) {
peerAllocDataInternal = &deviceImp->peerAllocations.allocations.at(basePtr);
}
}
if (peerAllocData) {
*peerAllocData = peerAllocDataInternal;
}
if (peerGpuAddress) {

View File

@@ -43,8 +43,8 @@ struct DriverHandleImp : public DriverHandle {
NEO::MemoryManager *getMemoryManager() override;
void setMemoryManager(NEO::MemoryManager *memoryManager) override;
MOCKABLE_VIRTUAL void *importFdHandle(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::AllocationType allocationType, NEO::GraphicsAllocation **pAlloc);
MOCKABLE_VIRTUAL void *importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, NEO::GraphicsAllocation **pAlloc);
MOCKABLE_VIRTUAL void *importFdHandle(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, uint64_t handle, NEO::AllocationType allocationType, void *basePointer, NEO::GraphicsAllocation **pAlloc, NEO::SvmAllocationData &mappedPeerAllocData);
MOCKABLE_VIRTUAL void *importFdHandles(NEO::Device *neoDevice, ze_ipc_memory_flags_t flags, const std::vector<NEO::osHandle> &handles, void *basePointer, NEO::GraphicsAllocation **pAlloc, NEO::SvmAllocationData &mappedPeerAllocData);
MOCKABLE_VIRTUAL void *importNTHandle(ze_device_handle_t hDevice, void *handle, NEO::AllocationType allocationType);
ze_result_t checkMemoryAccessFromDevice(Device *device, const void *ptr) override;
NEO::SVMAllocsManager *getSvmAllocsManager() override;
@@ -75,7 +75,8 @@ struct DriverHandleImp : public DriverHandle {
NEO::GraphicsAllocation *getPeerAllocation(Device *device,
NEO::SvmAllocationData *allocData,
void *basePtr,
uintptr_t *peerGpuAddress);
uintptr_t *peerGpuAddress,
NEO::SvmAllocationData **peerAllocData);
void initializeVertexes();
ze_result_t fabricVertexGetExp(uint32_t *pCount, ze_fabric_vertex_handle_t *phDevices) override;
void createHostPointerManager();

View File

@@ -289,7 +289,8 @@ ze_result_t EventPool::openEventPoolIpcHandle(const ze_ipc_event_pool_handle_t &
unifiedMemoryProperties,
false,
eventPool->isHostVisibleEventPoolAllocation,
false);
false,
nullptr);
if (alloc == nullptr) {
return ZE_RESULT_ERROR_OUT_OF_DEVICE_MEMORY;

View File

@@ -82,7 +82,7 @@ ze_result_t ImageCoreFamily<gfxCoreFamily>::initialize(Device *device, const ze_
}
if (lookupTable.sharedHandleType.isDMABUFHandle) {
NEO::AllocationProperties properties(device->getRootDeviceIndex(), true, imgInfo, NEO::AllocationType::SHARED_IMAGE, device->getNEODevice()->getDeviceBitfield());
allocation = device->getNEODevice()->getMemoryManager()->createGraphicsAllocationFromSharedHandle(lookupTable.sharedHandleType.fd, properties, false, false, true);
allocation = device->getNEODevice()->getMemoryManager()->createGraphicsAllocationFromSharedHandle(lookupTable.sharedHandleType.fd, properties, false, false, true, nullptr);
device->getNEODevice()->getMemoryManager()->closeSharedHandle(allocation);
} else if (lookupTable.sharedHandleType.isNTHandle) {
auto verifyResult = device->getNEODevice()->getMemoryManager()->verifyHandle(NEO::toOsHandle(lookupTable.sharedHandleType.ntHnadle), device->getNEODevice()->getRootDeviceIndex(), true);

View File

@@ -10,6 +10,7 @@
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
#include "shared/source/kernel/kernel_descriptor.h"
#include "shared/source/memory_manager/graphics_allocation.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/unified_memory/unified_memory.h"
#include <level_zero/ze_api.h>
@@ -114,7 +115,7 @@ struct Kernel : _ze_kernel_handle_t, virtual NEO::DispatchKernelEncoderI {
virtual ze_result_t setArgumentValue(uint32_t argIndex, size_t argSize, const void *pArgValue) = 0;
virtual void setGroupCount(uint32_t groupCountX, uint32_t groupCountY, uint32_t groupCountZ) = 0;
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) = 0;
virtual ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) = 0;
virtual ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) = 0;
virtual ze_result_t setGroupSize(uint32_t groupSizeX, uint32_t groupSizeY,
uint32_t groupSizeZ) = 0;

View File

@@ -526,7 +526,7 @@ ze_result_t KernelImp::setArgRedescribedImage(uint32_t argIndex, ze_image_handle
return ZE_RESULT_SUCCESS;
}
ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) {
ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) {
const auto &arg = kernelImmData->getDescriptor().payloadMappings.explicitArgs[argIndex].as<NEO::ArgDescPointer>();
const auto val = argVal;
@@ -534,8 +534,12 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal
if (NEO::isValidOffset(arg.bindful) || NEO::isValidOffset(arg.bindless)) {
setBufferSurfaceState(argIndex, reinterpret_cast<void *>(val), allocation);
}
auto allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(allocation->getGpuAddress()));
NEO::SvmAllocationData *allocData = nullptr;
if (peerAllocData) {
allocData = peerAllocData;
} else {
allocData = this->module->getDevice()->getDriverHandle()->getSvmAllocsManager()->getSVMAlloc(reinterpret_cast<void *>(allocation->getGpuAddress()));
}
if (allocData) {
bool argWasUncacheable = isArgUncached[argIndex];
bool argIsUncacheable = allocData->allocationFlagsProperty.flags.locallyUncachedResource;
@@ -546,7 +550,6 @@ ze_result_t KernelImp::setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal
}
this->setKernelArgUncached(argIndex, argIsUncacheable);
}
residencyContainer[argIndex] = allocation;
return ZE_RESULT_SUCCESS;
@@ -633,6 +636,7 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
if (allocData == nullptr) {
allocData = svmAllocsManager->getSVMAlloc(requestedAddress);
}
NEO::SvmAllocationData *peerAllocData = nullptr;
if (driverHandle->isRemoteResourceNeeded(requestedAddress, alloc, allocData, device)) {
if (allocData == nullptr) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
@@ -640,18 +644,16 @@ ze_result_t KernelImp::setArgBuffer(uint32_t argIndex, size_t argSize, const voi
uint64_t pbase = allocData->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress();
uint64_t offset = (uint64_t)requestedAddress - pbase;
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &gpuAddress);
alloc = driverHandle->getPeerAllocation(device, allocData, reinterpret_cast<void *>(pbase), &gpuAddress, &peerAllocData);
if (alloc == nullptr) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
gpuAddress += offset;
}
const uint32_t allocId = allocData ? allocData->getAllocId() : 0u;
kernelArgInfos[argIndex] = KernelArgInfo{requestedAddress, allocId, allocationsCounter, false};
return setArgBufferWithAlloc(argIndex, gpuAddress, alloc);
return setArgBufferWithAlloc(argIndex, gpuAddress, alloc, peerAllocData);
}
ze_result_t KernelImp::setArgImage(uint32_t argIndex, size_t argSize, const void *argVal) {

View File

@@ -9,6 +9,7 @@
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/kernel/dispatch_kernel_encoder_interface.h"
#include "shared/source/memory_manager/unified_memory_manager.h"
#include "shared/source/unified_memory/unified_memory.h"
#include "level_zero/core/source/kernel/kernel.h"
@@ -77,7 +78,7 @@ struct KernelImp : Kernel {
ze_result_t setArgRedescribedImage(uint32_t argIndex, ze_image_handle_t argVal) override;
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation) override;
ze_result_t setArgBufferWithAlloc(uint32_t argIndex, uintptr_t argVal, NEO::GraphicsAllocation *allocation, NEO::SvmAllocationData *peerAllocData) override;
ze_result_t setArgImage(uint32_t argIndex, size_t argSize, const void *argVal);