mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-08 05:56:36 +08:00
Reuse graphics allocations in svmMemcpy
Related-To: NEO-6352
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
32370473ad
commit
457ef00abf
@@ -979,15 +979,4 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void *CommandQueue::convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
|
|
||||||
// If this is device or shared USM pointer, it is already a gpuVA and we don't have to do anything.
|
|
||||||
// Otherwise, we assume this is a cpuVA and we have to convert to gpuVA, while preserving offset from allocation start.
|
|
||||||
const bool isCpuPtr = (memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY);
|
|
||||||
if (isCpuPtr) {
|
|
||||||
size_t dstOffset = ptrDiff(ptr, allocation.getUnderlyingBuffer());
|
|
||||||
ptr = reinterpret_cast<void *>(allocation.getGpuAddress() + dstOffset);
|
|
||||||
}
|
|
||||||
return ptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -322,7 +322,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
return requiresCacheFlushAfterWalker;
|
return requiresCacheFlushAfterWalker;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void *convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
|
template <typename PtrType>
|
||||||
|
static PtrType convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
|
||||||
|
|
||||||
void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount);
|
void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount);
|
||||||
uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;
|
uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;
|
||||||
@@ -404,6 +405,18 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
|
|||||||
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
|
std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Converts `ptr` to a GPU virtual address when it is not one already.
// PtrType is the pointer type supplied by the caller; the converted address is cast back to it.
// Returns `ptr` unchanged for DEVICE_UNIFIED_MEMORY and SHARED_UNIFIED_MEMORY. For any other
// memoryType returns allocation.getGpuAddress() plus the distance between `ptr` and
// allocation.getUnderlyingBuffer().
// NOTE(review): presumably `ptr` must point inside the CPU buffer of `allocation` - confirm with callers.
template <typename PtrType>
|
||||||
|
PtrType CommandQueue::convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
|
||||||
|
// If this is device or shared USM pointer, it is already a gpuVA and we don't have to do anything.
|
||||||
|
// Otherwise, we assume this is a cpuVA and we have to convert to gpuVA, while preserving offset from allocation start.
|
||||||
|
const bool isCpuPtr = (memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY);
|
||||||
|
if (isCpuPtr) {
|
||||||
|
// Offset of ptr from the start of the allocation's CPU-side buffer.
size_t dstOffset = ptrDiff(ptr, allocation.getUnderlyingBuffer());
|
||||||
|
// Re-apply the same offset on top of the allocation's GPU address.
ptr = reinterpret_cast<PtrType>(allocation.getGpuAddress() + dstOffset);
|
||||||
|
}
|
||||||
|
return ptr;
|
||||||
|
}
|
||||||
|
|
||||||
using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage);
|
using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage);
|
||||||
|
|
||||||
} // namespace NEO
|
} // namespace NEO
|
||||||
|
|||||||
@@ -73,8 +73,12 @@ struct CsrSelectionArgs {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void processResource(const MultiGraphicsAllocation &multiGfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
|
static void processResource(const MultiGraphicsAllocation &multiGfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
|
||||||
outResource.allocation = multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex);
|
processResource(*multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex), rootDeviceIndex, outResource);
|
||||||
outResource.isLocal = outResource.allocation->isAllocatedInLocalMemoryPool();
|
}
|
||||||
|
|
||||||
|
static void processResource(const GraphicsAllocation &gfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
|
||||||
|
outResource.allocation = &gfxAlloc;
|
||||||
|
outResource.isLocal = gfxAlloc.isAllocatedInLocalMemoryPool();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) {
|
static inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) {
|
||||||
|
|||||||
@@ -274,6 +274,24 @@ inline void setOperationParams(BuiltinOpParams &operationParams, size_t size,
|
|||||||
operationParams.dstOffset = {ptrDiff(dstPtr, operationParams.dstPtr), 0, 0};
|
operationParams.dstOffset = {ptrDiff(dstPtr, operationParams.dstPtr), 0, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Looks for a graphics allocation that already backs `ptr`, so the caller can reuse it.
// Lookup order:
//   1. the context's SVM allocations manager (getSVMAlloc) - on a hit the allocation for
//      rootDeviceIndex is taken from svmData->gpuAllocations and `ptr` is returned as passed in;
//   2. otherwise Context::tryGetExistingMapAllocation(ptr, size, ...) - on a hit `ptr` is converted
//      with CommandQueue::convertAddressWithOffsetToGpuVa using InternalMemoryType::NOT_SPECIFIED.
// Returns a tuple of (svmData, allocation, ptr); svmData and allocation are nullptr when the
// corresponding lookup found nothing.
// NOTE(review): getSVMAllocsManager() is dereferenced without a null check here, while
// Context::tryGetExistingSvmAllocation guards the same call - confirm it can never be null on this path.
// NOTE(review): the cl_int returned by tryGetExistingMapAllocation is ignored - confirm that is intended.
template <typename PtrType>
|
||||||
|
inline std::tuple<SvmAllocationData *, GraphicsAllocation *, PtrType> getExistingAlloc(Context *context,
|
||||||
|
PtrType ptr,
|
||||||
|
size_t size,
|
||||||
|
uint32_t rootDeviceIndex) {
|
||||||
|
SvmAllocationData *svmData = context->getSVMAllocsManager()->getSVMAlloc(ptr);
|
||||||
|
GraphicsAllocation *allocation = nullptr;
|
||||||
|
if (svmData) {
|
||||||
|
allocation = svmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
|
||||||
|
} else {
|
||||||
|
// Not an SVM pointer: check whether it belongs to a recorded map operation.
context->tryGetExistingMapAllocation(ptr, size, allocation);
|
||||||
|
if (allocation) {
|
||||||
|
// NOT_SPECIFIED is neither device nor shared USM, so the helper always performs the conversion here.
ptr = CommandQueue::convertAddressWithOffsetToGpuVa(ptr, InternalMemoryType::NOT_SPECIFIED, *allocation);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return std::make_tuple(svmData, allocation, ptr);
|
||||||
|
}
|
||||||
|
|
||||||
template <typename GfxFamily>
|
template <typename GfxFamily>
|
||||||
cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
||||||
void *dstPtr,
|
void *dstPtr,
|
||||||
@@ -287,28 +305,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
|||||||
return CL_INVALID_VALUE;
|
return CL_INVALID_VALUE;
|
||||||
}
|
}
|
||||||
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
|
||||||
auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstPtr);
|
auto [dstSvmData, dstAllocation, dstGpuPtr] = getExistingAlloc(context, dstPtr, size, rootDeviceIndex);
|
||||||
auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcPtr);
|
auto [srcSvmData, srcAllocation, srcGpuPtr] = getExistingAlloc(context, srcPtr, size, rootDeviceIndex);
|
||||||
|
|
||||||
enum CopyType { HostToHost,
|
enum CopyType { HostToHost,
|
||||||
SvmToHost,
|
SvmToHost,
|
||||||
HostToSvm,
|
HostToSvm,
|
||||||
SvmToSvm };
|
SvmToSvm };
|
||||||
CopyType copyType = HostToHost;
|
CopyType copyType = HostToHost;
|
||||||
if ((srcSvmData != nullptr) && (dstSvmData != nullptr)) {
|
if ((srcAllocation != nullptr) && (dstAllocation != nullptr)) {
|
||||||
copyType = SvmToSvm;
|
copyType = SvmToSvm;
|
||||||
} else if ((srcSvmData == nullptr) && (dstSvmData != nullptr)) {
|
} else if ((srcAllocation == nullptr) && (dstAllocation != nullptr)) {
|
||||||
copyType = HostToSvm;
|
copyType = HostToSvm;
|
||||||
} else if (srcSvmData != nullptr) {
|
} else if (srcAllocation != nullptr) {
|
||||||
copyType = SvmToHost;
|
copyType = SvmToHost;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto pageFaultManager = context->getMemoryManager()->getPageFaultManager();
|
auto pageFaultManager = context->getMemoryManager()->getPageFaultManager();
|
||||||
if (dstSvmData && pageFaultManager) {
|
if (dstSvmData && pageFaultManager) {
|
||||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()));
|
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(dstAllocation->getGpuAddress()));
|
||||||
}
|
}
|
||||||
if (srcSvmData && pageFaultManager) {
|
if (srcSvmData && pageFaultManager) {
|
||||||
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()));
|
pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(srcAllocation->getGpuAddress()));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto isStatelessRequired = false;
|
auto isStatelessRequired = false;
|
||||||
@@ -330,20 +348,20 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
|||||||
cl_command_type cmdType;
|
cl_command_type cmdType;
|
||||||
|
|
||||||
if (copyType == SvmToHost) {
|
if (copyType == SvmToHost) {
|
||||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
|
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, {}, device->getRootDeviceIndex(), &size};
|
||||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||||
|
|
||||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
GeneralSurface srcSvmSurf(srcAllocation);
|
||||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
|
||||||
if (size != 0) {
|
if (size != 0) {
|
||||||
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
|
||||||
if (!status) {
|
if (!status) {
|
||||||
return CL_OUT_OF_RESOURCES;
|
return CL_OUT_OF_RESOURCES;
|
||||||
}
|
}
|
||||||
dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
|
dstGpuPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
|
||||||
notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
notifyEnqueueSVMMemcpy(srcAllocation, !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
|
||||||
}
|
}
|
||||||
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation());
|
setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstHostPtrSurf.getAllocation());
|
||||||
surfaces[0] = &srcSvmSurf;
|
surfaces[0] = &srcSvmSurf;
|
||||||
surfaces[1] = &dstHostPtrSurf;
|
surfaces[1] = &dstHostPtrSurf;
|
||||||
|
|
||||||
@@ -351,36 +369,33 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
|||||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||||
|
|
||||||
} else if (copyType == HostToSvm) {
|
} else if (copyType == HostToSvm) {
|
||||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
|
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, dstAllocation, device->getRootDeviceIndex(), &size};
|
||||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||||
|
|
||||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
|
||||||
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
GeneralSurface dstSvmSurf(dstAllocation);
|
||||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||||
if (size != 0) {
|
if (size != 0) {
|
||||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||||
if (!status) {
|
if (!status) {
|
||||||
return CL_OUT_OF_RESOURCES;
|
return CL_OUT_OF_RESOURCES;
|
||||||
}
|
}
|
||||||
srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
|
srcGpuPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
|
||||||
}
|
}
|
||||||
setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(),
|
setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstAllocation);
|
||||||
dstPtr, dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
|
||||||
surfaces[0] = &dstSvmSurf;
|
surfaces[0] = &dstSvmSurf;
|
||||||
surfaces[1] = &srcHostPtrSurf;
|
surfaces[1] = &srcHostPtrSurf;
|
||||||
|
|
||||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
|
||||||
dispatchInfo.setBuiltinOpParams(operationParams);
|
dispatchInfo.setBuiltinOpParams(operationParams);
|
||||||
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
|
||||||
|
|
||||||
} else if (copyType == SvmToSvm) {
|
} else if (copyType == SvmToSvm) {
|
||||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
|
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, dstAllocation, device->getRootDeviceIndex(), &size};
|
||||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||||
|
|
||||||
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
GeneralSurface srcSvmSurf(srcAllocation);
|
||||||
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
GeneralSurface dstSvmSurf(dstAllocation);
|
||||||
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex),
|
setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstAllocation);
|
||||||
dstPtr, dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
|
|
||||||
surfaces[0] = &srcSvmSurf;
|
surfaces[0] = &srcSvmSurf;
|
||||||
surfaces[1] = &dstSvmSurf;
|
surfaces[1] = &dstSvmSurf;
|
||||||
|
|
||||||
@@ -391,8 +406,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
|||||||
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
|
CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
|
||||||
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
|
||||||
|
|
||||||
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
|
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
|
||||||
HostPtrSurface dstHostPtrSurf(dstPtr, size);
|
HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
|
||||||
cmdType = CL_COMMAND_WRITE_BUFFER;
|
cmdType = CL_COMMAND_WRITE_BUFFER;
|
||||||
if (size != 0) {
|
if (size != 0) {
|
||||||
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
|
||||||
@@ -400,10 +415,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
|
|||||||
if (!status) {
|
if (!status) {
|
||||||
return CL_OUT_OF_RESOURCES;
|
return CL_OUT_OF_RESOURCES;
|
||||||
}
|
}
|
||||||
srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
|
srcGpuPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
|
||||||
dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
|
dstGpuPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
|
||||||
}
|
}
|
||||||
setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(), dstPtr, dstHostPtrSurf.getAllocation());
|
setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstHostPtrSurf.getAllocation());
|
||||||
surfaces[0] = &srcHostPtrSurf;
|
surfaces[0] = &srcHostPtrSurf;
|
||||||
surfaces[1] = &dstHostPtrSurf;
|
surfaces[1] = &dstHostPtrSurf;
|
||||||
|
|
||||||
|
|||||||
@@ -88,6 +88,21 @@ cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
|
|||||||
GraphicsAllocation *&allocation,
|
GraphicsAllocation *&allocation,
|
||||||
InternalMemoryType &memoryType,
|
InternalMemoryType &memoryType,
|
||||||
bool &isCpuCopyAllowed) {
|
bool &isCpuCopyAllowed) {
|
||||||
|
cl_int retVal = tryGetExistingSvmAllocation(ptr, size, rootDeviceIndex, allocation, memoryType, isCpuCopyAllowed);
|
||||||
|
if (retVal != CL_SUCCESS || allocation != nullptr) {
|
||||||
|
return retVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
retVal = tryGetExistingMapAllocation(ptr, size, allocation);
|
||||||
|
return retVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int Context::tryGetExistingSvmAllocation(const void *ptr,
|
||||||
|
size_t size,
|
||||||
|
uint32_t rootDeviceIndex,
|
||||||
|
GraphicsAllocation *&allocation,
|
||||||
|
InternalMemoryType &memoryType,
|
||||||
|
bool &isCpuCopyAllowed) {
|
||||||
if (getSVMAllocsManager()) {
|
if (getSVMAllocsManager()) {
|
||||||
SvmAllocationData *svmEntry = getSVMAllocsManager()->getSVMAlloc(ptr);
|
SvmAllocationData *svmEntry = getSVMAllocsManager()->getSVMAlloc(ptr);
|
||||||
if (svmEntry) {
|
if (svmEntry) {
|
||||||
@@ -101,16 +116,19 @@ cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
|
|||||||
isCpuCopyAllowed = false;
|
isCpuCopyAllowed = false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return CL_SUCCESS;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
cl_int Context::tryGetExistingMapAllocation(const void *ptr,
|
||||||
|
size_t size,
|
||||||
|
GraphicsAllocation *&allocation) {
|
||||||
if (MapInfo mapInfo = {}; mapOperationsStorage.getInfoForHostPtr(ptr, size, mapInfo)) {
|
if (MapInfo mapInfo = {}; mapOperationsStorage.getInfoForHostPtr(ptr, size, mapInfo)) {
|
||||||
if (mapInfo.graphicsAllocation) {
|
if (mapInfo.graphicsAllocation) {
|
||||||
allocation = mapInfo.graphicsAllocation;
|
allocation = mapInfo.graphicsAllocation;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return CL_SUCCESS;
|
return CL_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -103,6 +103,15 @@ class Context : public BaseObject<_cl_context> {
|
|||||||
GraphicsAllocation *&allocation,
|
GraphicsAllocation *&allocation,
|
||||||
InternalMemoryType &memoryType,
|
InternalMemoryType &memoryType,
|
||||||
bool &isCpuCopyAllowed);
|
bool &isCpuCopyAllowed);
|
||||||
|
cl_int tryGetExistingSvmAllocation(const void *ptr,
|
||||||
|
size_t size,
|
||||||
|
uint32_t rootDeviceIndex,
|
||||||
|
GraphicsAllocation *&allocation,
|
||||||
|
InternalMemoryType &memoryType,
|
||||||
|
bool &isCpuCopyAllowed);
|
||||||
|
cl_int tryGetExistingMapAllocation(const void *ptr,
|
||||||
|
size_t size,
|
||||||
|
GraphicsAllocation *&allocation);
|
||||||
|
|
||||||
const std::set<uint32_t> &getRootDeviceIndices() const;
|
const std::set<uint32_t> &getRootDeviceIndices() const;
|
||||||
|
|
||||||
|
|||||||
@@ -57,6 +57,18 @@ struct EnqueueSvmTest : public ClDeviceFixture,
|
|||||||
ClDeviceFixture::TearDown();
|
ClDeviceFixture::TearDown();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test helper: creates a buffer in this fixture's context with the DisableZeroCopyForBuffers
// debug flag set to 1, then maps the whole buffer for reading with a blocking enqueueMapBuffer.
// Returns the buffer together with the mapped pointer; expects the map call to succeed and to
// return a non-null pointer.
// NOTE(review): `restore` presumably resets the debug flags when it goes out of scope - confirm.
std::pair<ReleaseableObjectPtr<Buffer>, void *> createBufferAndMapItOnGpu() {
|
||||||
|
DebugManagerStateRestore restore{};
|
||||||
|
DebugManager.flags.DisableZeroCopyForBuffers.set(1);
|
||||||
|
|
||||||
|
BufferDefaults::context = this->context;
|
||||||
|
ReleaseableObjectPtr<Buffer> buffer = clUniquePtr(BufferHelper<>::create());
|
||||||
|
void *mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, retVal);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_NE(nullptr, mappedPtr);
|
||||||
|
return {std::move(buffer), mappedPtr};
|
||||||
|
}
|
||||||
|
|
||||||
cl_int retVal = CL_SUCCESS;
|
cl_int retVal = CL_SUCCESS;
|
||||||
void *ptrSVM = nullptr;
|
void *ptrSVM = nullptr;
|
||||||
};
|
};
|
||||||
@@ -1915,3 +1927,135 @@ TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemFillThenAllocIsDecommi
|
|||||||
|
|
||||||
context->memoryManager = memoryManager;
|
context->memoryManager = memoryManager;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies into the fixture's SVM allocation (ptrSVM) from two kinds of source pointers and checks
// csr.createAllocationForHostSurfaceCalled after each enqueueSVMMemcpy call:
// a pointer obtained from createBufferAndMapItOnGpu() must leave the counter at 0,
// a plain heap pointer must raise it to 1.
HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToSvmAllocWhenCallingSvmMemcpyThenReuseMappedAllocations) {
|
||||||
|
constexpr size_t size = 1u;
|
||||||
|
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
|
// Case 1: source is a mapped buffer pointer - no host surface allocation expected.
{
|
||||||
|
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
|
||||||
|
std::ignore = buffer;
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||||
|
false, // cl_bool blocking_copy
|
||||||
|
ptrSVM, // void *dst_ptr
|
||||||
|
mappedPtr, // const void *src_ptr
|
||||||
|
size, // size_t size
|
||||||
|
0, // cl_uint num_events_in_wait_list
|
||||||
|
nullptr, // cl_event *event_wait_list
|
||||||
|
nullptr // cl_event *event
|
||||||
|
);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
}
|
||||||
|
// Case 2: source is ordinary heap memory - one host surface allocation expected.
{
|
||||||
|
auto notMappedPtr = std::make_unique<char[]>(size);
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||||
|
false, // cl_bool blocking_copy
|
||||||
|
ptrSVM, // void *dst_ptr
|
||||||
|
notMappedPtr.get(), // const void *src_ptr
|
||||||
|
size, // size_t size
|
||||||
|
0, // cl_uint num_events_in_wait_list
|
||||||
|
nullptr, // cl_event *event_wait_list
|
||||||
|
nullptr // cl_event *event
|
||||||
|
);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copies from the fixture's SVM allocation (ptrSVM) into two kinds of destination pointers and
// checks csr.createAllocationForHostSurfaceCalled after each enqueueSVMMemcpy call:
// a pointer obtained from createBufferAndMapItOnGpu() must leave the counter at 0,
// a plain heap pointer must raise it to 1.
HWTEST_F(EnqueueSvmTest, givenCopyFromSvmAllocToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
|
||||||
|
constexpr size_t size = 1u;
|
||||||
|
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
|
// Case 1: destination is a mapped buffer pointer - no host surface allocation expected.
{
|
||||||
|
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
|
||||||
|
std::ignore = buffer;
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||||
|
false, // cl_bool blocking_copy
|
||||||
|
mappedPtr, // void *dst_ptr
|
||||||
|
ptrSVM, // const void *src_ptr
|
||||||
|
size, // size_t size
|
||||||
|
0, // cl_uint num_events_in_wait_list
|
||||||
|
nullptr, // cl_event *event_wait_list
|
||||||
|
nullptr // cl_event *event
|
||||||
|
);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
}
|
||||||
|
// Case 2: destination is ordinary heap memory - one host surface allocation expected.
{
|
||||||
|
auto notMappedPtr = std::make_unique<char[]>(size);
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||||
|
false, // cl_bool blocking_copy
|
||||||
|
notMappedPtr.get(), // void *dst_ptr
|
||||||
|
ptrSVM, // const void *src_ptr
|
||||||
|
size, // size_t size
|
||||||
|
0, // cl_uint num_events_in_wait_list
|
||||||
|
nullptr, // cl_event *event_wait_list
|
||||||
|
nullptr // cl_event *event
|
||||||
|
);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Exercises enqueueSVMMemcpy where neither pointer is an SVM allocation and checks the running
// value of csr.createAllocationForHostSurfaceCalled across three cases:
// mapped -> mapped keeps it at 0, heap -> mapped raises it to 1, mapped -> heap raises it to 2.
HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
|
||||||
|
constexpr size_t size = 1u;
|
||||||
|
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
|
||||||
|
|
||||||
|
// Case 1: both source and destination are mapped buffer pointers.
{
|
||||||
|
auto [buffer1, mappedPtr1] = createBufferAndMapItOnGpu();
|
||||||
|
auto [buffer2, mappedPtr2] = createBufferAndMapItOnGpu();
|
||||||
|
std::ignore = buffer1;
|
||||||
|
std::ignore = buffer2;
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||||
|
false, // cl_bool blocking_copy
|
||||||
|
mappedPtr2, // void *dst_ptr
|
||||||
|
mappedPtr1, // const void *src_ptr
|
||||||
|
size, // size_t size
|
||||||
|
0, // cl_uint num_events_in_wait_list
|
||||||
|
nullptr, // cl_event *event_wait_list
|
||||||
|
nullptr // cl_event *event
|
||||||
|
);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
}
|
||||||
|
// Case 2: destination is mapped, source is ordinary heap memory.
{
|
||||||
|
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
|
||||||
|
std::ignore = buffer;
|
||||||
|
auto notMappedPtr = std::make_unique<char[]>(size);
|
||||||
|
EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||||
|
false, // cl_bool blocking_copy
|
||||||
|
mappedPtr, // void *dst_ptr
|
||||||
|
notMappedPtr.get(), // const void *src_ptr
|
||||||
|
size, // size_t size
|
||||||
|
0, // cl_uint num_events_in_wait_list
|
||||||
|
nullptr, // cl_event *event_wait_list
|
||||||
|
nullptr // cl_event *event
|
||||||
|
);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
}
|
||||||
|
// Case 3: destination is ordinary heap memory, source is mapped; the counter carries over from case 2.
{
|
||||||
|
auto notMappedPtr = std::make_unique<char[]>(size);
|
||||||
|
auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
|
||||||
|
std::ignore = buffer;
|
||||||
|
EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
retVal = this->pCmdQ->enqueueSVMMemcpy(
|
||||||
|
false, // cl_bool blocking_copy
|
||||||
|
notMappedPtr.get(), // void *dst_ptr
|
||||||
|
mappedPtr, // const void *src_ptr
|
||||||
|
size, // size_t size
|
||||||
|
0, // cl_uint num_events_in_wait_list
|
||||||
|
nullptr, // cl_event *event_wait_list
|
||||||
|
nullptr // cl_event *event
|
||||||
|
);
|
||||||
|
EXPECT_EQ(CL_SUCCESS, retVal);
|
||||||
|
EXPECT_EQ(2u, csr.createAllocationForHostSurfaceCalled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user