Implement local memory path for all devices in buffer

Related-To: NEO-4589
Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
This commit is contained in:
Krzysztof Gibala
2020-11-04 16:50:54 +01:00
committed by Compute-Runtime-Automation
parent ad747a5cbf
commit 3d9a180c12
11 changed files with 197 additions and 9 deletions

View File

@@ -44,6 +44,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
}
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr,
numEventsInWaitList, eventWaitList);

View File

@@ -32,6 +32,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
const cl_event *eventWaitList,
cl_event *event) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER;
auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr),
@@ -39,7 +43,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
//check if we are dealing with SVM pointer here for which we already have an allocation
if (!mapAllocation && this->getContext().getSVMAllocsManager()) {
auto rootDeviceIndex = getDevice().getRootDeviceIndex();
auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr);
if (svmEntry) {
if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) {

View File

@@ -178,6 +178,7 @@ Buffer *Buffer::create(Context *context,
void *ptr = nullptr;
bool forceCopyHostPtr = false;
bool copyExecuted = false;
for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
allocationInfo[rootDeviceIndex] = {};
@@ -375,7 +376,7 @@ Buffer *Buffer::create(Context *context,
}
pBuffer->setHostPtrMinSize(size);
if (allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr) {
if (allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr && !copyExecuted) {
auto gmm = allocationInfo[rootDeviceIndex].memory->getDefaultGmm();
bool gpuCopyRequired = (gmm && gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool());
@@ -388,8 +389,10 @@ Buffer *Buffer::create(Context *context,
errcodeRet = CL_OUT_OF_RESOURCES;
}
}
copyExecuted = true;
} else {
memcpy_s(allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer(), size, hostPtr, size);
copyExecuted = true;
}
}
}

View File

@@ -131,6 +131,7 @@ class MemObj : public BaseObject<_cl_mem> {
// Read-only accessor: returns a const reference to `flags`.
const cl_mem_flags &getFlags() const {
    return flags;
}
// Read-only accessor: returns a const reference to `flagsIntel`.
const cl_mem_flags &getFlagsIntel() const {
    return flagsIntel;
}
// Read-only view of `multiGraphicsAllocation`; usable on const objects.
const MultiGraphicsAllocation &getMultiGraphicsAllocation() const {
    return multiGraphicsAllocation;
}
// Mutable accessor: returns a non-const reference to `multiGraphicsAllocation`,
// so the caller may invoke non-const operations on it.
MultiGraphicsAllocation &getMigrateableMultiGraphicsAllocation() {
    return multiGraphicsAllocation;
}
protected:
void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);