Implement local memory path for all devices in buffer

Related-To: NEO-4589
Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
2026-01-04 23:56:39 +08:00 · 2020-11-04 16:50:54 +01:00
parent ad747a5cbf
commit 3d9a180c12
11 changed files with 197 additions and 9 deletions
--- a/opencl/source/command_queue/enqueue_read_buffer.h
+++ b/opencl/source/command_queue/enqueue_read_buffer.h
@@ -44,6 +44,9 @@ cl_int CommandQueueHw<GfxFamily>::enqueueReadBuffer(
    }

    auto rootDeviceIndex = getDevice().getRootDeviceIndex();
+
+    buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
+
    bool isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
    bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingRead, size, ptr,
                                                 numEventsInWaitList, eventWaitList);
--- a/opencl/source/command_queue/enqueue_write_buffer.h
+++ b/opencl/source/command_queue/enqueue_write_buffer.h
@@ -32,6 +32,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(
    const cl_event *eventWaitList,
    cl_event *event) {

+    auto rootDeviceIndex = getDevice().getRootDeviceIndex();
+
+    buffer->getMigrateableMultiGraphicsAllocation().ensureMemoryOnDevice(*getDevice().getMemoryManager(), rootDeviceIndex);
+
    const cl_command_type cmdType = CL_COMMAND_WRITE_BUFFER;
    auto isMemTransferNeeded = buffer->isMemObjZeroCopy() ? buffer->checkIfMemoryTransferIsRequired(offset, 0, ptr, cmdType) : true;
    bool isCpuCopyAllowed = bufferCpuCopyAllowed(buffer, cmdType, blockingWrite, size, const_cast<void *>(ptr),
@@ -39,7 +43,6 @@ cl_int CommandQueueHw<GfxFamily>::enqueueWriteBuffer(

    //check if we are dealing with SVM pointer here for which we already have an allocation
    if (!mapAllocation && this->getContext().getSVMAllocsManager()) {
-        auto rootDeviceIndex = getDevice().getRootDeviceIndex();
        auto svmEntry = this->getContext().getSVMAllocsManager()->getSVMAlloc(ptr);
        if (svmEntry) {
            if ((svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress() + svmEntry->size) < (castToUint64(ptr) + size)) {
--- a/opencl/source/mem_obj/buffer.cpp
+++ b/opencl/source/mem_obj/buffer.cpp
@@ -178,6 +178,7 @@ Buffer *Buffer::create(Context *context,

    void *ptr = nullptr;
    bool forceCopyHostPtr = false;
+    bool copyExecuted = false;

    for (auto &rootDeviceIndex : context->getRootDeviceIndices()) {
        allocationInfo[rootDeviceIndex] = {};
@@ -375,7 +376,7 @@ Buffer *Buffer::create(Context *context,
        }
        pBuffer->setHostPtrMinSize(size);

-        if (allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr) {
+        if (allocationInfo[rootDeviceIndex].copyMemoryFromHostPtr && !copyExecuted) {
            auto gmm = allocationInfo[rootDeviceIndex].memory->getDefaultGmm();
            bool gpuCopyRequired = (gmm && gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(allocationInfo[rootDeviceIndex].memory->getMemoryPool());

@@ -388,8 +389,10 @@ Buffer *Buffer::create(Context *context,
                        errcodeRet = CL_OUT_OF_RESOURCES;
                    }
                }
+                copyExecuted = true;
            } else {
                memcpy_s(allocationInfo[rootDeviceIndex].memory->getUnderlyingBuffer(), size, hostPtr, size);
+                copyExecuted = true;
            }
        }
    }
--- a/opencl/source/mem_obj/mem_obj.h
+++ b/opencl/source/mem_obj/mem_obj.h
@@ -131,6 +131,7 @@ class MemObj : public BaseObject<_cl_mem> {
    const cl_mem_flags &getFlags() const { return flags; }
    const cl_mem_flags &getFlagsIntel() const { return flagsIntel; }
    const MultiGraphicsAllocation &getMultiGraphicsAllocation() const { return multiGraphicsAllocation; }
+    MultiGraphicsAllocation &getMigrateableMultiGraphicsAllocation() { return multiGraphicsAllocation; }

  protected:
    void getOsSpecificMemObjectInfo(const cl_mem_info &paramName, size_t *srcParamSize, void **srcParam);