Reuse graphics allocations in svmMemcpy

Related-To: NEO-6352
Signed-off-by: Maciej Dziuban <maciej.dziuban@intel.com>
2026-01-08 05:56:36 +08:00 · 2021-11-08 12:06:22 +00:00
parent 32370473ad
commit 457ef00abf
7 changed files with 238 additions and 46 deletions
--- a/opencl/source/command_queue/command_queue.cpp
+++ b/opencl/source/command_queue/command_queue.cpp
@@ -979,15 +979,4 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
    }
 }
 void *CommandQueue::convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
    // Device and shared USM pointers are already GPU virtual addresses, so they are handed back untouched.
    if ((memoryType == DEVICE_UNIFIED_MEMORY) || (memoryType == SHARED_UNIFIED_MEMORY)) {
        return ptr;
    }

    // Every other memory type is assumed to be a CPU virtual address: keep its distance from the start
    // of the allocation's underlying buffer and apply that distance to the allocation's GPU address.
    const size_t offsetInAllocation = ptrDiff(ptr, allocation.getUnderlyingBuffer());
    return reinterpret_cast<void *>(allocation.getGpuAddress() + offsetInAllocation);
 }
 } // namespace NEO
--- a/opencl/source/command_queue/command_queue.h
+++ b/opencl/source/command_queue/command_queue.h
@@ -322,7 +322,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
        return requiresCacheFlushAfterWalker;
    }
-    static void *convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
+    template <typename PtrType>
    static PtrType convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
    void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount);
    uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;
@@ -404,6 +405,18 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
    std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
 };
 template <typename PtrType>
 PtrType CommandQueue::convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
    // A device or shared USM pointer is a GPU virtual address already; nothing to translate.
    const bool alreadyGpuVa = (memoryType == DEVICE_UNIFIED_MEMORY) || (memoryType == SHARED_UNIFIED_MEMORY);
    if (alreadyGpuVa) {
        return ptr;
    }

    // Otherwise the pointer is assumed to be a CPU virtual address inside the allocation:
    // translate it to the GPU address space while preserving the offset from the allocation start.
    const size_t offsetFromStart = ptrDiff(ptr, allocation.getUnderlyingBuffer());
    return reinterpret_cast<PtrType>(allocation.getGpuAddress() + offsetFromStart);
 }
 using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage);
 } // namespace NEO
--- a/opencl/source/command_queue/csr_selection_args.h
+++ b/opencl/source/command_queue/csr_selection_args.h
@@ -73,8 +73,12 @@ struct CsrSelectionArgs {
    }
    static void processResource(const MultiGraphicsAllocation &multiGfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
-        outResource.allocation = multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex);
+        processResource(*multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex), rootDeviceIndex, outResource);
-        outResource.isLocal = outResource.allocation->isAllocatedInLocalMemoryPool();
+    }
    static void processResource(const GraphicsAllocation &gfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
        outResource.allocation = &gfxAlloc;
        outResource.isLocal = gfxAlloc.isAllocatedInLocalMemoryPool();
    }
    static inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) {
--- a/opencl/source/command_queue/enqueue_svm.h
+++ b/opencl/source/command_queue/enqueue_svm.h
@@ -274,6 +274,24 @@ inline void setOperationParams(BuiltinOpParams &operationParams, size_t size,
    operationParams.dstOffset = {ptrDiff(dstPtr, operationParams.dstPtr), 0, 0};
 }
 // Resolves ptr to a graphics allocation that already exists in the context, if there is one.
 // The returned tuple holds, in order:
 //   - the SVM allocation data registered for ptr (nullptr when ptr is not an SVM pointer),
 //   - the matching graphics allocation (nullptr when none was found),
 //   - the pointer to use for the copy: unchanged for SVM pointers and for unknown pointers,
 //     converted via CommandQueue::convertAddressWithOffsetToGpuVa when a map allocation was found.
 template <typename PtrType>
 inline std::tuple<SvmAllocationData *, GraphicsAllocation *, PtrType> getExistingAlloc(Context *context,
                                                                                        PtrType ptr,
                                                                                        size_t size,
                                                                                        uint32_t rootDeviceIndex) {
    using ExistingAlloc = std::tuple<SvmAllocationData *, GraphicsAllocation *, PtrType>;

    SvmAllocationData *const svmEntry = context->getSVMAllocsManager()->getSVMAlloc(ptr);
    if (svmEntry != nullptr) {
        // SVM pointer: take the allocation for this root device, the pointer itself stays as it is.
        GraphicsAllocation *const svmAllocation = svmEntry->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
        return ExistingAlloc(svmEntry, svmAllocation, ptr);
    }

    // Not an SVM pointer: ask the context whether it belongs to an existing map allocation.
    GraphicsAllocation *mappedAllocation = nullptr;
    context->tryGetExistingMapAllocation(ptr, size, mappedAllocation);
    if (mappedAllocation != nullptr) {
        ptr = CommandQueue::convertAddressWithOffsetToGpuVa(ptr, InternalMemoryType::NOT_SPECIFIED, *mappedAllocation);
    }
    return ExistingAlloc(svmEntry, mappedAllocation, ptr);
 }
 template <typename GfxFamily>
 cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
                                                   void *dstPtr,
@@ -287,28 +305,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
        return CL_INVALID_VALUE;
    }
    auto rootDeviceIndex = getDevice().getRootDeviceIndex();
-    auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstPtr);
+    auto [dstSvmData, dstAllocation, dstGpuPtr] = getExistingAlloc(context, dstPtr, size, rootDeviceIndex);
-    auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcPtr);
+    auto [srcSvmData, srcAllocation, srcGpuPtr] = getExistingAlloc(context, srcPtr, size, rootDeviceIndex);
    enum CopyType { HostToHost,
                    SvmToHost,
                    HostToSvm,
                    SvmToSvm };
    CopyType copyType = HostToHost;
-    if ((srcSvmData != nullptr) && (dstSvmData != nullptr)) {
+    if ((srcAllocation != nullptr) && (dstAllocation != nullptr)) {
        copyType = SvmToSvm;
-    } else if ((srcSvmData == nullptr) && (dstSvmData != nullptr)) {
+    } else if ((srcAllocation == nullptr) && (dstAllocation != nullptr)) {
        copyType = HostToSvm;
-    } else if (srcSvmData != nullptr) {
+    } else if (srcAllocation != nullptr) {
        copyType = SvmToHost;
    }
    auto pageFaultManager = context->getMemoryManager()->getPageFaultManager();
    if (dstSvmData && pageFaultManager) {
-        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()));
+        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(dstAllocation->getGpuAddress()));
    }
    if (srcSvmData && pageFaultManager) {
-        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()));
+        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(srcAllocation->getGpuAddress()));
    }
    auto isStatelessRequired = false;
@@ -330,20 +348,20 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
    cl_command_type cmdType;
    if (copyType == SvmToHost) {
-        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
+        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, {}, device->getRootDeviceIndex(), &size};
        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
-        GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
+        GeneralSurface srcSvmSurf(srcAllocation);
-        HostPtrSurface dstHostPtrSurf(dstPtr, size);
+        HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
        if (size != 0) {
            bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
            if (!status) {
                return CL_OUT_OF_RESOURCES;
            }
-            dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
+            dstGpuPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
-            notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
+            notifyEnqueueSVMMemcpy(srcAllocation, !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
        }
-        setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation());
+        setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstHostPtrSurf.getAllocation());
        surfaces[0] = &srcSvmSurf;
        surfaces[1] = &dstHostPtrSurf;
@@ -351,36 +369,33 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_READ_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
    } else if (copyType == HostToSvm) {
-        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
+        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, dstAllocation, device->getRootDeviceIndex(), &size};
        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
-        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
+        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
-        GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
+        GeneralSurface dstSvmSurf(dstAllocation);
        cmdType = CL_COMMAND_WRITE_BUFFER;
        if (size != 0) {
            bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
            if (!status) {
                return CL_OUT_OF_RESOURCES;
            }
-            srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
+            srcGpuPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
        }
-        setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(),
+        setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstAllocation);
                           dstPtr, dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
        surfaces[0] = &dstSvmSurf;
        surfaces[1] = &srcHostPtrSurf;
        dispatchInfo.setBuiltinOpParams(operationParams);
        dispatchInfo.setBuiltinOpParams(operationParams);
        dispatchBcsOrGpgpuEnqueue<CL_COMMAND_WRITE_BUFFER>(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
    } else if (copyType == SvmToSvm) {
-        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
+        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, dstAllocation, device->getRootDeviceIndex(), &size};
        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
-        GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
+        GeneralSurface srcSvmSurf(srcAllocation);
-        GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
+        GeneralSurface dstSvmSurf(dstAllocation);
-        setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex),
+        setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstAllocation);
                           dstPtr, dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
        surfaces[0] = &srcSvmSurf;
        surfaces[1] = &dstSvmSurf;
@@ -391,8 +406,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
        CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
-        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
+        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
-        HostPtrSurface dstHostPtrSurf(dstPtr, size);
+        HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
        cmdType = CL_COMMAND_WRITE_BUFFER;
        if (size != 0) {
            bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
@@ -400,10 +415,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
            if (!status) {
                return CL_OUT_OF_RESOURCES;
            }
-            srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
+            srcGpuPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
-            dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
+            dstGpuPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
        }
-        setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(), dstPtr, dstHostPtrSurf.getAllocation());
+        setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstHostPtrSurf.getAllocation());
        surfaces[0] = &srcHostPtrSurf;
        surfaces[1] = &dstHostPtrSurf;
--- a/opencl/source/context/context.cpp
+++ b/opencl/source/context/context.cpp
@@ -88,6 +88,21 @@ cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
                                                GraphicsAllocation *&allocation,
                                                InternalMemoryType &memoryType,
                                                bool &isCpuCopyAllowed) {
    cl_int retVal = tryGetExistingSvmAllocation(ptr, size, rootDeviceIndex, allocation, memoryType, isCpuCopyAllowed);
    if (retVal != CL_SUCCESS || allocation != nullptr) {
        return retVal;
    }
    retVal = tryGetExistingMapAllocation(ptr, size, allocation);
    return retVal;
 }
 cl_int Context::tryGetExistingSvmAllocation(const void *ptr,
                                            size_t size,
                                            uint32_t rootDeviceIndex,
                                            GraphicsAllocation *&allocation,
                                            InternalMemoryType &memoryType,
                                            bool &isCpuCopyAllowed) {
    if (getSVMAllocsManager()) {
        SvmAllocationData *svmEntry = getSVMAllocsManager()->getSVMAlloc(ptr);
        if (svmEntry) {
@@ -101,16 +116,19 @@ cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
                    isCpuCopyAllowed = false;
                }
            }
            return CL_SUCCESS;
        }
    }
    return CL_SUCCESS;
 }
 // Queries mapOperationsStorage for map info matching the given host pointer and size.
 // When such info exists and carries a graphics allocation, that allocation is written to
 // `allocation`; otherwise `allocation` is left as the caller passed it. Always returns CL_SUCCESS.
 cl_int Context::tryGetExistingMapAllocation(const void *ptr,
                                             size_t size,
                                             GraphicsAllocation *&allocation) {
    MapInfo foundMapInfo = {};
    if (mapOperationsStorage.getInfoForHostPtr(ptr, size, foundMapInfo) && foundMapInfo.graphicsAllocation) {
        allocation = foundMapInfo.graphicsAllocation;
    }
    return CL_SUCCESS;
 }
--- a/opencl/source/context/context.h
+++ b/opencl/source/context/context.h
@@ -103,6 +103,15 @@ class Context : public BaseObject<_cl_context> {
                                           GraphicsAllocation *&allocation,
                                           InternalMemoryType &memoryType,
                                           bool &isCpuCopyAllowed);
    // Looks ptr up in the SVM allocations manager, when the context has one.
    // NOTE(review): allocation, memoryType and isCpuCopyAllowed are presumably filled only when an
    // SVM entry is found for ptr - part of the definition is not visible here; confirm in context.cpp.
    cl_int tryGetExistingSvmAllocation(const void *ptr,
                                       size_t size,
                                       uint32_t rootDeviceIndex,
                                       GraphicsAllocation *&allocation,
                                       InternalMemoryType &memoryType,
                                       bool &isCpuCopyAllowed);
    // Queries the map operations storage for map info matching ptr and size. When such info exists
    // and carries a graphics allocation, it is stored in `allocation`; otherwise `allocation` is not
    // modified. The definition returns CL_SUCCESS in every case.
    cl_int tryGetExistingMapAllocation(const void *ptr,
                                       size_t size,
                                       GraphicsAllocation *&allocation);
    const std::set<uint32_t> &getRootDeviceIndices() const;
--- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
@@ -57,6 +57,18 @@ struct EnqueueSvmTest : public ClDeviceFixture,
        ClDeviceFixture::TearDown();
    }
    // Test helper: creates a buffer through BufferHelper<>::create() in this fixture's context and maps
    // the whole buffer (offset 0, buffer->getSize() bytes) for reading through pCmdQ.
    // Returns the buffer together with the pointer obtained from enqueueMapBuffer. The map result is
    // checked with EXPECT_* (CL_SUCCESS, non-null pointer) before returning.
    std::pair<ReleaseableObjectPtr<Buffer>, void *> createBufferAndMapItOnGpu() {
        // NOTE(review): presumably restores the debug flags when it leaves scope - confirm.
        DebugManagerStateRestore restore{};
        // Set before the buffer is created, so the flag is in effect for BufferHelper<>::create() below.
        DebugManager.flags.DisableZeroCopyForBuffers.set(1);
        BufferDefaults::context = this->context;
        ReleaseableObjectPtr<Buffer> buffer = clUniquePtr(BufferHelper<>::create());
        // CL_TRUE is passed in the blocking-map position, CL_MAP_READ as the map flags.
        void *mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, retVal);
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_NE(nullptr, mappedPtr);
        return {std::move(buffer), mappedPtr};
    }
    cl_int retVal = CL_SUCCESS;
    void *ptrSVM = nullptr;
 };
@@ -1915,3 +1927,135 @@ TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemFillThenAllocIsDecommi
    context->memoryManager = memoryManager;
 }
 // Source is a mapped buffer pointer, destination is the fixture's SVM pointer: the copy is expected to
 // succeed without any createAllocationForHostSurface call. The same copy from a plain heap pointer is
 // expected to trigger exactly one such call.
 HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToSvmAllocWhenCallingSvmMemcpyThenReuseMappedAllocations) {
    constexpr size_t size = 1u;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    {
        // Mapped source: the counter must stay at 0 across the call.
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,     // cl_bool  blocking_copy
            ptrSVM,    // void *dst_ptr
            mappedPtr, // const void *src_ptr
            size,      // size_t size
            0,         // cl_uint num_events_in_wait_list
            nullptr,   // cl_event *event_wait_list
            nullptr    // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Unmapped heap source: the counter is expected to go from 0 to 1.
        auto notMappedPtr = std::make_unique<char[]>(size);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool  blocking_copy
            ptrSVM,             // void *dst_ptr
            notMappedPtr.get(), // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
    }
 }
 // Source is the fixture's SVM pointer, destination is a mapped buffer pointer: the copy is expected to
 // succeed without any createAllocationForHostSurface call. The same copy into a plain heap pointer is
 // expected to trigger exactly one such call.
 HWTEST_F(EnqueueSvmTest, givenCopyFromSvmAllocToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
    constexpr size_t size = 1u;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    {
        // Mapped destination: the counter must stay at 0 across the call.
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,     // cl_bool  blocking_copy
            mappedPtr, // void *dst_ptr
            ptrSVM,    // const void *src_ptr
            size,      // size_t size
            0,         // cl_uint num_events_in_wait_list
            nullptr,   // cl_event *event_wait_list
            nullptr    // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Unmapped heap destination: the counter is expected to go from 0 to 1.
        auto notMappedPtr = std::make_unique<char[]>(size);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool  blocking_copy
            notMappedPtr.get(), // void *dst_ptr
            ptrSVM,             // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
    }
 }
 // Covers copies where neither side is an SVM pointer. With both pointers mapped, no
 // createAllocationForHostSurface call is expected. With exactly one side being a plain heap pointer,
 // exactly one additional call is expected per copy.
 HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
    constexpr size_t size = 1u;
    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
    {
        // Both source and destination are mapped: the counter must stay at 0.
        auto [buffer1, mappedPtr1] = createBufferAndMapItOnGpu();
        auto [buffer2, mappedPtr2] = createBufferAndMapItOnGpu();
        std::ignore = buffer1;
        std::ignore = buffer2;
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,      // cl_bool  blocking_copy
            mappedPtr2, // void *dst_ptr
            mappedPtr1, // const void *src_ptr
            size,       // size_t size
            0,          // cl_uint num_events_in_wait_list
            nullptr,    // cl_event *event_wait_list
            nullptr     // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Mapped destination, unmapped heap source: the counter is expected to go from 0 to 1.
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        auto notMappedPtr = std::make_unique<char[]>(size);
        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool  blocking_copy
            mappedPtr,          // void *dst_ptr
            notMappedPtr.get(), // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
    }
    {
        // Unmapped heap destination, mapped source. The counter carries over from the previous
        // scope, so it starts at 1 here and is expected to reach 2.
        auto notMappedPtr = std::make_unique<char[]>(size);
        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
        std::ignore = buffer;
        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
        retVal = this->pCmdQ->enqueueSVMMemcpy(
            false,              // cl_bool  blocking_copy
            notMappedPtr.get(), // void *dst_ptr
            mappedPtr,          // const void *src_ptr
            size,               // size_t size
            0,                  // cl_uint num_events_in_wait_list
            nullptr,            // cl_event *event_wait_list
            nullptr             // cl_event *event
        );
        EXPECT_EQ(CL_SUCCESS, retVal);
        EXPECT_EQ(2u, csr.createAllocationForHostSurfaceCalled);
    }
 }