diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp
index c6b1b28aaa..5ab856c8bc 100644
--- a/opencl/source/command_queue/command_queue.cpp
+++ b/opencl/source/command_queue/command_queue.cpp
@@ -979,15 +979,4 @@ void CommandQueue::waitForAllEngines(bool blockedQueue, PrintfHandler *printfHan
     }
 }
 
-void *CommandQueue::convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
-    // If this is device or shared USM pointer, it is already a gpuVA and we don't have to do anything.
-    // Otherwise, we assume this is a cpuVA and we have to convert to gpuVA, while preserving offset from allocation start.
-    const bool isCpuPtr = (memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY);
-    if (isCpuPtr) {
-        size_t dstOffset = ptrDiff(ptr, allocation.getUnderlyingBuffer());
-        ptr = reinterpret_cast<void *>(allocation.getGpuAddress() + dstOffset);
-    }
-    return ptr;
-}
-
 } // namespace NEO
diff --git a/opencl/source/command_queue/command_queue.h b/opencl/source/command_queue/command_queue.h
index 8672f49911..68c2f68308 100644
--- a/opencl/source/command_queue/command_queue.h
+++ b/opencl/source/command_queue/command_queue.h
@@ -322,7 +322,8 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
         return requiresCacheFlushAfterWalker;
     }
 
-    static void *convertAddressWithOffsetToGpuVa(void *ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
+    template <typename PtrType>
+    static PtrType convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation);
 
     void updateBcsTaskCount(aub_stream::EngineType bcsEngineType, uint32_t newBcsTaskCount);
     uint32_t peekBcsTaskCount(aub_stream::EngineType bcsEngineType) const;
@@ -404,6 +405,18 @@ class CommandQueue : public BaseObject<_cl_command_queue> {
     std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
 };
 
+template <typename PtrType>
+PtrType CommandQueue::convertAddressWithOffsetToGpuVa(PtrType ptr, InternalMemoryType memoryType, GraphicsAllocation &allocation) {
+    // If this is device or shared USM pointer, it is already a gpuVA and we don't have to do anything.
+    // Otherwise, we assume this is a cpuVA and we have to convert to gpuVA, while preserving offset from allocation start.
+    const bool isCpuPtr = (memoryType != DEVICE_UNIFIED_MEMORY) && (memoryType != SHARED_UNIFIED_MEMORY);
+    if (isCpuPtr) {
+        size_t dstOffset = ptrDiff(ptr, allocation.getUnderlyingBuffer());
+        ptr = reinterpret_cast<PtrType>(allocation.getGpuAddress() + dstOffset);
+    }
+    return ptr;
+}
+
 using CommandQueueCreateFunc = CommandQueue *(*)(Context *context, ClDevice *device, const cl_queue_properties *properties, bool internalUsage);
 
 } // namespace NEO
diff --git a/opencl/source/command_queue/csr_selection_args.h b/opencl/source/command_queue/csr_selection_args.h
index ae7ff491a8..8697473912 100644
--- a/opencl/source/command_queue/csr_selection_args.h
+++ b/opencl/source/command_queue/csr_selection_args.h
@@ -73,8 +73,12 @@ struct CsrSelectionArgs {
     }
 
     static void processResource(const MultiGraphicsAllocation &multiGfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
-        outResource.allocation = multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex);
-        outResource.isLocal = outResource.allocation->isAllocatedInLocalMemoryPool();
+        processResource(*multiGfxAlloc.getGraphicsAllocation(rootDeviceIndex), rootDeviceIndex, outResource);
+    }
+
+    static void processResource(const GraphicsAllocation &gfxAlloc, uint32_t rootDeviceIndex, Resource &outResource) {
+        outResource.allocation = &gfxAlloc;
+        outResource.isLocal = gfxAlloc.isAllocatedInLocalMemoryPool();
     }
 
     static inline TransferDirection createTransferDirection(bool srcLocal, bool dstLocal) {
diff --git a/opencl/source/command_queue/enqueue_svm.h b/opencl/source/command_queue/enqueue_svm.h
index 89a3231139..109b065ae3 100644
--- a/opencl/source/command_queue/enqueue_svm.h
+++ b/opencl/source/command_queue/enqueue_svm.h
@@ -274,6 +274,24 @@ inline void setOperationParams(BuiltinOpParams &operationParams, size_t size,
     operationParams.dstOffset = {ptrDiff(dstPtr, operationParams.dstPtr), 0, 0};
 }
 
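+// Resolve an existing allocation for the given host pointer: first look it up as an SVM allocation,
+// otherwise as a host pointer that was previously mapped through this context. For a map allocation the
+// returned pointer is converted to its gpuVA, so the memcpy path can treat it like an SVM allocation.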
+template <typename PtrType>
+inline std::tuple<SvmAllocationData *, GraphicsAllocation *, PtrType> getExistingAlloc(Context *context,
+                                                                                       PtrType ptr,
+                                                                                       size_t size,
+                                                                                       uint32_t rootDeviceIndex) {
+    SvmAllocationData *svmData = context->getSVMAllocsManager()->getSVMAlloc(ptr);
+    GraphicsAllocation *allocation = nullptr;
+    if (svmData) {
+        allocation = svmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex);
+    } else {
+        context->tryGetExistingMapAllocation(ptr, size, allocation);
+        if (allocation) {
+            ptr = CommandQueue::convertAddressWithOffsetToGpuVa(ptr, InternalMemoryType::NOT_SPECIFIED, *allocation);
+        }
+    }
+    return std::make_tuple(svmData, allocation, ptr);
+}
+
 template <typename GfxFamily>
 cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
                                                    void *dstPtr,
@@ -287,28 +305,28 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
         return CL_INVALID_VALUE;
     }
     auto rootDeviceIndex = getDevice().getRootDeviceIndex();
-    auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstPtr);
-    auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcPtr);
+    auto [dstSvmData, dstAllocation, dstGpuPtr] = getExistingAlloc(context, dstPtr, size, rootDeviceIndex);
+    auto [srcSvmData, srcAllocation, srcGpuPtr] = getExistingAlloc(context, srcPtr, size, rootDeviceIndex);
 
     enum CopyType { HostToHost, SvmToHost, HostToSvm, SvmToSvm };
     CopyType copyType = HostToHost;
-    if ((srcSvmData != nullptr) && (dstSvmData != nullptr)) {
+    if ((srcAllocation != nullptr) && (dstAllocation != nullptr)) {
         copyType = SvmToSvm;
-    } else if ((srcSvmData == nullptr) && (dstSvmData != nullptr)) {
+    } else if ((srcAllocation == nullptr) && (dstAllocation != nullptr)) {
         copyType = HostToSvm;
-    } else if (srcSvmData != nullptr) {
+    } else if (srcAllocation != nullptr) {
         copyType = SvmToHost;
     }
 
     auto pageFaultManager = context->getMemoryManager()->getPageFaultManager();
     if (dstSvmData && pageFaultManager) {
-        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()));
+        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(dstAllocation->getGpuAddress()));
     }
     if (srcSvmData && pageFaultManager) {
-        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex)->getGpuAddress()));
+        pageFaultManager->moveAllocationToGpuDomain(reinterpret_cast<void *>(srcAllocation->getGpuAddress()));
     }
 
     auto isStatelessRequired = false;
@@ -330,20 +348,20 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
     cl_command_type cmdType;
 
     if (copyType == SvmToHost) {
-        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, {}, device->getRootDeviceIndex(), &size};
+        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, {}, device->getRootDeviceIndex(), &size};
         CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
 
-        GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
-        HostPtrSurface dstHostPtrSurf(dstPtr, size);
+        GeneralSurface srcSvmSurf(srcAllocation);
+        HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
         if (size != 0) {
             bool status = csr.createAllocationForHostSurface(dstHostPtrSurf, true);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
-            dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
-            notifyEnqueueSVMMemcpy(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
+            dstGpuPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
+            notifyEnqueueSVMMemcpy(srcAllocation, !!blockingCopy, EngineHelpers::isBcs(csr.getOsContext().getEngineType()));
         }
-        setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex), dstPtr, dstHostPtrSurf.getAllocation());
+        setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstHostPtrSurf.getAllocation());
         surfaces[0] = &srcSvmSurf;
         surfaces[1] = &dstHostPtrSurf;
 
@@ -351,36 +369,33 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
 
         dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
     } else if (copyType == HostToSvm) {
-        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
+        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, {}, dstAllocation, device->getRootDeviceIndex(), &size};
         CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
 
-        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
-        GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
+        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
+        GeneralSurface dstSvmSurf(dstAllocation);
         cmdType = CL_COMMAND_WRITE_BUFFER;
         if (size != 0) {
             bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
-            srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
+            srcGpuPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
         }
-        setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(),
-                           dstPtr, dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
+        setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstAllocation);
         surfaces[0] = &dstSvmSurf;
         surfaces[1] = &srcHostPtrSurf;
 
-        dispatchInfo.setBuiltinOpParams(operationParams);
         dispatchInfo.setBuiltinOpParams(operationParams);
         dispatchBcsOrGpgpuEnqueue(dispatchInfo, surfaces, builtInType, numEventsInWaitList, eventWaitList, event, blockingCopy, csr);
     } else if (copyType == SvmToSvm) {
-        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &srcSvmData->gpuAllocations, &dstSvmData->gpuAllocations, device->getRootDeviceIndex(), &size};
+        CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, srcAllocation, dstAllocation, device->getRootDeviceIndex(), &size};
         CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
 
-        GeneralSurface srcSvmSurf(srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
-        GeneralSurface dstSvmSurf(dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
-        setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex),
-                           dstPtr, dstSvmData->gpuAllocations.getGraphicsAllocation(rootDeviceIndex));
+        GeneralSurface srcSvmSurf(srcAllocation);
+        GeneralSurface dstSvmSurf(dstAllocation);
+        setOperationParams(operationParams, size, srcGpuPtr, srcAllocation, dstGpuPtr, dstAllocation);
         surfaces[0] = &srcSvmSurf;
         surfaces[1] = &dstSvmSurf;
 
@@ -391,8 +406,8 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
         CsrSelectionArgs csrSelectionArgs{CL_COMMAND_SVM_MEMCPY, &size};
         CommandStreamReceiver &csr = selectCsrForBuiltinOperation(csrSelectionArgs);
 
-        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
-        HostPtrSurface dstHostPtrSurf(dstPtr, size);
+        HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcGpuPtr), size);
+        HostPtrSurface dstHostPtrSurf(dstGpuPtr, size);
         cmdType = CL_COMMAND_WRITE_BUFFER;
         if (size != 0) {
             bool status = csr.createAllocationForHostSurface(srcHostPtrSurf, false);
@@ -400,10 +415,10 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
             if (!status) {
                 return CL_OUT_OF_RESOURCES;
             }
-            srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
-            dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
+            srcGpuPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
+            dstGpuPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
         }
-        setOperationParams(operationParams, size, srcPtr, srcHostPtrSurf.getAllocation(), dstPtr, dstHostPtrSurf.getAllocation());
+        setOperationParams(operationParams, size, srcGpuPtr, srcHostPtrSurf.getAllocation(), dstGpuPtr, dstHostPtrSurf.getAllocation());
         surfaces[0] = &srcHostPtrSurf;
         surfaces[1] = &dstHostPtrSurf;
 
diff --git a/opencl/source/context/context.cpp b/opencl/source/context/context.cpp
index de205cb7f4..0283166c48 100644
--- a/opencl/source/context/context.cpp
+++ b/opencl/source/context/context.cpp
@@ -88,6 +88,21 @@ cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
                                                 GraphicsAllocation *&allocation,
                                                 InternalMemoryType &memoryType,
                                                 bool &isCpuCopyAllowed) {
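+    // Prefer an SVM allocation for the pointer; fall back to a host pointer that was mapped earlier.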
+    cl_int retVal = tryGetExistingSvmAllocation(ptr, size, rootDeviceIndex, allocation, memoryType, isCpuCopyAllowed);
+    if (retVal != CL_SUCCESS || allocation != nullptr) {
+        return retVal;
+    }
+
+    retVal = tryGetExistingMapAllocation(ptr, size, allocation);
+    return retVal;
+}
+
+cl_int Context::tryGetExistingSvmAllocation(const void *ptr,
+                                            size_t size,
+                                            uint32_t rootDeviceIndex,
+                                            GraphicsAllocation *&allocation,
+                                            InternalMemoryType &memoryType,
+                                            bool &isCpuCopyAllowed) {
     if (getSVMAllocsManager()) {
         SvmAllocationData *svmEntry = getSVMAllocsManager()->getSVMAlloc(ptr);
         if (svmEntry) {
@@ -101,16 +116,19 @@ cl_int Context::tryGetExistingHostPtrAllocation(const void *ptr,
                     isCpuCopyAllowed = false;
                 }
             }
-            return CL_SUCCESS;
         }
     }
+    return CL_SUCCESS;
+}
 
+cl_int Context::tryGetExistingMapAllocation(const void *ptr,
+                                            size_t size,
+                                            GraphicsAllocation *&allocation) {
     if (MapInfo mapInfo = {}; mapOperationsStorage.getInfoForHostPtr(ptr, size, mapInfo)) {
         if (mapInfo.graphicsAllocation) {
             allocation = mapInfo.graphicsAllocation;
         }
     }
-
     return CL_SUCCESS;
 }
 
diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h
index a57d5c5a37..88b4f5ceab 100644
--- a/opencl/source/context/context.h
+++ b/opencl/source/context/context.h
@@ -103,6 +103,15 @@ class Context : public BaseObject<_cl_context> {
                                           GraphicsAllocation *&allocation,
                                           InternalMemoryType &memoryType,
                                           bool &isCpuCopyAllowed);
+    cl_int tryGetExistingSvmAllocation(const void *ptr,
+                                       size_t size,
+                                       uint32_t rootDeviceIndex,
+                                       GraphicsAllocation *&allocation,
+                                       InternalMemoryType &memoryType,
+                                       bool &isCpuCopyAllowed);
+    cl_int tryGetExistingMapAllocation(const void *ptr,
+                                       size_t size,
+                                       GraphicsAllocation *&allocation);
 
     const std::set<uint32_t> &getRootDeviceIndices() const;
 
diff --git a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
index 8f50b40b0a..dba0fda694 100644
--- a/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
+++ b/opencl/test/unit_test/command_queue/enqueue_svm_tests.cpp
@@ -57,6 +57,18 @@ struct EnqueueSvmTest : public ClDeviceFixture,
         ClDeviceFixture::TearDown();
     }
 
+    std::pair<ReleaseableObjectPtr<Buffer>, void *> createBufferAndMapItOnGpu() {
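+        // Force a non-zero-copy buffer, so mapping it returns a host pointer backed by a map
+        // allocation that the context tracks, rather than the buffer storage itself.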
+        DebugManagerStateRestore restore{};
+        DebugManager.flags.DisableZeroCopyForBuffers.set(1);
+
+        BufferDefaults::context = this->context;
+        ReleaseableObjectPtr<Buffer> buffer = clUniquePtr(BufferHelper<>::create());
+        void *mappedPtr = pCmdQ->enqueueMapBuffer(buffer.get(), CL_TRUE, CL_MAP_READ, 0, buffer->getSize(), 0, nullptr, nullptr, retVal);
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_NE(nullptr, mappedPtr);
+        return {std::move(buffer), mappedPtr};
+    }
+
     cl_int retVal = CL_SUCCESS;
     void *ptrSVM = nullptr;
 };
@@ -1915,3 +1927,135 @@ TEST_F(EnqueueSvmTest, givenPageFaultManagerWhenEnqueueMemFillThenAllocIsDecommi
 
     context->memoryManager = memoryManager;
 }
+
+HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToSvmAllocWhenCallingSvmMemcpyThenReuseMappedAllocations) {
+    constexpr size_t size = 1u;
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+
+    {
+        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
+        std::ignore = buffer;
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+        retVal = this->pCmdQ->enqueueSVMMemcpy(
+            false,     // cl_bool blocking_copy
+            ptrSVM,    // void *dst_ptr
+            mappedPtr, // const void *src_ptr
+            size,      // size_t size
+            0,         // cl_uint num_events_in_wait_list
+            nullptr,   // cl_event *event_wait_list
+            nullptr    // cl_event *event
+        );
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+    }
+    {
+        auto notMappedPtr = std::make_unique<char[]>(size);
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+        retVal = this->pCmdQ->enqueueSVMMemcpy(
+            false,              // cl_bool blocking_copy
+            ptrSVM,             // void *dst_ptr
+            notMappedPtr.get(), // const void *src_ptr
+            size,               // size_t size
+            0,                  // cl_uint num_events_in_wait_list
+            nullptr,            // cl_event *event_wait_list
+            nullptr             // cl_event *event
+        );
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
+    }
+}
+
+HWTEST_F(EnqueueSvmTest, givenCopyFromSvmAllocToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
+    constexpr size_t size = 1u;
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+
+    {
+        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
+        std::ignore = buffer;
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+        retVal = this->pCmdQ->enqueueSVMMemcpy(
+            false,     // cl_bool blocking_copy
+            mappedPtr, // void *dst_ptr
+            ptrSVM,    // const void *src_ptr
+            size,      // size_t size
+            0,         // cl_uint num_events_in_wait_list
+            nullptr,   // cl_event *event_wait_list
+            nullptr    // cl_event *event
+        );
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+    }
+    {
+        auto notMappedPtr = std::make_unique<char[]>(size);
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+        retVal = this->pCmdQ->enqueueSVMMemcpy(
+            false,              // cl_bool blocking_copy
+            notMappedPtr.get(), // void *dst_ptr
+            ptrSVM,             // const void *src_ptr
+            size,               // size_t size
+            0,                  // cl_uint num_events_in_wait_list
+            nullptr,            // cl_event *event_wait_list
+            nullptr             // cl_event *event
+        );
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
+    }
+}
+
+HWTEST_F(EnqueueSvmTest, givenCopyFromMappedPtrToMappedPtrWhenCallingSvmMemcpyThenReuseMappedAllocations) {
+    constexpr size_t size = 1u;
+    auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
+
+    {
+        auto [buffer1, mappedPtr1] = createBufferAndMapItOnGpu();
+        auto [buffer2, mappedPtr2] = createBufferAndMapItOnGpu();
+        std::ignore = buffer1;
+        std::ignore = buffer2;
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+        retVal = this->pCmdQ->enqueueSVMMemcpy(
+            false,      // cl_bool blocking_copy
+            mappedPtr2, // void *dst_ptr
+            mappedPtr1, // const void *src_ptr
+            size,       // size_t size
+            0,          // cl_uint num_events_in_wait_list
+            nullptr,    // cl_event *event_wait_list
+            nullptr     // cl_event *event
+        );
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+    }
+    {
+        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
+        std::ignore = buffer;
+        auto notMappedPtr = std::make_unique<char[]>(size);
+        EXPECT_EQ(0u, csr.createAllocationForHostSurfaceCalled);
+        retVal = this->pCmdQ->enqueueSVMMemcpy(
+            false,              // cl_bool blocking_copy
+            mappedPtr,          // void *dst_ptr
+            notMappedPtr.get(), // const void *src_ptr
+            size,               // size_t size
+            0,                  // cl_uint num_events_in_wait_list
+            nullptr,            // cl_event *event_wait_list
+            nullptr             // cl_event *event
+        );
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
+    }
+    {
+        auto notMappedPtr = std::make_unique<char[]>(size);
+        auto [buffer, mappedPtr] = createBufferAndMapItOnGpu();
+        std::ignore = buffer;
+        EXPECT_EQ(1u, csr.createAllocationForHostSurfaceCalled);
+        retVal = this->pCmdQ->enqueueSVMMemcpy(
+            false,              // cl_bool blocking_copy
+            notMappedPtr.get(), // void *dst_ptr
+            mappedPtr,          // const void *src_ptr
+            size,               // size_t size
+            0,                  // cl_uint num_events_in_wait_list
+            nullptr,            // cl_event *event_wait_list
+            nullptr             // cl_event *event
+        );
+        EXPECT_EQ(CL_SUCCESS, retVal);
+        EXPECT_EQ(2u, csr.createAllocationForHostSurfaceCalled);
+    }
+}