diff --git a/runtime/command_queue/enqueue_svm.h b/runtime/command_queue/enqueue_svm.h index 166b06f034..dc1c07be5a 100644 --- a/runtime/command_queue/enqueue_svm.h +++ b/runtime/command_queue/enqueue_svm.h @@ -264,6 +264,18 @@ cl_int CommandQueueHw::enqueueSVMFree(cl_uint numSvmPointers, return CL_SUCCESS; } +inline void setOperationParams(BuiltinDispatchInfoBuilder::BuiltinOpParams &operationParams, size_t size, + const void *srcPtr, GraphicsAllocation *srcSvmAlloc, size_t srcPtrOffset, + void *dstPtr, GraphicsAllocation *dstSvmAlloc, size_t dstPtrOffset) { + operationParams.size = {size, 0, 0}; + operationParams.srcPtr = const_cast(srcPtr); + operationParams.srcSvmAlloc = srcSvmAlloc; + operationParams.srcOffset = {srcPtrOffset, 0, 0}; + operationParams.dstPtr = dstPtr; + operationParams.dstSvmAlloc = dstSvmAlloc; + operationParams.dstOffset = {dstPtrOffset, 0, 0}; +} + template cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, void *dstPtr, @@ -273,40 +285,100 @@ cl_int CommandQueueHw::enqueueSVMMemcpy(cl_bool blockingCopy, const cl_event *eventWaitList, cl_event *event) { + if ((dstPtr == nullptr) || (srcPtr == nullptr)) { + return CL_INVALID_VALUE; + } auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstPtr); auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcPtr); - if ((dstSvmData == nullptr) || (srcSvmData == nullptr)) { + + enum CopyType { InvalidCopyType, + SvmToHost, + HostToSvm, + SvmToSvm }; + CopyType copyType = InvalidCopyType; + if ((srcSvmData != nullptr) && (dstSvmData != nullptr)) { + copyType = SvmToSvm; + } else if ((srcSvmData == nullptr) && (dstSvmData != nullptr)) { + copyType = HostToSvm; + } else if (srcSvmData != nullptr) { + copyType = SvmToHost; + } else { return CL_INVALID_VALUE; } MultiDispatchInfo dispatchInfo; - auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer, this->getContext(), this->getDevice()); - BuiltInOwnershipWrapper builtInLock(builder, this->context); - BuiltinDispatchInfoBuilder::BuiltinOpParams operationParams; - operationParams.srcPtr = const_cast(srcPtr); - operationParams.dstPtr = dstPtr; - operationParams.srcSvmAlloc = srcSvmData->gpuAllocation; - operationParams.dstSvmAlloc = dstSvmData->gpuAllocation; - operationParams.srcOffset = {0, 0, 0}; - operationParams.dstOffset = {0, 0, 0}; - operationParams.size = {size, 0, 0}; - builder.buildDispatchInfos(dispatchInfo, operationParams); - GeneralSurface s1(srcSvmData->gpuAllocation), s2(dstSvmData->gpuAllocation); - Surface *surfaces[] = {&s1, &s2}; - - enqueueHandler( - surfaces, - blockingCopy ? true : false, - dispatchInfo, - numEventsInWaitList, - eventWaitList, - event); + Surface *surfaces[2]; + if (copyType == SvmToHost) { + GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation); + HostPtrSurface dstHostPtrSurf(dstPtr, size); + if (size != 0) { + bool status = getCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true); + if (!status) { + return CL_OUT_OF_RESOURCES; + } + dstPtr = reinterpret_cast(dstHostPtrSurf.getAllocation()->getGpuAddress()); + } + void *alignedDstPtr = alignDown(dstPtr, 4); + size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr); + setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, alignedDstPtr, nullptr, dstPtrOffset); + surfaces[0] = &srcSvmSurf; + surfaces[1] = &dstHostPtrSurf; + builder.buildDispatchInfos(dispatchInfo, operationParams); + enqueueHandler( + surfaces, + blockingCopy == CL_TRUE, + dispatchInfo, + numEventsInWaitList, + eventWaitList, + event); + } else if (copyType == HostToSvm) { + HostPtrSurface srcHostPtrSurf(const_cast(srcPtr), size); + GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation); + if (size != 0) { + bool status = getCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false); + if (!status) { + return CL_OUT_OF_RESOURCES; + } + srcPtr = reinterpret_cast(srcHostPtrSurf.getAllocation()->getGpuAddress()); + } + void *alignedSrcPtr = alignDown(const_cast(srcPtr), 4); + size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr); + setOperationParams(operationParams, size, alignedSrcPtr, nullptr, srcPtrOffset, dstPtr, dstSvmData->gpuAllocation, 0); + surfaces[0] = &dstSvmSurf; + surfaces[1] = &srcHostPtrSurf; + builder.buildDispatchInfos(dispatchInfo, operationParams); + enqueueHandler( + surfaces, + blockingCopy == CL_TRUE, + dispatchInfo, + numEventsInWaitList, + eventWaitList, + event); + } else { + GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation); + GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation); + setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, dstPtr, dstSvmData->gpuAllocation, 0); + surfaces[0] = &srcSvmSurf; + surfaces[1] = &dstSvmSurf; + builder.buildDispatchInfos(dispatchInfo, operationParams); + enqueueHandler( + surfaces, + blockingCopy ? true : false, + dispatchInfo, + numEventsInWaitList, + eventWaitList, + event); + } + if (event) { + auto pEvent = castToObjectOrAbort(*event); + pEvent->setCmdType(CL_COMMAND_SVM_MEMCPY); + } return CL_SUCCESS; } diff --git a/runtime/command_stream/command_stream_receiver.h b/runtime/command_stream/command_stream_receiver.h index 144a5c9573..d4c6c43bf3 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -154,7 +154,7 @@ class CommandStreamReceiver { AllocationsList &getTemporaryAllocations(); AllocationsList &getAllocationsForReuse(); InternalAllocationStorage *getInternalAllocationStorage() const { return internalAllocationStorage.get(); } - bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush); + MOCKABLE_VIRTUAL bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush); virtual size_t getPreferredTagPoolSize() const { return 512; } virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; } OsContext &getOsContext() const { return *osContext; } diff --git a/unit_tests/command_queue/enqueue_svm_tests.cpp b/unit_tests/command_queue/enqueue_svm_tests.cpp index abc4386bb3..f65e6483d6 100644 --- a/unit_tests/command_queue/enqueue_svm_tests.cpp +++ b/unit_tests/command_queue/enqueue_svm_tests.cpp @@ -278,6 +278,133 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_InvalidValueSrcPtrIsNull) { EXPECT_EQ(CL_INVALID_VALUE, retVal); } +TEST_F(EnqueueSvmTest, GivenSrcHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) { + char srcHostPtr[260]; + void *pDstSVM = ptrSVM; + void *pSrcSVM = srcHostPtr; + cl_event event = nullptr; + retVal = this->pCmdQ->enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + &event // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY; + cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); + EXPECT_EQ(expectedCmd, actualCmd); + clReleaseEvent(event); +} + +TEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) { + char srcHostPtr[260]; + void *pDstSVM = ptrSVM; + void *pSrcSVM = srcHostPtr; + retVal = this->pCmdQ->enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 0, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); +} + +HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalled) { + char srcHostPtr[260]; + void *pSrcSVM = srcHostPtr; + void *pDstSVM = ptrSVM; + MockCommandQueueHw myCmdQ(context, pDevice, 0); + retVal = myCmdQ.enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_WRITE_BUFFER)); +} + +HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalled) { + char dstHostPtr[260]; + void *pDstSVM = dstHostPtr; + void *pSrcSVM = ptrSVM; + MockCommandQueueHw myCmdQ(context, pDevice, 0); + retVal = myCmdQ.enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + EXPECT_EQ(myCmdQ.lastCommandType, static_cast(CL_COMMAND_READ_BUFFER)); +} + +TEST_F(EnqueueSvmTest, GivenDstHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) { + char dstHostPtr[260]; + void *pDstSVM = dstHostPtr; + void *pSrcSVM = ptrSVM; + cl_event event = nullptr; + retVal = this->pCmdQ->enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + &event // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); + constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY; + cl_command_type actualCmd = castToObjectOrAbort(event)->getCommandType(); + EXPECT_EQ(expectedCmd, actualCmd); + clReleaseEvent(event); +} + +TEST_F(EnqueueSvmTest, GivenDstHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) { + char dstHostPtr[260]; + void *pDstSVM = dstHostPtr; + void *pSrcSVM = ptrSVM; + retVal = this->pCmdQ->enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 0, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_SUCCESS, retVal); +} + +TEST_F(EnqueueSvmTest, GivenDstHostPtrAndSrcHostPtrWhenEnqueueSVMMemcpyThenReturnInvalidValue) { + char dstHostPtr[260]; + char srcHostPtr[260]; + void *pDstSVM = dstHostPtr; + void *pSrcSVM = srcHostPtr; + retVal = this->pCmdQ->enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_INVALID_VALUE, retVal); +} + TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_Success) { void *pDstSVM = ptrSVM; void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {}); @@ -915,3 +1042,53 @@ HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAn auto walkerCount = hwParse.getCommandCount(); EXPECT_EQ(2u, walkerCount); } + +template +struct FailCsr : public CommandStreamReceiverHw { + FailCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw(executionEnvironment){}; + bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override { + return CL_FALSE; + } +}; + +HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { + char dstHostPtr[260]; + void *pDstSVM = dstHostPtr; + void *pSrcSVM = ptrSVM; + MockCommandQueueHw cmdQ(context, pDevice, nullptr); + auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment()); + CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.engine->commandStreamReceiver; + cmdQ.engine->commandStreamReceiver = failCsr.get(); + retVal = cmdQ.enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); + cmdQ.engine->commandStreamReceiver = oldCommandStreamReceiver; +} + +HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) { + char srcHostPtr[260]; + void *pDstSVM = ptrSVM; + void *pSrcSVM = srcHostPtr; + MockCommandQueueHw cmdQ(context, pDevice, nullptr); + auto failCsr = std::make_unique>(*pDevice->getExecutionEnvironment()); + CommandStreamReceiver *oldCommandStreamReceiver = cmdQ.engine->commandStreamReceiver; + cmdQ.engine->commandStreamReceiver = failCsr.get(); + retVal = cmdQ.enqueueSVMMemcpy( + false, // cl_bool blocking_copy + pDstSVM, // void *dst_ptr + pSrcSVM, // const void *src_ptr + 256, // size_t size + 0, // cl_uint num_events_in_wait_list + nullptr, // cl_evebt *event_wait_list + nullptr // cL_event *event + ); + EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal); + cmdQ.engine->commandStreamReceiver = oldCommandStreamReceiver; +}