Enabling clEnqueueSVMMemcpy between SVM and host pointer

Related-To: NEO-3011
Change-Id: I89aad599d7238ea2d319a4b1c72dffea2dba952b
Signed-off-by: Koska, Andrzej <andrzej.koska@intel.com>
This commit is contained in:
Koska, Andrzej 2019-05-06 10:50:25 +02:00 committed by sys_ocldev
parent c967327b3b
commit fa3d4f39f4
3 changed files with 272 additions and 23 deletions

View File

@ -264,6 +264,18 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMFree(cl_uint numSvmPointers,
return CL_SUCCESS;
}
// Fills `operationParams` for a one-dimensional copy of `size` bytes.
//
// size, srcOffset and dstOffset are written as three-component values whose
// second and third components are zero. The constness of `srcPtr` is cast
// away before it is stored. Either allocation pointer may be passed as
// nullptr by the caller; it is stored as given.
inline void setOperationParams(BuiltinDispatchInfoBuilder::BuiltinOpParams &operationParams, size_t size,
                               const void *srcPtr, GraphicsAllocation *srcSvmAlloc, size_t srcPtrOffset,
                               void *dstPtr, GraphicsAllocation *dstSvmAlloc, size_t dstPtrOffset) {
    // Source side of the copy.
    operationParams.srcPtr = const_cast<void *>(srcPtr);
    operationParams.srcSvmAlloc = srcSvmAlloc;
    operationParams.srcOffset = {srcPtrOffset, 0, 0};

    // Destination side of the copy.
    operationParams.dstPtr = dstPtr;
    operationParams.dstSvmAlloc = dstSvmAlloc;
    operationParams.dstOffset = {dstPtrOffset, 0, 0};

    // Extent of the copy.
    operationParams.size = {size, 0, 0};
}
template <typename GfxFamily>
cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
void *dstPtr,
@ -273,40 +285,100 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
const cl_event *eventWaitList,
cl_event *event) {
if ((dstPtr == nullptr) || (srcPtr == nullptr)) {
return CL_INVALID_VALUE;
}
auto dstSvmData = context->getSVMAllocsManager()->getSVMAlloc(dstPtr);
auto srcSvmData = context->getSVMAllocsManager()->getSVMAlloc(srcPtr);
if ((dstSvmData == nullptr) || (srcSvmData == nullptr)) {
enum CopyType { InvalidCopyType,
SvmToHost,
HostToSvm,
SvmToSvm };
CopyType copyType = InvalidCopyType;
if ((srcSvmData != nullptr) && (dstSvmData != nullptr)) {
copyType = SvmToSvm;
} else if ((srcSvmData == nullptr) && (dstSvmData != nullptr)) {
copyType = HostToSvm;
} else if (srcSvmData != nullptr) {
copyType = SvmToHost;
} else {
return CL_INVALID_VALUE;
}
MultiDispatchInfo dispatchInfo;
auto &builder = getDevice().getExecutionEnvironment()->getBuiltIns()->getBuiltinDispatchInfoBuilder(EBuiltInOps::CopyBufferToBuffer,
this->getContext(), this->getDevice());
BuiltInOwnershipWrapper builtInLock(builder, this->context);
BuiltinDispatchInfoBuilder::BuiltinOpParams operationParams;
operationParams.srcPtr = const_cast<void *>(srcPtr);
operationParams.dstPtr = dstPtr;
operationParams.srcSvmAlloc = srcSvmData->gpuAllocation;
operationParams.dstSvmAlloc = dstSvmData->gpuAllocation;
operationParams.srcOffset = {0, 0, 0};
operationParams.dstOffset = {0, 0, 0};
operationParams.size = {size, 0, 0};
builder.buildDispatchInfos(dispatchInfo, operationParams);
GeneralSurface s1(srcSvmData->gpuAllocation), s2(dstSvmData->gpuAllocation);
Surface *surfaces[] = {&s1, &s2};
enqueueHandler<CL_COMMAND_SVM_MEMCPY>(
surfaces,
blockingCopy ? true : false,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
Surface *surfaces[2];
if (copyType == SvmToHost) {
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation);
HostPtrSurface dstHostPtrSurf(dstPtr, size);
if (size != 0) {
bool status = getCommandStreamReceiver().createAllocationForHostSurface(dstHostPtrSurf, true);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedDstPtr = alignDown(dstPtr, 4);
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, alignedDstPtr, nullptr, dstPtrOffset);
surfaces[0] = &srcSvmSurf;
surfaces[1] = &dstHostPtrSurf;
builder.buildDispatchInfos(dispatchInfo, operationParams);
enqueueHandler<CL_COMMAND_READ_BUFFER>(
surfaces,
blockingCopy == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
} else if (copyType == HostToSvm) {
HostPtrSurface srcHostPtrSurf(const_cast<void *>(srcPtr), size);
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation);
if (size != 0) {
bool status = getCommandStreamReceiver().createAllocationForHostSurface(srcHostPtrSurf, false);
if (!status) {
return CL_OUT_OF_RESOURCES;
}
srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedSrcPtr = alignDown(const_cast<void *>(srcPtr), 4);
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
setOperationParams(operationParams, size, alignedSrcPtr, nullptr, srcPtrOffset, dstPtr, dstSvmData->gpuAllocation, 0);
surfaces[0] = &dstSvmSurf;
surfaces[1] = &srcHostPtrSurf;
builder.buildDispatchInfos(dispatchInfo, operationParams);
enqueueHandler<CL_COMMAND_WRITE_BUFFER>(
surfaces,
blockingCopy == CL_TRUE,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
} else {
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation);
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation);
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, dstPtr, dstSvmData->gpuAllocation, 0);
surfaces[0] = &srcSvmSurf;
surfaces[1] = &dstSvmSurf;
builder.buildDispatchInfos(dispatchInfo, operationParams);
enqueueHandler<CL_COMMAND_SVM_MEMCPY>(
surfaces,
blockingCopy ? true : false,
dispatchInfo,
numEventsInWaitList,
eventWaitList,
event);
}
if (event) {
auto pEvent = castToObjectOrAbort<Event>(*event);
pEvent->setCmdType(CL_COMMAND_SVM_MEMCPY);
}
return CL_SUCCESS;
}

View File

@ -154,7 +154,7 @@ class CommandStreamReceiver {
AllocationsList &getTemporaryAllocations();
AllocationsList &getAllocationsForReuse();
// Returns the pointer obtained from internalAllocationStorage.get(); does not modify the receiver.
InternalAllocationStorage *getInternalAllocationStorage() const { return internalAllocationStorage.get(); }
bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush);
MOCKABLE_VIRTUAL bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush);
// Preferred tag pool size. This implementation returns 512; the function is virtual, so derived classes may override it.
virtual size_t getPreferredTagPoolSize() const { return 512; }
// Stores the address of the supplied OsContext in this->osContext. Virtual, so derived classes may override it.
virtual void setupContext(OsContext &osContext) { this->osContext = &osContext; }
// Returns the stored OsContext by reference. The osContext pointer is dereferenced without a null check here.
OsContext &getOsContext() const { return *osContext; }

View File

@ -278,6 +278,133 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_InvalidValueSrcPtrIsNull) {
EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
// Copies from a host (non-SVM) source into the fixture's SVM allocation while
// requesting an output event, then checks that the event reports
// CL_COMMAND_SVM_MEMCPY as its command type.
TEST_F(EnqueueSvmTest, GivenSrcHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
    char srcHostPtr[260];
    void *pDstSVM = ptrSVM;
    void *pSrcHost = srcHostPtr;
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,    // cl_bool blocking_copy
        pDstSVM,  // void *dst_ptr
        pSrcHost, // const void *src_ptr
        256,      // size_t size
        0,        // cl_uint num_events_in_wait_list
        nullptr,  // cl_event *event_wait_list
        &event    // cl_event *event
    );
    // Fatal assertions: the event handle is handed to castToObjectOrAbort below
    // (which, per its name, aborts instead of failing the test), so stop here
    // if the enqueue failed or produced no event.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);
    constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
    clReleaseEvent(event);
}
// A zero-byte, non-blocking copy from a host (non-SVM) source into the
// fixture's SVM allocation is expected to return CL_SUCCESS.
TEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char hostSource[260];
    constexpr size_t bytesToCopy = 0;

    // Arguments: blocking flag, destination, source, size, wait-list count, wait list, output event.
    retVal = this->pCmdQ->enqueueSVMMemcpy(CL_FALSE, ptrSVM, hostSource, bytesToCopy, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
}
// Copies from a host (non-SVM) source into the fixture's SVM allocation through
// a mock queue and checks that the mock recorded CL_COMMAND_WRITE_BUFFER as the
// last command type.
HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrWhenEnqueueSVMMemcpyThenEnqueuWriteBufferIsCalled) {
    char srcHostPtr[260];
    void *pSrcHost = srcHostPtr;
    void *pDstSVM = ptrSVM;
    // nullptr rather than a literal 0 for the properties argument, matching the other mock-queue tests in this file.
    MockCommandQueueHw<FamilyType> myCmdQ(context, pDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(
        false,    // cl_bool blocking_copy
        pDstSVM,  // void *dst_ptr
        pSrcHost, // const void *src_ptr
        256,      // size_t size
        0,        // cl_uint num_events_in_wait_list
        nullptr,  // cl_event *event_wait_list
        nullptr   // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Expected value first, as in the other assertions of this file.
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_WRITE_BUFFER), myCmdQ.lastCommandType);
}
// Copies from the fixture's SVM allocation into a host (non-SVM) destination
// through a mock queue and checks that the mock recorded CL_COMMAND_READ_BUFFER
// as the last command type.
HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenEnqueueSVMMemcpyThenEnqueuReadBufferIsCalled) {
    char dstHostPtr[260];
    void *pDstHost = dstHostPtr;
    void *pSrcSVM = ptrSVM;
    // nullptr rather than a literal 0 for the properties argument, matching the other mock-queue tests in this file.
    MockCommandQueueHw<FamilyType> myCmdQ(context, pDevice, nullptr);
    retVal = myCmdQ.enqueueSVMMemcpy(
        false,    // cl_bool blocking_copy
        pDstHost, // void *dst_ptr
        pSrcSVM,  // const void *src_ptr
        256,      // size_t size
        0,        // cl_uint num_events_in_wait_list
        nullptr,  // cl_event *event_wait_list
        nullptr   // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);
    // Expected value first, as in the other assertions of this file.
    EXPECT_EQ(static_cast<cl_command_type>(CL_COMMAND_READ_BUFFER), myCmdQ.lastCommandType);
}
// Copies from the fixture's SVM allocation into a host (non-SVM) destination
// while requesting an output event, then checks that the event reports
// CL_COMMAND_SVM_MEMCPY as its command type.
TEST_F(EnqueueSvmTest, GivenDstHostPtrAndEventWhenEnqueueSVMMemcpyThenEventCommandTypeIsCorrectlySet) {
    char dstHostPtr[260];
    void *pDstHost = dstHostPtr;
    void *pSrcSVM = ptrSVM;
    cl_event event = nullptr;
    retVal = this->pCmdQ->enqueueSVMMemcpy(
        false,    // cl_bool blocking_copy
        pDstHost, // void *dst_ptr
        pSrcSVM,  // const void *src_ptr
        256,      // size_t size
        0,        // cl_uint num_events_in_wait_list
        nullptr,  // cl_event *event_wait_list
        &event    // cl_event *event
    );
    // Fatal assertions: the event handle is handed to castToObjectOrAbort below
    // (which, per its name, aborts instead of failing the test), so stop here
    // if the enqueue failed or produced no event.
    ASSERT_EQ(CL_SUCCESS, retVal);
    ASSERT_NE(nullptr, event);
    constexpr cl_command_type expectedCmd = CL_COMMAND_SVM_MEMCPY;
    cl_command_type actualCmd = castToObjectOrAbort<Event>(event)->getCommandType();
    EXPECT_EQ(expectedCmd, actualCmd);
    clReleaseEvent(event);
}
// A zero-byte, non-blocking copy from the fixture's SVM allocation into a host
// (non-SVM) destination is expected to return CL_SUCCESS.
TEST_F(EnqueueSvmTest, GivenDstHostPtrAndSizeZeroWhenEnqueueSVMMemcpyThenReturnSuccess) {
    char hostDestination[260];
    constexpr size_t bytesToCopy = 0;

    // Arguments: blocking flag, destination, source, size, wait-list count, wait list, output event.
    retVal = this->pCmdQ->enqueueSVMMemcpy(CL_FALSE, hostDestination, ptrSVM, bytesToCopy, 0, nullptr, nullptr);

    EXPECT_EQ(CL_SUCCESS, retVal);
}
// When neither the destination nor the source is an SVM allocation (both are
// plain stack buffers), the enqueue is expected to be rejected with
// CL_INVALID_VALUE.
TEST_F(EnqueueSvmTest, GivenDstHostPtrAndSrcHostPtrWhenEnqueueSVMMemcpyThenReturnInvalidValue) {
    char hostDestination[260];
    char hostSource[260];
    constexpr size_t bytesToCopy = 256;

    // Arguments: blocking flag, destination, source, size, wait-list count, wait list, output event.
    retVal = this->pCmdQ->enqueueSVMMemcpy(CL_FALSE, hostDestination, hostSource, bytesToCopy, 0, nullptr, nullptr);

    EXPECT_EQ(CL_INVALID_VALUE, retVal);
}
TEST_F(EnqueueSvmTest, enqueueSVMMemcpy_Success) {
void *pDstSVM = ptrSVM;
void *pSrcSVM = context->getSVMAllocsManager()->createSVMAlloc(256, {});
@ -915,3 +1042,53 @@ HWTEST_F(EnqueueSvmTestLocalMemory, givenEnabledLocalMemoryWhenMappedSvmRegionAn
auto walkerCount = hwParse.getCommandCount<WALKER_TYPE>();
EXPECT_EQ(2u, walkerCount);
}
// Test double for a command stream receiver: its createAllocationForHostSurface
// override ignores both arguments and always reports failure, so tests can
// exercise the error path taken when a host-pointer allocation cannot be created.
template <typename GfxFamily>
struct FailCsr : public CommandStreamReceiverHw<GfxFamily> {
    // explicit: an ExecutionEnvironment should never convert to a FailCsr implicitly.
    explicit FailCsr(ExecutionEnvironment &executionEnvironment) : CommandStreamReceiverHw<GfxFamily>(executionEnvironment) {}

    // Always fails. Returns the C++ literal `false` rather than the OpenCL
    // constant CL_FALSE, since the return type is a plain bool.
    bool createAllocationForHostSurface(HostPtrSurface &surface, bool requiresL3Flush) override {
        return false;
    }
};
// SVM -> host copy where the queue's command stream receiver is replaced by a
// FailCsr; the enqueue is expected to return CL_OUT_OF_RESOURCES.
HWTEST_F(EnqueueSvmTest, GivenDstHostPtrWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    char hostDestination[260];
    constexpr size_t bytesToCopy = 256;

    MockCommandQueueHw<FamilyType> cmdQ(context, pDevice, nullptr);
    auto failingCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment());

    // Swap the failing CSR into the queue's engine, remembering the previous one so it can be put back.
    CommandStreamReceiver *const previousCsr = cmdQ.engine->commandStreamReceiver;
    cmdQ.engine->commandStreamReceiver = failingCsr.get();

    // Arguments: blocking flag, destination, source, size, wait-list count, wait list, output event.
    retVal = cmdQ.enqueueSVMMemcpy(CL_FALSE, hostDestination, ptrSVM, bytesToCopy, 0, nullptr, nullptr);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);

    // Restore the previous CSR before the locals above are destroyed.
    cmdQ.engine->commandStreamReceiver = previousCsr;
}
// Host -> SVM copy where the queue's command stream receiver is replaced by a
// FailCsr; the enqueue is expected to return CL_OUT_OF_RESOURCES.
// NOTE(review): the test name says "SizeZero", but the copy size used here is
// 256 bytes - confirm which was intended before renaming.
HWTEST_F(EnqueueSvmTest, GivenSrcHostPtrAndSizeZeroWhenHostPtrAllocationCreationFailsThenReturnOutOfResource) {
    char hostSource[260];
    constexpr size_t bytesToCopy = 256;

    MockCommandQueueHw<FamilyType> cmdQ(context, pDevice, nullptr);
    auto failingCsr = std::make_unique<FailCsr<FamilyType>>(*pDevice->getExecutionEnvironment());

    // Swap the failing CSR into the queue's engine, remembering the previous one so it can be put back.
    CommandStreamReceiver *const previousCsr = cmdQ.engine->commandStreamReceiver;
    cmdQ.engine->commandStreamReceiver = failingCsr.get();

    // Arguments: blocking flag, destination, source, size, wait-list count, wait list, output event.
    retVal = cmdQ.enqueueSVMMemcpy(CL_FALSE, ptrSVM, hostSource, bytesToCopy, 0, nullptr, nullptr);
    EXPECT_EQ(CL_OUT_OF_RESOURCES, retVal);

    // Restore the previous CSR before the locals above are destroyed.
    cmdQ.engine->commandStreamReceiver = previousCsr;
}