Align a unified memory pointer during memcpy

Related-To: NEO-3570, NEO-3610

Change-Id: Id4d41da17a28ef512ba4c90bd71f419a24608d88
Signed-off-by: Jobczyk, Lukasz <lukasz.jobczyk@intel.com>
This commit is contained in:
Jobczyk, Lukasz
2019-08-27 10:46:55 +02:00
committed by sys_ocldev
parent bd8405aa3d
commit 4503e04083
3 changed files with 38 additions and 10 deletions

View File

@ -313,6 +313,12 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
BuiltinOpParams operationParams;
Surface *surfaces[2];
void *alignedSrcPtr = alignDown(const_cast<void *>(srcPtr), 4);
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
void *alignedDstPtr = alignDown(dstPtr, 4);
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
if (copyType == SvmToHost) {
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation);
HostPtrSurface dstHostPtrSurf(dstPtr, size);
@ -323,10 +329,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
}
dstPtr = reinterpret_cast<void *>(dstHostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedDstPtr = alignDown(dstPtr, 4);
size_t dstPtrOffset = ptrDiff(dstPtr, alignedDstPtr);
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, alignedDstPtr, nullptr, dstPtrOffset);
setOperationParams(operationParams, size, alignedSrcPtr, srcSvmData->gpuAllocation, srcPtrOffset, alignedDstPtr, nullptr, dstPtrOffset);
surfaces[0] = &srcSvmSurf;
surfaces[1] = &dstHostPtrSurf;
builder.buildDispatchInfos(dispatchInfo, operationParams);
@ -347,9 +350,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
}
srcPtr = reinterpret_cast<void *>(srcHostPtrSurf.getAllocation()->getGpuAddress());
}
void *alignedSrcPtr = alignDown(const_cast<void *>(srcPtr), 4);
size_t srcPtrOffset = ptrDiff(srcPtr, alignedSrcPtr);
setOperationParams(operationParams, size, alignedSrcPtr, nullptr, srcPtrOffset, dstPtr, dstSvmData->gpuAllocation, 0);
setOperationParams(operationParams, size, alignedSrcPtr, nullptr, srcPtrOffset, alignedDstPtr, dstSvmData->gpuAllocation, dstPtrOffset);
surfaces[0] = &dstSvmSurf;
surfaces[1] = &srcHostPtrSurf;
builder.buildDispatchInfos(dispatchInfo, operationParams);
@ -363,7 +364,7 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemcpy(cl_bool blockingCopy,
} else {
GeneralSurface srcSvmSurf(srcSvmData->gpuAllocation);
GeneralSurface dstSvmSurf(dstSvmData->gpuAllocation);
setOperationParams(operationParams, size, srcPtr, srcSvmData->gpuAllocation, 0, dstPtr, dstSvmData->gpuAllocation, 0);
setOperationParams(operationParams, size, alignedSrcPtr, srcSvmData->gpuAllocation, srcPtrOffset, alignedDstPtr, dstSvmData->gpuAllocation, dstPtrOffset);
surfaces[0] = &srcSvmSurf;
surfaces[1] = &dstSvmSurf;
builder.buildDispatchInfos(dispatchInfo, operationParams);
@ -429,10 +430,14 @@ cl_int CommandQueueHw<GfxFamily>::enqueueSVMMemFill(void *svmPtr,
BuiltinOpParams operationParams;
MemObj patternMemObj(this->context, 0, 0, alignUp(patternSize, 4), patternAllocation->getUnderlyingBuffer(),
patternAllocation->getUnderlyingBuffer(), patternAllocation, false, false, true);
void *alignedDstPtr = alignDown(svmPtr, 4);
size_t dstPtrOffset = ptrDiff(svmPtr, alignedDstPtr);
operationParams.srcMemObj = &patternMemObj;
operationParams.dstPtr = svmPtr;
operationParams.dstPtr = alignedDstPtr;
operationParams.dstSvmAlloc = svmData->gpuAllocation;
operationParams.dstOffset = {0, 0, 0};
operationParams.dstOffset = {dstPtrOffset, 0, 0};
operationParams.size = {size, 0, 0};
builder.buildDispatchInfos(dispatchInfo, operationParams);

View File

@ -496,6 +496,27 @@ TEST_F(EnqueueSvmTest, enqueueSVMMemcpyCoherentBlockedOnEvent_Success) {
uEvent->setStatus(-1);
}
// Enqueues an SVM memcpy whose destination (0x17) is not a multiple of 4 and
// checks the builtin-op params recorded on the mock queue: dstPtr must equal the
// destination aligned down to 4, and dstOffset.x must equal the bytes dropped by
// that alignment.
HWTEST_F(EnqueueSvmTest, givenUnalignedAddressWhenEnqueueMemcpyThenDispatchInfoHasAlignedAddressAndProperOffset) {
    void *unalignedDst = reinterpret_cast<void *>(0x17);
    void *srcSvm = ptrSVM;
    MockCommandQueueHw<FamilyType> myCmdQ(context, pDevice, 0);

    retVal = myCmdQ.enqueueSVMMemcpy(
        false,        // cl_bool blocking_copy
        unalignedDst, // void *dst_ptr
        srcSvm,       // const void *src_ptr
        0,            // size_t size
        0,            // cl_uint num_events_in_wait_list
        nullptr,      // cl_event *event_wait_list
        nullptr       // cl_event *event
    );
    EXPECT_EQ(CL_SUCCESS, retVal);

    // Expected values are derived from the same helpers used in the assertions' original form.
    void *expectedDst = alignDown(unalignedDst, 4);
    auto expectedOffset = ptrDiff(unalignedDst, expectedDst);

    EXPECT_EQ(expectedDst, myCmdQ.kernelParams.dstPtr);
    EXPECT_EQ(expectedOffset, myCmdQ.kernelParams.dstOffset.x);
}
TEST_F(EnqueueSvmTest, enqueueSVMMemFill_InvalidValue) {
void *svmPtr = nullptr;
const float pattern[1] = {1.2345f};

View File

@ -127,6 +127,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
}
void enqueueHandlerHook(const unsigned int commandType, const MultiDispatchInfo &dispatchInfo) override {
kernelParams = dispatchInfo.peekBuiltinOpParams();
lastCommandType = commandType;
for (auto &di : dispatchInfo) {
lastEnqueuedKernels.push_back(di.getKernel());
@ -148,6 +149,7 @@ class MockCommandQueueHw : public CommandQueueHw<GfxFamily> {
bool notifyEnqueueReadBufferCalled = false;
bool notifyEnqueueReadImageCalled = false;
bool cpuDataTransferHandlerCalled = false;
BuiltinOpParams kernelParams;
LinearStream *peekCommandStream() {
return this->commandStream;