Add Blitter support for clEnqueueSVMMemcpy in SVM_MEMCPY scenario

Related-To: NEO-4013
Change-Id: I0079bb9a88a1682b56fa4063447aa045cc548cc0
Signed-off-by: Krzysztof Gibala <krzysztof.gibala@intel.com>
This commit is contained in:
Krzysztof Gibala
2020-04-20 14:21:48 +02:00
committed by sys_ocldev
parent 42810f4690
commit 0fbdcc1d52
3 changed files with 70 additions and 6 deletions

View File

@ -618,7 +618,8 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const {
bool commandAllowed = (CL_COMMAND_READ_BUFFER == cmdType) || (CL_COMMAND_WRITE_BUFFER == cmdType) ||
(CL_COMMAND_COPY_BUFFER == cmdType) || (CL_COMMAND_READ_BUFFER_RECT == cmdType) ||
(CL_COMMAND_WRITE_BUFFER_RECT == cmdType) || (CL_COMMAND_COPY_BUFFER_RECT == cmdType);
(CL_COMMAND_WRITE_BUFFER_RECT == cmdType) || (CL_COMMAND_COPY_BUFFER_RECT == cmdType) ||
(CL_COMMAND_SVM_MEMCPY == cmdType);
return commandAllowed && blitAllowed;
}

View File

@ -20,11 +20,23 @@ struct ClBlitProperties {
const BuiltinOpParams &builtinOpParams) {
if (BlitterConstants::BlitDirection::BufferToBuffer == blitDirection) {
auto dstOffset = builtinOpParams.dstOffset.x + builtinOpParams.dstMemObj->getOffset();
auto srcOffset = builtinOpParams.srcOffset.x + builtinOpParams.srcMemObj->getOffset();
auto dstOffset = builtinOpParams.dstOffset.x;
auto srcOffset = builtinOpParams.srcOffset.x;
GraphicsAllocation *dstAllocation = nullptr;
GraphicsAllocation *srcAllocation = nullptr;
return BlitProperties::constructPropertiesForCopyBuffer(builtinOpParams.dstMemObj->getGraphicsAllocation(),
builtinOpParams.srcMemObj->getGraphicsAllocation(),
if (!builtinOpParams.dstSvmAlloc) {
dstOffset += builtinOpParams.dstMemObj->getOffset();
srcOffset += builtinOpParams.srcMemObj->getOffset();
dstAllocation = builtinOpParams.dstMemObj->getGraphicsAllocation();
srcAllocation = builtinOpParams.srcMemObj->getGraphicsAllocation();
} else {
dstAllocation = builtinOpParams.dstSvmAlloc;
srcAllocation = builtinOpParams.srcSvmAlloc;
}
return BlitProperties::constructPropertiesForCopyBuffer(dstAllocation,
srcAllocation,
{dstOffset, builtinOpParams.dstOffset.y, builtinOpParams.dstOffset.z},
{srcOffset, builtinOpParams.srcOffset.y, builtinOpParams.srcOffset.z},
builtinOpParams.size,
@ -113,7 +125,7 @@ struct ClBlitProperties {
return BlitterConstants::BlitDirection::HostPtrToBuffer;
} else if (CL_COMMAND_READ_BUFFER == commandType || CL_COMMAND_READ_BUFFER_RECT == commandType) {
return BlitterConstants::BlitDirection::BufferToHostPtr;
} else if (CL_COMMAND_COPY_BUFFER_RECT == commandType) {
} else if (CL_COMMAND_COPY_BUFFER_RECT == commandType || CL_COMMAND_SVM_MEMCPY == commandType) {
return BlitterConstants::BlitDirection::BufferToBuffer;
} else {
UNRECOVERABLE_IF(CL_COMMAND_COPY_BUFFER != commandType);

View File

@ -1740,6 +1740,57 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenDstHostPtrAndSrcHostPtrBlockingEnqueueSV
EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled);
}
// Enqueues a non-blocking SVM-to-SVM memcpy of 256 bytes and checks that the
// parsed BCS command stream contains an XY_COPY_BLT whose source and
// destination base addresses equal the two SVM pointers.
HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyWhenEnqueueSVMMemcpyThenSvmMemcpyCommandIsCalledAndBcs) {
    using XY_COPY_BLT = typename FamilyType::XY_COPY_BLT;

    auto cmdQ = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    auto pDstSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(device->getRootDeviceIndex(), 256, {});
    auto pSrcSVM = bcsMockContext->getSVMAllocsManager()->createSVMAlloc(device->getRootDeviceIndex(), 256, {});

    cmdQ->enqueueSVMMemcpy(false, pDstSVM, pSrcSVM, 256, 0, nullptr, nullptr);

    // NOTE(review): the stream is read through the fixture's `commandQueue`, not the local
    // `cmdQ`; presumably both resolve to the same BCS command stream receiver - confirm.
    HardwareParse hwParser;
    hwParser.parseCommands<FamilyType>(commandQueue->getBcsCommandStreamReceiver()->getCS(0));

    auto commandItor = find<XY_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
    EXPECT_NE(hwParser.cmdList.end(), commandItor);

    // EXPECT_NE is non-fatal, so the iterator must not be dereferenced when the command is
    // missing. Guarding (rather than returning early) keeps the SVM frees below on every path.
    if (hwParser.cmdList.end() != commandItor) {
        auto copyBltCmd = genCmdCast<XY_COPY_BLT *>(*commandItor);

        EXPECT_EQ(pSrcSVM, reinterpret_cast<void *>(copyBltCmd->getSourceBaseAddress()));
        EXPECT_EQ(pDstSVM, reinterpret_cast<void *>(copyBltCmd->getDestinationBaseAddress()));
    }

    bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pDstSVM);
    bcsMockContext->getSVMAllocsManager()->freeSVMAlloc(pSrcSVM);
}
// Installs a MyMockCsr as the BCS command stream receiver, then issues a
// non-blocking followed by a blocking SVM-to-SVM memcpy. Expects the mock's
// waitForTaskCountAndCleanAllocationListCalled counter to be 0 after the
// non-blocking call and 1 after the blocking one.
HWTEST_TEMPLATED_F(BcsBufferTests, givenSvmToSvmCopyTypeWhenEnqueueNonBlockingSVMMemcpyThenSvmMemcpyCommandIsEnqueuedWhenUsingBcsThenCallWait) {
    // Build the mock BCS receiver and hand it over to the context via bcsCsr.reset().
    auto bcsCsrMock = new MyMockCsr<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex());
    bcsCsrMock->taskCount = 1234;
    bcsCsrMock->initializeTagAllocation();
    bcsCsrMock->setupContext(*bcsMockContext->bcsOsContext);
    bcsMockContext->bcsCsr.reset(bcsCsrMock);

    // Point the queue's BCS engine at the mock and let the mock see the GPGPU receiver.
    EngineControl mockBcsEngine = {bcsCsrMock, bcsMockContext->bcsOsContext.get()};
    auto queue = clUniquePtr(new MockCommandQueueHw<FamilyType>(bcsMockContext.get(), device.get(), nullptr));
    queue->bcsEngine = &mockBcsEngine;
    auto &gpgpuReceiver = queue->getGpgpuCommandStreamReceiver();
    bcsCsrMock->gpgpuCsr = &gpgpuReceiver;

    auto svmManager = bcsMockContext->getSVMAllocsManager();
    const auto rootDeviceIndex = device->getRootDeviceIndex();
    auto dstSvmPtr = svmManager->createSVMAlloc(rootDeviceIndex, 256, {});
    auto srcSvmPtr = svmManager->createSVMAlloc(device->getRootDeviceIndex(), 256, {});

    // Non-blocking copy: the wait counter stays at zero and no temporary allocations are held.
    queue->enqueueSVMMemcpy(false, dstSvmPtr, srcSvmPtr, 256, 0, nullptr, nullptr);
    EXPECT_EQ(0u, bcsCsrMock->waitForTaskCountAndCleanAllocationListCalled);
    EXPECT_TRUE(gpgpuReceiver.getTemporaryAllocations().peekIsEmpty());
    EXPECT_TRUE(bcsCsrMock->getTemporaryAllocations().peekIsEmpty());

    // Blocking copy: the wait counter becomes one.
    queue->enqueueSVMMemcpy(true, dstSvmPtr, srcSvmPtr, 256, 0, nullptr, nullptr);
    EXPECT_EQ(1u, bcsCsrMock->waitForTaskCountAndCleanAllocationListCalled);

    svmManager->freeSVMAlloc(dstSvmPtr);
    svmManager->freeSVMAlloc(srcSvmPtr);
}
HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockedEnqueueWhenUsingBcsThenWaitForValidTaskCountOnBlockingCall) {
auto myMockCsr = new MyMockCsr<FamilyType>(*device->getExecutionEnvironment(), device->getRootDeviceIndex());
myMockCsr->taskCount = 1234;