Split copy along single dimension

Signed-off-by: Lukasz Jobczyk <lukasz.jobczyk@intel.com>
This commit is contained in:
Lukasz Jobczyk 2022-09-07 16:49:50 +00:00 committed by Compute-Runtime-Automation
parent a72213943e
commit f65d2aeb87
2 changed files with 12 additions and 26 deletions

View File

@ -1155,35 +1155,20 @@ cl_int CommandQueueHw<GfxFamily>::enqueueBlitSplit(MultiDispatchInfo &dispatchIn
TimestampPacketContainer previousEnqueueNode;
previousEnqueueNode.swapNodes(*this->timestampPacketContainer);
auto srcOffset = dispatchInfo.peekBuiltinOpParams().srcOffset;
auto dstOffset = dispatchInfo.peekBuiltinOpParams().dstOffset;
auto size = dispatchInfo.peekBuiltinOpParams().size;
auto srcOffset = dispatchInfo.peekBuiltinOpParams().srcOffset.x;
auto dstOffset = dispatchInfo.peekBuiltinOpParams().dstOffset.x;
auto size = dispatchInfo.peekBuiltinOpParams().size.x;
auto remainingSize = size;
for (size_t i = 0; i < copyEngines.size(); i++) {
auto localSizeX = remainingSize.x / (copyEngines.size() - i);
auto localSizeY = remainingSize.y / (copyEngines.size() - i);
auto localSizeZ = remainingSize.z / (copyEngines.size() - i);
auto localSize = remainingSize / (copyEngines.size() - i);
auto localParams = dispatchInfo.peekBuiltinOpParams();
localParams.size.x = localSizeX;
localParams.size.y = localSizeY;
localParams.size.z = localSizeZ;
localParams.srcOffset.x = (srcOffset.x + size.x - remainingSize.x);
localParams.srcOffset.y = (srcOffset.y + size.y - remainingSize.y);
localParams.srcOffset.z = (srcOffset.z + size.z - remainingSize.z);
localParams.dstOffset.x = (dstOffset.x + size.x - remainingSize.x);
localParams.dstOffset.y = (dstOffset.y + size.y - remainingSize.y);
localParams.dstOffset.z = (dstOffset.z + size.z - remainingSize.z);
localParams.size.x = localSize;
localParams.srcOffset.x = (srcOffset + size - remainingSize);
localParams.dstOffset.x = (dstOffset + size - remainingSize);
dispatchInfo.setBuiltinOpParams(localParams);
remainingSize.x -= localSizeX;
remainingSize.y -= localSizeY;
remainingSize.z -= localSizeZ;
remainingSize -= localSize;
this->timestampPacketContainer->assignAndIncrementNodesRefCounts(previousEnqueueNode);

View File

@ -353,7 +353,7 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadThenEnqueueB
DebugManager.flags.UpdateTaskCountFromWait.set(3);
auto memoryManager = static_cast<MockMemoryManager *>(pDevice->getMemoryManager());
memoryManager->returnFakeAllocation = true;
auto cmdQHw = static_cast<MockCommandQueueHw<FamilyType> *>(this->pCmdQ);
auto cmdQHw = std::make_unique<MockCommandQueueHw<FamilyType>>(context, pClDevice, nullptr);
auto csr1 = std::make_unique<CommandStreamReceiverHw<FamilyType>>(*pDevice->executionEnvironment, pDevice->getRootDeviceIndex(), pDevice->getDeviceBitfield());
std::unique_ptr<OsContext> osContext1(OsContext::create(pDevice->getExecutionEnvironment()->rootDeviceEnvironments[0]->osInterface.get(), 0,
@ -391,8 +391,9 @@ HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueReadThenEnqueueB
EXPECT_EQ(cmdQHw->getGpgpuCommandStreamReceiver().peekTaskCount(), 0u);
EXPECT_EQ(cmdQHw->getBcsCommandStreamReceiver(aub_stream::EngineType::ENGINE_BCS)->peekTaskCount(), 0u);
pCmdQ->release();
pCmdQ = nullptr;
EXPECT_EQ(cmdQHw->kernelParams.size.x, 8 * MemoryConstants::megaByte);
const_cast<StackVec<TagNodeBase *, 32u> &>(cmdQHw->timestampPacketContainer->peekNodes()).clear();
}
HWTEST_F(IoqCommandQueueHwBlitTest, givenSplitBcsCopyWhenEnqueueBlockingReadThenEnqueueBlitSplit) {