Optimize copying buffers by blitter

Change-Id: Ib2ef0350beac25b9352db7a2e26863b6079cc667
Signed-off-by: Kamil Kopryk <kamil.kopryk@intel.com>
Related-To: NEO-4864
This commit is contained in:
Kamil Kopryk 2020-08-19 13:33:45 +02:00 committed by sys_ocldev
parent fef4d05832
commit b09872f595
4 changed files with 580 additions and 297 deletions

View File

@ -361,7 +361,7 @@ struct BcsTests : public CommandStreamReceiverHwTest {
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
size_t notAlignedBltSize = (3 * max2DBlitSize) + 1;
size_t alignedBltSize = (3 * max2DBlitSize);
uint32_t alignedNumberOfBlts = 3;
@ -369,19 +369,23 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredComman
auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts;
auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts;
auto alignedCopySize = Vec3<size_t>{alignedBltSize, 1, 1};
auto notAlignedCopySize = Vec3<size_t>{notAlignedBltSize, 1, 1};
auto alignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
{alignedBltSize, 1, 1}, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
alignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
auto notAlignedEstimatedSize = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
{notAlignedBltSize, 1, 1}, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
notAlignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment());
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(alignedCopySize, pClDevice->getRootDeviceEnvironment()));
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(notAlignedCopySize, pClDevice->getRootDeviceEnvironment()));
}
HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
const size_t debugCommandsSize = (EncodeMiFlushDW<FamilyType>::getMiFlushDwCmdSizeForDataWrite() + EncodeSempahore<FamilyType>::getSizeMiSemaphoreWait()) * 2;
constexpr uint32_t numberOfBlts = 3;
@ -400,11 +404,12 @@ HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequir
blitPropertiesContainer, false, true, pClDevice->getRootDeviceEnvironment());
EXPECT_EQ(expectedSize, estimatedSize);
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(blitProperties.copySize, pClDevice->getRootDeviceEnvironment()));
}
HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) {
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);
Vec3<size_t> notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2};
Vec3<size_t> alignedBltSize = {(3 * max2DBlitSize), 4, 2};
size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z;
@ -420,6 +425,75 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAdd
EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize);
EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize);
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(notAlignedBltSize, pClDevice->getRootDeviceEnvironment()));
EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(alignedBltSize, pClDevice->getRootDeviceEnvironment()));
}
// Checks estimateBlitCommandsSize for copy sizes spanning several max-width x max-height
// rectangles: the expected command count follows whichever strategy (region or per-row)
// isCopyRegionPreferred reports for the aligned size.
HWTEST_F(BcsTests, givenBltWithBigCopySizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) {
    auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
    const auto widthLimit = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitWidth(rootDeviceEnvironment));
    const auto heightLimit = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitHeight(rootDeviceEnvironment));
    constexpr auto bytesPerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK);

    // 3 x 4 full rectangles per slice, two slices.
    Vec3<size_t> alignedCopySize = {(3 * widthLimit), (4 * heightLimit), 2};
    // One extra column forces a fourth rectangle in every row band.
    Vec3<size_t> notAlignedCopySize = {(3 * widthLimit + 1), (4 * heightLimit), 2};

    const auto regionPreferred = BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(alignedCopySize, rootDeviceEnvironment);

    // Per-row expectation: one blit for each of the 4 * heightLimit rows in each slice.
    size_t alignedBlitCount = (4 * heightLimit * alignedCopySize.z);
    size_t notAlignedBlitCount = (4 * heightLimit * alignedCopySize.z);
    if (regionPreferred) {
        // Region expectation: columns x row bands x slices.
        alignedBlitCount = (3 * 4 * alignedCopySize.z);
        notAlignedBlitCount = (4 * 4 * notAlignedCopySize.z);
    }

    const auto alignedExpectedBytes = bytesPerBlit * alignedBlitCount;
    const auto notAlignedExpectedBytes = bytesPerBlit * notAlignedBlitCount;

    const auto alignedEstimate = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
        alignedCopySize, csrDependencies, false, false, rootDeviceEnvironment);
    const auto notAlignedEstimate = BlitCommandsHelper<FamilyType>::estimateBlitCommandsSize(
        notAlignedCopySize, csrDependencies, false, false, rootDeviceEnvironment);

    EXPECT_EQ(alignedExpectedBytes, alignedEstimate);
    EXPECT_EQ(notAlignedExpectedBytes, notAlignedEstimate);
}
// Verifies the blit counts reported for both dispatch strategies, and the resulting
// strategy preference, for three representative copy sizes.
HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) {
    auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment();
    const auto widthLimit = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitWidth(rootDeviceEnvironment));
    const auto heightLimit = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitHeight(rootDeviceEnvironment));

    struct Scenario {
        Vec3<size_t> copySize;
        size_t regionBlits;
        size_t perRowBlits;
        bool regionPreferred;
    };

    const Scenario scenarios[] = {
        // A single long row per slice: per-row needs one blit per slice, region needs one per width chunk.
        {{widthLimit * heightLimit, 1, 3}, heightLimit * 3, 3, false},
        // Two columns, 16 rows: region covers each slice with 2 blits, per-row needs one per row.
        {{2 * widthLimit, 16, 3}, 2 * 3, 16 * 3, true},
        // 2 x 3 full rectangles per slice versus one blit per row.
        {{2 * widthLimit, 3 * heightLimit, 4}, 2 * 3 * 4, 3 * heightLimit * 4, true},
    };

    for (const auto &scenario : scenarios) {
        const auto regionBlits = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyRegion(scenario.copySize, rootDeviceEnvironment);
        const auto perRowBlits = BlitCommandsHelper<FamilyType>::getNumberOfBlitsForCopyPerRow(scenario.copySize, rootDeviceEnvironment);

        EXPECT_EQ(scenario.perRowBlits, perRowBlits);
        EXPECT_EQ(scenario.regionBlits, regionBlits);
        if (scenario.regionPreferred) {
            EXPECT_TRUE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(scenario.copySize, rootDeviceEnvironment));
        } else {
            EXPECT_FALSE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(scenario.copySize, rootDeviceEnvironment));
        }
    }
}
HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorrectValue) {
@ -463,7 +537,7 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCa
}
HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) {
const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight;
const Vec3<size_t> bltSize = {(3 * max2DBlitSize), 4, 2};
const size_t numberOfBlts = 3 * bltSize.y * bltSize.z;
const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z;
@ -1316,6 +1390,10 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
cl_buffer_region subBufferRegion1 = {subBuffer1Offset, 1};
auto subBuffer1 = clUniquePtr<Buffer>(buffer1->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion1, retVal));
Vec3<size_t> copySizes[2] = {{1, 1, 1},
{1, 2, 1}};
for (auto &copySize : copySizes) {
{
// from hostPtr
HardwareParse hwParser;
@ -1324,7 +1402,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
nullptr, hostPtr,
graphicsAllocation1->getGpuAddress() +
subBuffer1->getOffset(),
0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1349,7 +1427,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
nullptr, hostPtr,
graphicsAllocation1->getGpuAddress() +
subBuffer1->getOffset(),
0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1371,7 +1449,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1,
graphicsAllocation2, 0, 0, {1, 1, 1}, 0, 0, 0, 0);
graphicsAllocation2, 0, 0, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1395,7 +1473,8 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
BuiltinOpParams builtinOpParams = {};
builtinOpParams.dstMemObj = subBuffer2.get();
builtinOpParams.srcMemObj = subBuffer1.get();
builtinOpParams.size.x = 1;
builtinOpParams.size.x = copySize.x;
builtinOpParams.size.y = copySize.y;
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, csr, builtinOpParams);
@ -1413,6 +1492,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres
EXPECT_EQ(graphicsAllocation2->getGpuAddress() + subBuffer2Offset, bltCmd->getDestinationBaseAddress());
EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress());
}
}
}
HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidGpuAddress) {
@ -1431,6 +1511,10 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
const size_t hostPtrOffset = 0x1234;
Vec3<size_t> copySizes[2] = {{4, 1, 1},
{4, 2, 1}};
for (auto &copySize : copySizes) {
{
// from hostPtr
HardwareParse hwParser;
@ -1439,12 +1523,10 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
mapAllocation, mapPtr,
graphicsAllocation->getGpuAddress(),
castToUint64(mapPtr),
{hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
{hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
hwParser.parseCommands<FamilyType>(csr.commandStream);
auto cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
@ -1464,10 +1546,8 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
csr, graphicsAllocation,
mapAllocation, mapPtr,
graphicsAllocation->getGpuAddress(),
castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0);
castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
auto cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
@ -1485,14 +1565,13 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
// bufferRect to hostPtr
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
auto copySize = Vec3<size_t>(4, 2, 1);
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr,
csr, graphicsAllocation,
mapAllocation, mapPtr,
graphicsAllocation->getGpuAddress(),
castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0);
castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
auto cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
@ -1513,9 +1592,8 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
mapAllocation, mapPtr,
graphicsAllocation->getGpuAddress(),
castToUint64(mapPtr),
{hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0);
{hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
hwParser.parseCommands<FamilyType>(csr.commandStream);
auto cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
@ -1528,6 +1606,7 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG
}
EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress());
}
}
memoryManager->freeGraphicsMemory(mapAllocation);
}
@ -1635,6 +1714,10 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) {
uint64_t srcOffset = 2;
uint64_t dstOffset = 3;
Vec3<size_t> copySizes[2] = {{1, 1, 1},
{1, 2, 1}};
for (auto &copySize : copySizes) {
{
// from hostPtr
BuiltinOpParams builtinOpParams = {};
@ -1642,7 +1725,7 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) {
builtinOpParams.srcSvmAlloc = gpuAllocation;
builtinOpParams.srcPtr = reinterpret_cast<void *>(svmData->cpuAllocation->getGpuAddress() + srcOffset);
builtinOpParams.dstPtr = reinterpret_cast<void *>(svmData->cpuAllocation->getGpuAddress() + dstOffset);
builtinOpParams.size = {1, 1, 1};
builtinOpParams.size = copySize;
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, builtinOpParams);
@ -1669,7 +1752,7 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) {
builtinOpParams.dstSvmAlloc = svmData->cpuAllocation;
builtinOpParams.dstPtr = reinterpret_cast<void *>(svmData->cpuAllocation + dstOffset);
builtinOpParams.srcPtr = reinterpret_cast<void *>(gpuAllocation + srcOffset);
builtinOpParams.size = {1, 1, 1};
builtinOpParams.size = copySize;
auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr,
csr, builtinOpParams);
@ -1688,6 +1771,8 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) {
EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress());
EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress());
}
}
svmAllocsManager.freeSVMAlloc(svmAlloc);
}
@ -1702,6 +1787,10 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex());
size_t addressOffsets[] = {0, 1, 1234};
Vec3<size_t> copySizes[2] = {{1, 1, 1},
{1, 2, 1}};
for (auto &copySize : copySizes) {
for (auto buffer1Offset : addressOffsets) {
{
@ -1712,7 +1801,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
csr, graphicsAllocation1,
nullptr, hostPtr,
graphicsAllocation1->getGpuAddress(),
0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0);
0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1736,7 +1825,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
csr, graphicsAllocation1, nullptr,
hostPtr,
graphicsAllocation1->getGpuAddress(),
0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0);
0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1758,7 +1847,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1,
graphicsAllocation2,
{buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0);
{buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, copySize, 0, 0, 0, 0);
blitBuffer(&csr, blitProperties, true);
@ -1773,6 +1862,117 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec
EXPECT_EQ(ptrOffset(graphicsAllocation2->getGpuAddress(), buffer2Offset), bltCmd->getSourceBaseAddress());
}
}
}
}
// Blits a host pointer into a buffer with a region that exceeds the maximum blit width and
// height by 16 in each dimension and has 2 slices, then walks the first five XY_COPY_BLT
// commands and checks their source/destination base addresses. The running
// srcAddressOffset/dstAddressOffset values are advanced between checks to the start of the
// next expected rectangle.
HWTEST_F(BcsTests, givenBufferWithBigSizesWhenBlitOperationCalledThenProgramCorrectGpuAddresses) {
auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment();
auto maxWidthToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitWidth(rootDeviceEnvironment));
auto maxHeightToCopy = static_cast<size_t>(BlitCommandsHelper<FamilyType>::getMaxBlitHeight(rootDeviceEnvironment));
auto &csr = pDevice->getUltCommandStreamReceiver<FamilyType>();
cl_int retVal = CL_SUCCESS;
auto buffer1 = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
// NOTE(review): buffer2 is created but not referenced again in this test.
auto buffer2 = clUniquePtr<Buffer>(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal));
void *hostPtr = reinterpret_cast<void *>(0x12340000);
auto graphicsAllocation = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex());
size_t srcOrigin[] = {1, 2, 0};
size_t dstOrigin[] = {4, 3, 1};
// One full-size rectangle plus a 16-wide / 16-high remainder in each direction, two slices.
size_t region[] = {maxWidthToCopy + 16, maxHeightToCopy + 16, 2};
// Pitches are deliberately larger than the region so that row/slice padding shows up in the addresses.
size_t srcRowPitch = region[0] + 34;
size_t srcSlicePitch = srcRowPitch * region[1] + 36;
size_t dstRowPitch = region[0] + 40;
size_t dstSlicePitch = dstRowPitch * region[1] + 44;
// Offset of the region origin from the base address: x + y * rowPitch + z * slicePitch.
auto srcAddressOffset = srcOrigin[0] + (srcOrigin[1] * srcRowPitch) + (srcOrigin[2] * srcSlicePitch);
auto dstAddressOffset = dstOrigin[0] + (dstOrigin[1] * dstRowPitch) + (dstOrigin[2] * dstSlicePitch);
EXPECT_TRUE(BlitCommandsHelper<FamilyType>::isCopyRegionPreferred(region, rootDeviceEnvironment));
// from hostPtr
HardwareParse hwParser;
auto offset = csr.commandStream.getUsed();
auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer,
csr, graphicsAllocation,
nullptr, hostPtr,
graphicsAllocation->getGpuAddress(),
0, srcOrigin, dstOrigin, region,
srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch);
blitBuffer(&csr, blitProperties, true);
hwParser.parseCommands<FamilyType>(csr.commandStream, offset);
// 1st rectangle (slice 0): xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1
auto cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(hwParser.cmdList.begin(), hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
auto bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*cmdIterator);
EXPECT_NE(nullptr, bltCmd);
// The host-pointer (source) address is only verified on full-range SVM devices.
if (pDevice->isFullRangeSvm()) {
EXPECT_EQ(ptrOffset(reinterpret_cast<uint64_t>(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress());
}
EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress());
// Step right past the first rectangle, staying in the same row band.
srcAddressOffset += maxWidthToCopy;
dstAddressOffset += maxWidthToCopy;
// 2nd rectangle (slice 0): xCopy = (region[0] - maxWidthToCopy), yCopy = maxHeightToCopy, zCopy = 1
cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(++cmdIterator, hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*cmdIterator);
EXPECT_NE(nullptr, bltCmd);
if (pDevice->isFullRangeSvm()) {
EXPECT_EQ(ptrOffset(reinterpret_cast<uint64_t>(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress());
}
EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress());
// Move to the start of the next row band: finish the current row (remaining width plus
// row padding), then skip the other maxHeightToCopy - 1 rows of the band just copied.
srcAddressOffset += (region[0] - maxWidthToCopy);
srcAddressOffset += (srcRowPitch - region[0]);
srcAddressOffset += (srcRowPitch * (maxHeightToCopy - 1));
dstAddressOffset += (region[0] - maxWidthToCopy);
dstAddressOffset += (dstRowPitch - region[0]);
dstAddressOffset += (dstRowPitch * (maxHeightToCopy - 1));
// 3rd rectangle (slice 0): xCopy = maxWidthToCopy, yCopy = (region[1] - maxHeightToCopy), zCopy = 1
cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(++cmdIterator, hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*cmdIterator);
EXPECT_NE(nullptr, bltCmd);
if (pDevice->isFullRangeSvm()) {
EXPECT_EQ(ptrOffset(reinterpret_cast<uint64_t>(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress());
}
EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress());
// Step right past the third rectangle, staying in the second row band.
srcAddressOffset += maxWidthToCopy;
dstAddressOffset += maxWidthToCopy;
// 4th rectangle (slice 0): xCopy = (region[0] - maxWidthToCopy), yCopy = (region[1] - maxHeightToCopy), zCopy = 1
cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(++cmdIterator, hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*cmdIterator);
EXPECT_NE(nullptr, bltCmd);
if (pDevice->isFullRangeSvm()) {
EXPECT_EQ(ptrOffset(reinterpret_cast<uint64_t>(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress());
}
EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress());
// Move to the region origin in the next slice: finish the current row, skip the rest of
// the second row band, then add the padding between the last region row and the next slice.
srcAddressOffset += (region[0] - maxWidthToCopy);
srcAddressOffset += (srcRowPitch - region[0]);
srcAddressOffset += (srcRowPitch * (region[1] - maxHeightToCopy - 1));
srcAddressOffset += (srcSlicePitch - (srcRowPitch * region[1]));
dstAddressOffset += (region[0] - maxWidthToCopy);
dstAddressOffset += (dstRowPitch - region[0]);
dstAddressOffset += (dstRowPitch * (region[1] - maxHeightToCopy - 1));
dstAddressOffset += (dstSlicePitch - (dstRowPitch * region[1]));
// 5th rectangle (slice 1): xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1
cmdIterator = find<typename FamilyType::XY_COPY_BLT *>(++cmdIterator, hwParser.cmdList.end());
ASSERT_NE(hwParser.cmdList.end(), cmdIterator);
bltCmd = genCmdCast<typename FamilyType::XY_COPY_BLT *>(*cmdIterator);
EXPECT_NE(nullptr, bltCmd);
if (pDevice->isFullRangeSvm()) {
EXPECT_EQ(ptrOffset(reinterpret_cast<uint64_t>(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress());
}
EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress());
}
HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCorrectly) {

View File

@ -926,7 +926,7 @@ uint32_t CommandStreamReceiverHw<GfxFamily>::blitBuffer(const BlitPropertiesCont
EncodeStoreMMIO<GfxFamily>::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress);
}
BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]);
BlitCommandsHelper<GfxFamily>::dispatchBlitCommands(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]);
if (blitProperties.outputTimestampPacket) {
if (profilingEnabled) {

View File

@ -111,12 +111,18 @@ struct BlitCommandsHelper {
static uint64_t getMaxBlitHeightOverride(const RootDeviceEnvironment &rootDeviceEnvironment);
static void dispatchPostBlitCommand(LinearStream &linearStream);
static size_t estimatePostBlitCommandSize();
static size_t estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket,
static size_t estimateBlitCommandsSize(const Vec3<size_t> &copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket,
bool profilingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, bool profilingEnabled,
bool debugPauseEnabled, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getNumberOfBlitsForCopyRegion(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment);
static size_t getNumberOfBlitsForCopyPerRow(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment);
static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice);
static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice);
static uint64_t calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice);
static uint64_t calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice);
static void dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
static void dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
static void dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
static void dispatchBlitCommandsRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment);
static void dispatchBlitMemoryColorFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, size_t patternSize, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment);
@ -135,5 +141,6 @@ struct BlitCommandsHelper {
static size_t getSizeForDebugPauseCommands();
static bool useOneBlitCopyCommand(Vec3<size_t> copySize, uint32_t bytesPerPixel);
static uint32_t getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, uint32_t srcSize, uint32_t dstSize);
static bool isCopyRegionPreferred(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment);
};
} // namespace NEO

View File

@ -68,46 +68,22 @@ size_t BlitCommandsHelper<GfxFamily>::estimatePostBlitCommandSize() {
}
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(Vec3<size_t> copySize, const CsrDependencies &csrDependencies,
size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const Vec3<size_t> &copySize, const CsrDependencies &csrDependencies,
bool updateTimestampPacket, bool profilingEnabled,
const RootDeviceEnvironment &rootDeviceEnvironment) {
size_t numberOfBlits = 0;
uint64_t width = 1;
uint64_t height = 1;
for (uint64_t slice = 0; slice < copySize.z; slice++) {
for (uint64_t row = 0; row < copySize.y; row++) {
uint64_t sizeToBlit = copySize.x;
while (sizeToBlit != 0) {
if (sizeToBlit > getMaxBlitWidth(rootDeviceEnvironment)) {
// dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
width = getMaxBlitWidth(rootDeviceEnvironment);
height = std::min((sizeToBlit / width), getMaxBlitHeight(rootDeviceEnvironment));
} else {
// dispatch 1D blt: (1 .. maxBlitWidth) x 1
width = sizeToBlit;
height = 1;
}
sizeToBlit -= (width * height);
numberOfBlits++;
}
}
}
const size_t cmdsSizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize());
size_t timestampCmdSize = 0;
if (updateTimestampPacket) {
if (profilingEnabled) {
timestampCmdSize = 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM);
} else {
timestampCmdSize = EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
}
timestampCmdSize = (profilingEnabled) ? 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM)
: EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
}
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) +
(cmdsSizePerBlit * numberOfBlits) + timestampCmdSize;
bool preferRegionCopy = isCopyRegionPreferred(copySize, rootDeviceEnvironment);
auto nBlits = preferRegionCopy ? getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment)
: getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
auto sizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize());
return TimestampPacketHelper::getRequiredCmdStreamSize<GfxFamily>(csrDependencies) + (sizePerBlit * nBlits) + timestampCmdSize;
}
template <typename GfxFamily>
@ -121,7 +97,8 @@ size_t BlitCommandsHelper<GfxFamily>::estimateBlitCommandsSize(const BlitPropert
rootDeviceEnvironment);
}
size += MemorySynchronizationCommands<GfxFamily>::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo());
size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
size += EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite();
size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_END);
if (debugPauseEnabled) {
size += BlitCommandsHelper<GfxFamily>::getSizeForDebugPauseCommands();
@ -197,6 +174,7 @@ void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferPerRow(const Bl
}
}
}
template <typename GfxFamily>
template <size_t patternSize>
void BlitCommandsHelper<GfxFamily>::dispatchBlitMemoryFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth) {
@ -301,4 +279,102 @@ uint32_t BlitCommandsHelper<GfxFamily>::getAvailableBytesPerPixel(size_t copySiz
return bytesPerPixel;
}
// Emits the blit commands for blitProperties into linearStream, choosing between the
// region-based and the per-row dispatcher according to isCopyRegionPreferred.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
    if (isCopyRegionPreferred(blitProperties.copySize, rootDeviceEnvironment)) {
        dispatchBlitCommandsForBufferRegion(blitProperties, linearStream, rootDeviceEnvironment);
        return;
    }
    dispatchBlitCommandsForBufferPerRow(blitProperties, linearStream, rootDeviceEnvironment);
}
// Returns the source address of the copy region's origin within the given slice:
// srcGpuAddress + srcOffset.x + srcOffset.y * srcRowPitch + (slice + srcOffset.z) * srcSlicePitch.
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
    const auto rowPart = (blitProperties.srcOffset.y * blitProperties.srcRowPitch);
    const auto slicePart = (blitProperties.srcSlicePitch * (slice + blitProperties.srcOffset.z));
    return blitProperties.srcGpuAddress + blitProperties.srcOffset.x + rowPart + slicePart;
}
// Returns the destination address of the copy region's origin within the given slice:
// dstGpuAddress + dstOffset.x + dstOffset.y * dstRowPitch + (slice + dstOffset.z) * dstSlicePitch.
template <typename GfxFamily>
uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
    const auto rowPart = (blitProperties.dstOffset.y * blitProperties.dstRowPitch);
    const auto slicePart = (blitProperties.dstSlicePitch * (slice + blitProperties.dstOffset.z));
    return blitProperties.dstGpuAddress + blitProperties.dstOffset.x + rowPart + slicePart;
}
// Dispatches a buffer copy as a grid of 2D blits. Every slice is cut into row bands of at
// most getMaxBlitHeight rows, and every band into chunks of at most getMaxBlitWidth columns;
// one XY_COPY_BLT followed by the post-blit command is written to linearStream per chunk.
template <typename GfxFamily>
void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
    const auto widthLimit = getMaxBlitWidth(rootDeviceEnvironment);
    const auto heightLimit = getMaxBlitHeight(rootDeviceEnvironment);

    for (size_t slice = 0u; slice < blitProperties.copySize.z; ++slice) {
        auto srcCursor = calculateBlitCommandSourceBaseAddressCopyRegion(blitProperties, slice);
        auto dstCursor = calculateBlitCommandDestinationBaseAddressCopyRegion(blitProperties, slice);

        for (auto rowsLeft = blitProperties.copySize.y; rowsLeft > 0;) {
            const auto bandHeight = static_cast<uint32_t>(std::min(rowsLeft, static_cast<size_t>(heightLimit)));

            for (auto columnsLeft = blitProperties.copySize.x; columnsLeft > 0;) {
                const auto chunkWidth = static_cast<uint32_t>(std::min(columnsLeft, static_cast<size_t>(widthLimit)));

                auto blit = GfxFamily::cmdInitXyCopyBlt;
                blit.setSourceBaseAddress(srcCursor);
                blit.setDestinationBaseAddress(dstCursor);
                blit.setTransferWidth(chunkWidth);
                blit.setTransferHeight(bandHeight);
                blit.setSourcePitch(static_cast<uint32_t>(blitProperties.srcRowPitch));
                blit.setDestinationPitch(static_cast<uint32_t>(blitProperties.dstRowPitch));
                appendBlitCommandsForBuffer(blitProperties, blit, rootDeviceEnvironment);

                auto streamSlot = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
                *streamSlot = blit;
                dispatchPostBlitCommand(linearStream);

                // Next chunk starts right after this one on the same rows.
                srcCursor += chunkWidth;
                dstCursor += chunkWidth;
                columnsLeft -= chunkWidth;
            }

            rowsLeft -= bandHeight;
            // The cursors sit at the end of the band's first row; finish that row (pitch
            // padding) and skip the band's remaining rows to reach the next band.
            srcCursor += (blitProperties.srcRowPitch - blitProperties.copySize.x);
            srcCursor += (blitProperties.srcRowPitch * (bandHeight - 1));
            dstCursor += (blitProperties.dstRowPitch - blitProperties.copySize.x);
            dstCursor += (blitProperties.dstRowPitch * (bandHeight - 1));
        }
    }
}
// Reports whether the region dispatcher should be used for copySize: true only when it
// needs strictly fewer blits than the per-row dispatcher (ties keep the per-row path).
template <typename GfxFamily>
bool BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
    const auto regionBlits = getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment);
    const auto perRowBlits = getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
    return regionBlits < perRowBlits;
}
// Returns the number of blits the region dispatcher needs for copySize:
// ceil(x / maxBlitWidth) * ceil(y / maxBlitHeight) * z.
// The ceiling divisions are done in integer arithmetic; routing size_t values through
// double (as std::ceil requires) silently rounds sizes above 2^53 and can miscount.
// Assumes both blit limits are non-zero (they are used as divisors).
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyRegion(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
    const uint64_t maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
    const uint64_t maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);

    // Quotient plus one if there is a remainder; avoids the overflow of (value + divisor - 1).
    const auto divideRoundingUp = [](uint64_t value, uint64_t divisor) {
        return static_cast<size_t>((value / divisor) + ((value % divisor) != 0 ? 1u : 0u));
    };

    const auto xBlits = divideRoundingUp(copySize.x, maxWidthToCopy);
    const auto yBlits = divideRoundingUp(copySize.y, maxHeightToCopy);
    const auto zBlits = static_cast<size_t>(copySize.z);

    return xBlits * yBlits * zBlits;
}
// Returns the number of blits the per-row strategy needs for copySize.
//
// Each of the copySize.y * copySize.z rows is split by the same rule the size estimation
// used before this helper existed: while more than maxBlitWidth elements remain, emit a
// 2D blit of maxBlitWidth x min(remaining / maxBlitWidth, maxBlitHeight); otherwise emit a
// single 1D blit with the remainder. Counting with ceil(x / (maxBlitWidth * maxBlitHeight))
// under-counts whenever a row leaves a tail shorter than maxBlitWidth after a 2D blit
// (e.g. x = maxBlitWidth + 1 needs two blits, not one), which under-sizes the command stream.
// NOTE(review): presumably dispatchBlitCommandsForBufferPerRow splits rows the same way — confirm.
// Assumes both blit limits are non-zero; otherwise the loop cannot make progress.
template <typename GfxFamily>
size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
    const uint64_t maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
    const uint64_t maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);

    size_t xBlits = 0u;
    uint64_t sizeToBlit = copySize.x;
    while (sizeToBlit != 0) {
        // Default: 1D blit covering whatever is left, (1 .. maxBlitWidth) x 1.
        uint64_t width = sizeToBlit;
        uint64_t height = 1;
        if (sizeToBlit > maxWidthToCopy) {
            // 2D blit: maxBlitWidth x (1 .. maxBlitHeight) full-width lines.
            width = maxWidthToCopy;
            height = std::min<uint64_t>(sizeToBlit / width, maxHeightToCopy);
        }
        sizeToBlit -= (width * height);
        ++xBlits;
    }

    const auto yBlits = copySize.y;
    const auto zBlits = copySize.z;
    return xBlits * yBlits * zBlits;
}
}
} // namespace NEO