From b09872f5956a3e1a54c68aafc63bd535ae1fb0ad Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Wed, 19 Aug 2020 13:33:45 +0200 Subject: [PATCH] Optimize copying buffers by blitter Change-Id: Ib2ef0350beac25b9352db7a2e26863b6079cc667 Signed-off-by: Kamil Kopryk Related-To: NEO-4864 --- .../command_stream_receiver_hw_tests.cpp | 720 +++++++++++------- .../command_stream_receiver_hw_base.inl | 2 +- shared/source/helpers/blit_commands_helper.h | 9 +- .../helpers/blit_commands_helper_base.inl | 146 +++- 4 files changed, 580 insertions(+), 297 deletions(-) diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp index 7631b9b47e..34d097fdae 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp @@ -361,7 +361,7 @@ struct BcsTests : public CommandStreamReceiverHwTest { HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); size_t notAlignedBltSize = (3 * max2DBlitSize) + 1; size_t alignedBltSize = (3 * max2DBlitSize); uint32_t alignedNumberOfBlts = 3; @@ -369,19 +369,23 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredComman auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; + auto alignedCopySize = Vec3{alignedBltSize, 1, 1}; + auto notAlignedCopySize = Vec3{notAlignedBltSize, 1, 1}; auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - {alignedBltSize, 1, 
1}, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); + alignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - {notAlignedBltSize, 1, 1}, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); + notAlignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); + EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(alignedCopySize, pClDevice->getRootDeviceEnvironment())); + EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedCopySize, pClDevice->getRootDeviceEnvironment())); } HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); const size_t debugCommandsSize = (EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + EncodeSempahore::getSizeMiSemaphoreWait()) * 2; constexpr uint32_t numberOfBlts = 3; @@ -400,11 +404,12 @@ HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequir blitPropertiesContainer, false, true, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSize, estimatedSize); + EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(blitProperties.copySize, pClDevice->getRootDeviceEnvironment())); } HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - constexpr size_t cmdsSizePerBlit = sizeof(typename 
FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); Vec3 notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2}; Vec3 alignedBltSize = {(3 * max2DBlitSize), 4, 2}; size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z; @@ -420,6 +425,75 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAdd EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); + EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedBltSize, pClDevice->getRootDeviceEnvironment())); + EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(alignedBltSize, pClDevice->getRootDeviceEnvironment())); +} + +HWTEST_F(BcsTests, givenBltWithBigCopySizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { + auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); + auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); + auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); + + constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + Vec3 alignedBltSize = {(3 * maxWidthToCopy), (4 * maxHeightToCopy), 2}; + Vec3 notAlignedBltSize = {(3 * maxWidthToCopy + 1), (4 * maxHeightToCopy), 2}; + + auto isCopyRegionPrefered = BlitCommandsHelper::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment); + size_t alignedNumberOfBlts = isCopyRegionPrefered ? (3 * 4 * alignedBltSize.z) + : (4 * maxHeightToCopy * alignedBltSize.z); + size_t notAlignedNumberOfBlts = isCopyRegionPrefered ? 
(4 * 4 * notAlignedBltSize.z) + : (4 * maxHeightToCopy * alignedBltSize.z); + + auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; + auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; + + auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( + alignedBltSize, csrDependencies, false, false, rootDeviceEnvironment); + auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( + notAlignedBltSize, csrDependencies, false, false, rootDeviceEnvironment); + + EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); + EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); +} + +HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) { + auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); + auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); + auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); + + { + Vec3 copySize = {maxWidthToCopy * maxHeightToCopy, 1, 3}; + size_t expectednBlitsCopyRegion = maxHeightToCopy * 3; + size_t expectednBlitsCopyPerRow = 3; + auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); + auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); + + EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); + EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); + EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); + } + { + Vec3 copySize = {2 * maxWidthToCopy, 16, 3}; + size_t expectednBlitsCopyRegion = 2 * 3; + size_t expectednBlitsCopyPerRow = 16 * 3; + auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); + auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); + + EXPECT_EQ(expectednBlitsCopyPerRow, 
nBlitsCopyPerRow); + EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); + EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); + } + { + Vec3 copySize = {2 * maxWidthToCopy, 3 * maxHeightToCopy, 4}; + size_t expectednBlitsCopyRegion = 2 * 3 * 4; + size_t expectednBlitsCopyPerRow = 3 * maxHeightToCopy * 4; + auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); + auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); + + EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); + EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); + EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); + } } HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorrectValue) { @@ -463,7 +537,7 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCa } HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) { - const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; + constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; @@ -1316,102 +1390,108 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres cl_buffer_region subBufferRegion1 = {subBuffer1Offset, 1}; auto subBuffer1 = clUniquePtr(buffer1->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion1, retVal)); - { - // from hostPtr - HardwareParse hwParser; - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation1, - nullptr, hostPtr, - 
graphicsAllocation1->getGpuAddress() + - subBuffer1->getOffset(), - 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); + Vec3 copySizes[2] = {{1, 1, 1}, + {1, 2, 1}}; - blitBuffer(&csr, blitProperties, true); + for (auto &copySize : copySizes) { + { + // from hostPtr + HardwareParse hwParser; + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation1, + nullptr, hostPtr, + graphicsAllocation1->getGpuAddress() + + subBuffer1->getOffset(), + 0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); - hwParser.parseCommands(csr.commandStream); + blitBuffer(&csr, blitProperties, true); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + hwParser.parseCommands(csr.commandStream); - auto bltCmd = genCmdCast(*cmdIterator); - ASSERT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + + auto bltCmd = genCmdCast(*cmdIterator); + ASSERT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getDestinationBaseAddress()); } - EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getDestinationBaseAddress()); - } - { - // to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation1, - nullptr, hostPtr, - graphicsAllocation1->getGpuAddress() + - subBuffer1->getOffset(), - 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 
0); + { + // to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation1, + nullptr, hostPtr, + graphicsAllocation1->getGpuAddress() + + subBuffer1->getOffset(), + 0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - ASSERT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + ASSERT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); + } + EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); } - EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); - } - { - // Buffer to Buffer - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, - graphicsAllocation2, 0, 0, {1, 1, 1}, 0, 0, 0, 0); + { + // Buffer to Buffer + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, + graphicsAllocation2, 0, 0, copySize, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); 
+ blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - ASSERT_NE(nullptr, bltCmd); - EXPECT_EQ(graphicsAllocation1->getGpuAddress(), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(graphicsAllocation2->getGpuAddress(), bltCmd->getSourceBaseAddress()); - } + auto bltCmd = genCmdCast(*cmdIterator); + ASSERT_NE(nullptr, bltCmd); + EXPECT_EQ(graphicsAllocation1->getGpuAddress(), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(graphicsAllocation2->getGpuAddress(), bltCmd->getSourceBaseAddress()); + } - { - // Buffer to Buffer - with object offset - const size_t subBuffer2Offset = 0x20; - cl_buffer_region subBufferRegion2 = {subBuffer2Offset, 1}; - auto subBuffer2 = clUniquePtr(buffer2->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion2, retVal)); + { + // Buffer to Buffer - with object offset + const size_t subBuffer2Offset = 0x20; + cl_buffer_region subBufferRegion2 = {subBuffer2Offset, 1}; + auto subBuffer2 = clUniquePtr(buffer2->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion2, retVal)); - BuiltinOpParams builtinOpParams = {}; - builtinOpParams.dstMemObj = subBuffer2.get(); - builtinOpParams.srcMemObj = subBuffer1.get(); - builtinOpParams.size.x = 1; + BuiltinOpParams builtinOpParams = {}; + builtinOpParams.dstMemObj = subBuffer2.get(); + builtinOpParams.srcMemObj = subBuffer1.get(); + builtinOpParams.size.x = copySize.x; + builtinOpParams.size.y = copySize.y; - auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, csr, builtinOpParams); + auto blitProperties = 
ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, csr, builtinOpParams); - auto offset = csr.commandStream.getUsed(); - blitBuffer(&csr, blitProperties, true); + auto offset = csr.commandStream.getUsed(); + blitBuffer(&csr, blitProperties, true); - HardwareParse hwParser; - hwParser.parseCommands(csr.commandStream, offset); + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - EXPECT_EQ(graphicsAllocation2->getGpuAddress() + subBuffer2Offset, bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + EXPECT_EQ(graphicsAllocation2->getGpuAddress() + subBuffer2Offset, bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); + } } } @@ -1431,102 +1511,101 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG const size_t hostPtrOffset = 0x1234; - { - // from hostPtr - HardwareParse hwParser; - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), - {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); + Vec3 copySizes[2] = {{4, 1, 1}, + {4, 2, 1}}; - blitBuffer(&csr, blitProperties, true); + for (auto &copySize : copySizes) { + { + // from hostPtr + HardwareParse hwParser; + auto blitProperties = 
BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), + {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); - hwParser.parseCommands(csr.commandStream); + blitBuffer(&csr, blitProperties, true); + hwParser.parseCommands(csr.commandStream); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); - } - { - // to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); + { + // to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, 
copySize, 0, 0, 0, 0); + blitBuffer(&csr, blitProperties, true); + hwParser.parseCommands(csr.commandStream, offset); - blitBuffer(&csr, blitProperties, true); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - hwParser.parseCommands(csr.commandStream, offset); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); + } + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); - } - { - // bufferRect to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0); + { + // bufferRect to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto copySize = Vec3(4, 2, 1); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); + blitBuffer(&csr, blitProperties, true); + hwParser.parseCommands(csr.commandStream, offset); - blitBuffer(&csr, 
blitProperties, true); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - hwParser.parseCommands(csr.commandStream, offset); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); + } + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); - } - { - // bufferWrite from hostPtr - HardwareParse hwParser; - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), - {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + { + // bufferWrite from hostPtr + HardwareParse hwParser; + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), + {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); + blitBuffer(&csr, blitProperties, true); + hwParser.parseCommands(csr.commandStream); - hwParser.parseCommands(csr.commandStream); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto cmdIterator = find(hwParser.cmdList.begin(), 
hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); } memoryManager->freeGraphicsMemory(mapAllocation); @@ -1635,59 +1714,65 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) { uint64_t srcOffset = 2; uint64_t dstOffset = 3; - { - // from hostPtr - BuiltinOpParams builtinOpParams = {}; - builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; - builtinOpParams.srcSvmAlloc = gpuAllocation; - builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + srcOffset); - builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + dstOffset); - builtinOpParams.size = {1, 1, 1}; + Vec3 copySizes[2] = {{1, 1, 1}, + {1, 2, 1}}; - auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, builtinOpParams); - EXPECT_EQ(gpuAllocation, blitProperties.srcAllocation); - EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); + for (auto &copySize : copySizes) { + { + // from hostPtr + BuiltinOpParams builtinOpParams = {}; + builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; + builtinOpParams.srcSvmAlloc = gpuAllocation; + builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + srcOffset); + builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + dstOffset); + builtinOpParams.size = 
copySize; - blitBuffer(&csr, blitProperties, true); + auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, builtinOpParams); + EXPECT_EQ(gpuAllocation, blitProperties.srcAllocation); + EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); - HardwareParse hwParser; - hwParser.parseCommands(csr.commandStream, 0); + blitBuffer(&csr, blitProperties, true); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream, 0); - auto bltCmd = genCmdCast(*cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + + EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); + } + { + // to hostPtr + BuiltinOpParams builtinOpParams = {}; + builtinOpParams.srcSvmAlloc = gpuAllocation; + builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; + builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation + dstOffset); + builtinOpParams.srcPtr = reinterpret_cast(gpuAllocation + srcOffset); + builtinOpParams.size = copySize; + + auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, builtinOpParams); + + auto offset = csr.commandStream.getUsed(); + blitBuffer(&csr, blitProperties, true); + + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream, offset); + + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + + auto bltCmd = 
genCmdCast(*cmdIterator); + + EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); + } } - { - // to hostPtr - BuiltinOpParams builtinOpParams = {}; - builtinOpParams.srcSvmAlloc = gpuAllocation; - builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; - builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation + dstOffset); - builtinOpParams.srcPtr = reinterpret_cast(gpuAllocation + srcOffset); - builtinOpParams.size = {1, 1, 1}; - auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, builtinOpParams); - - auto offset = csr.commandStream.getUsed(); - blitBuffer(&csr, blitProperties, true); - - HardwareParse hwParser; - hwParser.parseCommands(csr.commandStream, offset); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - - EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); - } svmAllocsManager.freeSVMAlloc(svmAlloc); } @@ -1702,79 +1787,194 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); size_t addressOffsets[] = {0, 1, 1234}; + Vec3 copySizes[2] = {{1, 1, 1}, + {1, 2, 1}}; - for (auto buffer1Offset : addressOffsets) { - { - // from hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation1, - nullptr, hostPtr, - graphicsAllocation1->getGpuAddress(), - 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); + for (auto &copySize : copySizes) { - 
blitBuffer(&csr, blitProperties, true); + for (auto buffer1Offset : addressOffsets) { + { + // from hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation1, + nullptr, hostPtr, + graphicsAllocation1->getGpuAddress(), + 0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0); - hwParser.parseCommands(csr.commandStream, offset); + blitBuffer(&csr, blitProperties, true); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + hwParser.parseCommands(csr.commandStream, offset); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getSourceBaseAddress()); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); } - EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); - } - { - // to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation1, nullptr, - hostPtr, - graphicsAllocation1->getGpuAddress(), - 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); + { + // to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = 
BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation1, nullptr, + hostPtr, + graphicsAllocation1->getGpuAddress(), + 0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getDestinationBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getDestinationBaseAddress()); + } + EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); } - EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); - } - for (auto buffer2Offset : addressOffsets) { - // Buffer to Buffer - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, - graphicsAllocation2, - {buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); + for (auto buffer2Offset : addressOffsets) { + // Buffer to Buffer + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, + graphicsAllocation2, + {buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, copySize, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + 
blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(ptrOffset(graphicsAllocation2->getGpuAddress(), buffer2Offset), bltCmd->getSourceBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(ptrOffset(graphicsAllocation2->getGpuAddress(), buffer2Offset), bltCmd->getSourceBaseAddress()); + } } } } +HWTEST_F(BcsTests, givenBufferWithBigSizesWhenBlitOperationCalledThenProgramCorrectGpuAddresses) { + auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); + auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); + auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); + auto &csr = pDevice->getUltCommandStreamReceiver(); + + cl_int retVal = CL_SUCCESS; + auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + void *hostPtr = reinterpret_cast(0x12340000); + auto graphicsAllocation = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); + + size_t srcOrigin[] = {1, 2, 0}; + size_t dstOrigin[] = {4, 3, 1}; + size_t region[] = {maxWidthToCopy + 16, maxHeightToCopy + 16, 2}; + size_t srcRowPitch = region[0] + 34; + size_t srcSlicePitch = 
srcRowPitch * region[1] + 36; + size_t dstRowPitch = region[0] + 40; + size_t dstSlicePitch = dstRowPitch * region[1] + 44; + auto srcAddressOffset = srcOrigin[0] + (srcOrigin[1] * srcRowPitch) + (srcOrigin[2] * srcSlicePitch); + auto dstAddressOffset = dstOrigin[0] + (dstOrigin[1] * dstRowPitch) + (dstOrigin[2] * dstSlicePitch); + + EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(region, rootDeviceEnvironment)); + + // from hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation, + nullptr, hostPtr, + graphicsAllocation->getGpuAddress(), + 0, srcOrigin, dstOrigin, region, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch); + + blitBuffer(&csr, blitProperties, true); + hwParser.parseCommands(csr.commandStream, offset); + + //1st rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); + + srcAddressOffset += maxWidthToCopy; + dstAddressOffset += maxWidthToCopy; + + // 2nd rectangle xCopy = (region[0] - maxWidthToCopy), yCopy = (region[0] - maxHeightToCopy), zCopy = 1 + cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); + } + 
EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); + + srcAddressOffset += (region[0] - maxWidthToCopy); + srcAddressOffset += (srcRowPitch - region[0]); + srcAddressOffset += (srcRowPitch * (maxHeightToCopy - 1)); + dstAddressOffset += (region[0] - maxWidthToCopy); + dstAddressOffset += (dstRowPitch - region[0]); + dstAddressOffset += (dstRowPitch * (maxHeightToCopy - 1)); + + // 3rd rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 + cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); + + srcAddressOffset += maxWidthToCopy; + dstAddressOffset += maxWidthToCopy; + + //4th rectangle xCopy = (region[0] - maxWidthToCopy), yCopy = (region[0] - maxHeightToCopy), zCopy = 1 + cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); + + srcAddressOffset += (region[0] - maxWidthToCopy); + srcAddressOffset += (srcRowPitch - region[0]); + srcAddressOffset += (srcRowPitch * (region[1] - maxHeightToCopy - 1)); + srcAddressOffset += (srcSlicePitch - (srcRowPitch * region[1])); + dstAddressOffset += (region[0] - maxWidthToCopy); + dstAddressOffset += (dstRowPitch - region[0]); + dstAddressOffset += (dstRowPitch * (region[1] - maxHeightToCopy - 1)); + 
dstAddressOffset += (dstSlicePitch - (dstRowPitch * region[1])); + + //5th rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 + cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); + } + EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); +} + HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCorrectly) { auto &csr = pDevice->getUltCommandStreamReceiver(); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index 41baa91836..303584e230 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -926,7 +926,7 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont EncodeStoreMMIO::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress); } - BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); + BlitCommandsHelper::dispatchBlitCommands(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); if (blitProperties.outputTimestampPacket) { if (profilingEnabled) { diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 5065e12b5d..a53e258188 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -111,12 +111,18 @@ struct BlitCommandsHelper { static uint64_t getMaxBlitHeightOverride(const RootDeviceEnvironment 
&rootDeviceEnvironment); static void dispatchPostBlitCommand(LinearStream &linearStream); static size_t estimatePostBlitCommandSize(); - static size_t estimateBlitCommandsSize(Vec3 copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket, + static size_t estimateBlitCommandsSize(const Vec3 ©Size, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, bool profilingEnabled, bool debugPauseEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); + static size_t getNumberOfBlitsForCopyRegion(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); + static size_t getNumberOfBlitsForCopyPerRow(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); + static uint64_t calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); + static uint64_t calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); + static void dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); + static void dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitCommandsRegion(const BlitProperties &blitProperties, LinearStream 
&linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitMemoryColorFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, size_t patternSize, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment); @@ -135,5 +141,6 @@ struct BlitCommandsHelper { static size_t getSizeForDebugPauseCommands(); static bool useOneBlitCopyCommand(Vec3 copySize, uint32_t bytesPerPixel); static uint32_t getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, uint32_t srcSize, uint32_t dstSize); + static bool isCopyRegionPreferred(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); }; } // namespace NEO diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 79dfc2a993..c6136b7c16 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -68,46 +68,22 @@ size_t BlitCommandsHelper::estimatePostBlitCommandSize() { } template -size_t BlitCommandsHelper::estimateBlitCommandsSize(Vec3 copySize, const CsrDependencies &csrDependencies, +size_t BlitCommandsHelper::estimateBlitCommandsSize(const Vec3 ©Size, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment) { - size_t numberOfBlits = 0; - uint64_t width = 1; - uint64_t height = 1; - - for (uint64_t slice = 0; slice < copySize.z; slice++) { - for (uint64_t row = 0; row < copySize.y; row++) { - uint64_t sizeToBlit = copySize.x; - while (sizeToBlit != 0) { - if (sizeToBlit > getMaxBlitWidth(rootDeviceEnvironment)) { - // dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight) - width = getMaxBlitWidth(rootDeviceEnvironment); - height = std::min((sizeToBlit / width), getMaxBlitHeight(rootDeviceEnvironment)); - - } else { - // dispatch 1D blt: (1 .. 
maxBlitWidth) x 1 - width = sizeToBlit; - height = 1; - } - sizeToBlit -= (width * height); - numberOfBlits++; - } - } - } - - const size_t cmdsSizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize()); - size_t timestampCmdSize = 0; if (updateTimestampPacket) { - if (profilingEnabled) { - timestampCmdSize = 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); - } else { - timestampCmdSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); - } + timestampCmdSize = (profilingEnabled) ? 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM) + : EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); } - return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + - (cmdsSizePerBlit * numberOfBlits) + timestampCmdSize; + bool preferRegionCopy = isCopyRegionPreferred(copySize, rootDeviceEnvironment); + auto nBlits = preferRegionCopy ? getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment) + : getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); + + auto sizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize()); + + return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + (sizePerBlit * nBlits) + timestampCmdSize; } template @@ -121,7 +97,8 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropert rootDeviceEnvironment); } size += MemorySynchronizationCommands::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo()); - size += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); + size += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); + size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); if (debugPauseEnabled) { size += BlitCommandsHelper::getSizeForDebugPauseCommands(); @@ -197,6 +174,7 @@ void BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(const Bl } } } + template template void BlitCommandsHelper::dispatchBlitMemoryFill(NEO::GraphicsAllocation *dstAlloc, 
uint32_t *pattern, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth) {
@@ -301,4 +279,130 @@ uint32_t BlitCommandsHelper::getAvailableBytesPerPixel(size_t copySiz
     return bytesPerPixel;
 }
 
+// Dispatches a buffer copy using the strategy isCopyRegionPreferred selects for the copy size:
+// 2D region blits when they need fewer commands, per-row blits otherwise.
+template <typename GfxFamily>
+void BlitCommandsHelper<GfxFamily>::dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
+    if (isCopyRegionPreferred(blitProperties.copySize, rootDeviceEnvironment)) {
+        dispatchBlitCommandsForBufferRegion(blitProperties, linearStream, rootDeviceEnvironment);
+    } else {
+        dispatchBlitCommandsForBufferPerRow(blitProperties, linearStream, rootDeviceEnvironment);
+    }
+}
+
+// Source address of the copy-region origin in slice `slice`, counted from srcOffset.z.
+template <typename GfxFamily>
+uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
+    return blitProperties.srcGpuAddress + blitProperties.srcOffset.x +
+           (blitProperties.srcOffset.y * blitProperties.srcRowPitch) +
+           (blitProperties.srcSlicePitch * (slice + blitProperties.srcOffset.z));
+}
+
+// Destination address of the copy-region origin in slice `slice`, counted from dstOffset.z.
+template <typename GfxFamily>
+uint64_t BlitCommandsHelper<GfxFamily>::calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) {
+    return blitProperties.dstGpuAddress + blitProperties.dstOffset.x +
+           (blitProperties.dstOffset.y * blitProperties.dstRowPitch) +
+           (blitProperties.dstSlicePitch * (slice + blitProperties.dstOffset.z));
+}
+
+// Copies the region slice by slice, tiling every slice with 2D blits of at most
+// maxBlitWidth x maxBlitHeight; each XY_COPY_BLT is followed by the post-blit command.
+template <typename GfxFamily>
+void BlitCommandsHelper<GfxFamily>::dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) {
+    const auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
+    const auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);
+
+    for (size_t slice = 0u; slice < blitProperties.copySize.z; ++slice) {
+        auto srcAddress = calculateBlitCommandSourceBaseAddressCopyRegion(blitProperties, slice);
+        auto dstAddress = calculateBlitCommandDestinationBaseAddressCopyRegion(blitProperties, slice);
+        auto heightToCopy = blitProperties.copySize.y;
+
+        while (heightToCopy > 0) {
+            auto height = static_cast<uint32_t>(std::min(heightToCopy, static_cast<size_t>(maxHeightToCopy)));
+            auto widthToCopy = blitProperties.copySize.x;
+
+            while (widthToCopy > 0) {
+                auto width = static_cast<uint32_t>(std::min(widthToCopy, static_cast<size_t>(maxWidthToCopy)));
+                auto bltCmd = GfxFamily::cmdInitXyCopyBlt;
+
+                bltCmd.setSourceBaseAddress(srcAddress);
+                bltCmd.setDestinationBaseAddress(dstAddress);
+                bltCmd.setTransferWidth(width);
+                bltCmd.setTransferHeight(height);
+                bltCmd.setSourcePitch(static_cast<uint32_t>(blitProperties.srcRowPitch));
+                bltCmd.setDestinationPitch(static_cast<uint32_t>(blitProperties.dstRowPitch));
+
+                appendBlitCommandsForBuffer(blitProperties, bltCmd, rootDeviceEnvironment);
+
+                auto cmd = linearStream.getSpaceForCmd<typename GfxFamily::XY_COPY_BLT>();
+                *cmd = bltCmd;
+                dispatchPostBlitCommand(linearStream);
+
+                srcAddress += width;
+                dstAddress += width;
+                widthToCopy -= width;
+            }
+
+            // Both addresses now sit copySize.x past the start of the band just copied:
+            // finish the current row, then skip the remaining (height - 1) rows of the band.
+            heightToCopy -= height;
+            srcAddress += (blitProperties.srcRowPitch - blitProperties.copySize.x);
+            srcAddress += (blitProperties.srcRowPitch * (height - 1));
+            dstAddress += (blitProperties.dstRowPitch - blitProperties.copySize.x);
+            dstAddress += (blitProperties.dstRowPitch * (height - 1));
+        }
+    }
+}
+
+// Region copy is preferred only when it needs strictly fewer blits than the per-row copy.
+template <typename GfxFamily>
+bool BlitCommandsHelper<GfxFamily>::isCopyRegionPreferred(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
+    return getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment) < getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment);
+}
+
+// Number of XY_COPY_BLT commands dispatchBlitCommandsForBufferRegion emits: every slice is
+// tiled with ceil(x / maxBlitWidth) * ceil(y / maxBlitHeight) rectangles.
+template <typename GfxFamily>
+size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyRegion(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
+    const auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
+    const auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);
+    // Exact integer ceil-division; going through double loses precision for very large sizes.
+    const auto divideRoundingUp = [](uint64_t value, uint64_t divisor) {
+        return (value / divisor) + ((value % divisor) != 0 ? 1u : 0u);
+    };
+    const auto xBlits = static_cast<size_t>(divideRoundingUp(copySize.x, maxWidthToCopy));
+    const auto yBlits = static_cast<size_t>(divideRoundingUp(copySize.y, maxHeightToCopy));
+
+    return xBlits * yBlits * copySize.z;
+}
+
+// Number of XY_COPY_BLT commands the per-row copy needs. Each row of copySize.x bytes is split
+// greedily: while more than maxBlitWidth bytes remain, one maxBlitWidth x (1 .. maxBlitHeight)
+// rectangle is taken, and the final remainder goes into a single 1D blit. A plain
+// ceil(x / (maxBlitWidth * maxBlitHeight)) under-counts whenever a row leaves such a remainder
+// (x = maxBlitWidth + 1 needs two blits, not one).
+// NOTE(review): assumes dispatchBlitCommandsForBufferPerRow splits rows this way - confirm.
+template <typename GfxFamily>
+size_t BlitCommandsHelper<GfxFamily>::getNumberOfBlitsForCopyPerRow(const Vec3<size_t> &copySize, const RootDeviceEnvironment &rootDeviceEnvironment) {
+    const uint64_t maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment);
+    const uint64_t maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment);
+    size_t blitsPerRow = 0u;
+    uint64_t sizeToBlit = copySize.x;
+
+    while (sizeToBlit != 0) {
+        uint64_t width = sizeToBlit; // 1D blit: (1 .. maxBlitWidth) x 1
+        uint64_t height = 1;
+        if (sizeToBlit > maxWidthToCopy) {
+            // 2D blit: maxBlitWidth x (1 .. maxBlitHeight)
+            width = maxWidthToCopy;
+            height = std::min(sizeToBlit / width, maxHeightToCopy);
+        }
+        sizeToBlit -= (width * height);
+        ++blitsPerRow;
+    }
+
+    return blitsPerRow * copySize.y * copySize.z;
+}
+
 } // namespace NEO