From 9a05ad26b0f91ac0d4a061fad9f452e330481b2d Mon Sep 17 00:00:00 2001 From: Kamil Kopryk Date: Thu, 30 Jul 2020 15:39:19 +0200 Subject: [PATCH] Revert "Optimize copying buffers by blitter" This reverts commit 7dfe4be225f46862b5c25a39abc29bda17a3582d. Change-Id: Ifc59102a6b30ce1baf3ae86bda629996c878b6ce --- .../command_stream_receiver_hw_tests.cpp | 728 +++++++----------- .../command_stream_receiver_hw_base.inl | 2 +- shared/source/helpers/blit_commands_helper.h | 9 +- .../helpers/blit_commands_helper_base.inl | 146 +--- 4 files changed, 296 insertions(+), 589 deletions(-) diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp index 36b919ee25..3207ba6715 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp @@ -373,7 +373,7 @@ struct BcsTests : public CommandStreamReceiverHwTest { HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); size_t notAlignedBltSize = (3 * max2DBlitSize) + 1; size_t alignedBltSize = (3 * max2DBlitSize); uint32_t alignedNumberOfBlts = 3; @@ -381,23 +381,19 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredComman auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; - auto alignedCopySize = Vec3{alignedBltSize, 1, 1}; - auto notAlignedCopySize = Vec3{notAlignedBltSize, 1, 1}; auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - alignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); + {alignedBltSize, 1, 1}, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - notAlignedCopySize, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); + {notAlignedBltSize, 1, 1}, csrDependencies, false, false, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(alignedCopySize, pClDevice->getRootDeviceEnvironment())); - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedCopySize, pClDevice->getRootDeviceEnvironment())); } HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); const size_t debugCommandsSize = (EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + EncodeSempahore::getSizeMiSemaphoreWait()) * 2; constexpr uint32_t numberOfBlts = 3; @@ -416,12 +412,11 @@ HWTEST_F(BcsTests, givenDebugCapabilityWhenEstimatingCommandSizeThenAddAllRequir blitPropertiesContainer, false, true, pClDevice->getRootDeviceEnvironment()); EXPECT_EQ(expectedSize, estimatedSize); - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(blitProperties.copySize, pClDevice->getRootDeviceEnvironment())); } HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); + constexpr size_t cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); Vec3 notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2}; Vec3 alignedBltSize = {(3 * max2DBlitSize), 4, 2}; size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z; @@ -437,76 +432,6 @@ HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAdd EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedBltSize, pClDevice->getRootDeviceEnvironment())); - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(alignedBltSize, pClDevice->getRootDeviceEnvironment())); -} - -HWTEST_F(BcsTests, givenBltWithBigCopySizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { - auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); - auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); - auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); - - constexpr auto cmdsSizePerBlit = sizeof(typename FamilyType::XY_COPY_BLT) + sizeof(typename FamilyType::MI_ARB_CHECK); - Vec3 alignedBltSize = {(3 * maxWidthToCopy), (4 * maxHeightToCopy), 2}; - Vec3 notAlignedBltSize = {(3 * maxWidthToCopy + 1), (4 * maxHeightToCopy), 2}; - - size_t alignedNumberOfBlts = 0; - size_t notAlignedNumberOfBlts = 0; - alignedNumberOfBlts = 3 * 4 * alignedBltSize.z; - notAlignedNumberOfBlts = 4 * 4 * notAlignedBltSize.z; - - auto expectedAlignedSize = cmdsSizePerBlit * alignedNumberOfBlts; - auto expectedNotAlignedSize = cmdsSizePerBlit * notAlignedNumberOfBlts; - - auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - alignedBltSize, csrDependencies, false, false, rootDeviceEnvironment); - auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize( - notAlignedBltSize, csrDependencies, false, false, rootDeviceEnvironment); - - EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); - EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(alignedBltSize, rootDeviceEnvironment)); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(notAlignedBltSize, rootDeviceEnvironment)); -} - -HWTEST_F(BcsTests, WhenGetNumberOfBlitsIsCalledThenCorrectValuesAreReturned) { - auto &rootDeviceEnvironment = pClDevice->getRootDeviceEnvironment(); - auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); - auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); - - { - Vec3 copySize = {maxWidthToCopy * maxHeightToCopy, 1, 3}; - size_t expectednBlitsCopyRegion = maxHeightToCopy * 3; - size_t expectednBlitsCopyPerRow = 3; - auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); - auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); - - EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); - EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); - } - { - Vec3 copySize = {2 * maxWidthToCopy, 16, 3}; - size_t expectednBlitsCopyRegion = 2 * 3; - size_t expectednBlitsCopyPerRow = 16 * 3; - auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); - auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); - - EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); - EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); - } - { - Vec3 copySize = {2 * maxWidthToCopy, 3 * maxHeightToCopy, 4}; - size_t expectednBlitsCopyRegion = 2 * 3 * 4; - size_t expectednBlitsCopyPerRow = 3 * maxHeightToCopy * 4; - auto nBlitsCopyRegion = BlitCommandsHelper::getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment); - auto nBlitsCopyPerRow = BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); - - EXPECT_EQ(expectednBlitsCopyPerRow, nBlitsCopyPerRow); - EXPECT_EQ(expectednBlitsCopyRegion, nBlitsCopyRegion); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySize, rootDeviceEnvironment)); - } } HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorrectValue) { @@ -550,7 +475,7 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCa } HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForWriteReadBufferRectThenCalculateForAllAttachedProperites) { - constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; + const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; @@ -1403,110 +1328,102 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres cl_buffer_region subBufferRegion1 = {subBuffer1Offset, 1}; auto subBuffer1 = clUniquePtr(buffer1->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion1, retVal)); - Vec3 copySizes[2] = {{1, 1, 1}, - {1, 2, 1}}; - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[0], pDevice->getRootDeviceEnvironment())); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[1], pDevice->getRootDeviceEnvironment())); + { + // from hostPtr + HardwareParse hwParser; + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation1, + nullptr, hostPtr, + graphicsAllocation1->getGpuAddress() + + subBuffer1->getOffset(), + 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); - for (auto ©Size : copySizes) { - { - // from hostPtr - HardwareParse hwParser; - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation1, - nullptr, hostPtr, - graphicsAllocation1->getGpuAddress() + - subBuffer1->getOffset(), - 0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); + blitBuffer(&csr, blitProperties, true); - blitBuffer(&csr, blitProperties, true); + hwParser.parseCommands(csr.commandStream); - hwParser.parseCommands(csr.commandStream); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - ASSERT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getDestinationBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + ASSERT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); } - { - // to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation1, - nullptr, hostPtr, - graphicsAllocation1->getGpuAddress() + - subBuffer1->getOffset(), - 0, {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); + EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getDestinationBaseAddress()); + } + { + // to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation1, + nullptr, hostPtr, + graphicsAllocation1->getGpuAddress() + + subBuffer1->getOffset(), + 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - ASSERT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); - } - EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + ASSERT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(hostPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } + EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); + } - { - // Buffer to Buffer - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, - graphicsAllocation2, 0, 0, copySize, 0, 0, 0, 0); + { + // Buffer to Buffer + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, + graphicsAllocation2, 0, 0, {1, 1, 1}, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - ASSERT_NE(nullptr, bltCmd); - EXPECT_EQ(graphicsAllocation1->getGpuAddress(), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(graphicsAllocation2->getGpuAddress(), bltCmd->getSourceBaseAddress()); - } + auto bltCmd = genCmdCast(*cmdIterator); + ASSERT_NE(nullptr, bltCmd); + EXPECT_EQ(graphicsAllocation1->getGpuAddress(), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(graphicsAllocation2->getGpuAddress(), bltCmd->getSourceBaseAddress()); + } - { - // Buffer to Buffer - with object offset - const size_t subBuffer2Offset = 0x20; - cl_buffer_region subBufferRegion2 = {subBuffer2Offset, 1}; - auto subBuffer2 = clUniquePtr(buffer2->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion2, retVal)); + { + // Buffer to Buffer - with object offset + const size_t subBuffer2Offset = 0x20; + cl_buffer_region subBufferRegion2 = {subBuffer2Offset, 1}; + auto subBuffer2 = clUniquePtr(buffer2->createSubBuffer(CL_MEM_READ_WRITE, 0, &subBufferRegion2, retVal)); - BuiltinOpParams builtinOpParams = {}; - builtinOpParams.dstMemObj = subBuffer2.get(); - builtinOpParams.srcMemObj = subBuffer1.get(); - builtinOpParams.size.x = copySize.x; - builtinOpParams.size.y = copySize.y; + BuiltinOpParams builtinOpParams = {}; + builtinOpParams.dstMemObj = subBuffer2.get(); + builtinOpParams.srcMemObj = subBuffer1.get(); + builtinOpParams.size.x = 1; - auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, csr, builtinOpParams); + auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToBuffer, csr, builtinOpParams); - auto offset = csr.commandStream.getUsed(); - blitBuffer(&csr, blitProperties, true); + auto offset = csr.commandStream.getUsed(); + blitBuffer(&csr, blitProperties, true); - HardwareParse hwParser; - hwParser.parseCommands(csr.commandStream, offset); + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - EXPECT_EQ(graphicsAllocation2->getGpuAddress() + subBuffer2Offset, bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); - } + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + EXPECT_EQ(graphicsAllocation2->getGpuAddress() + subBuffer2Offset, bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(graphicsAllocation1->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); } } @@ -1526,104 +1443,102 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG const size_t hostPtrOffset = 0x1234; - Vec3 copySizes[2] = {{4, 1, 1}, - {4, 2, 1}}; + { + // from hostPtr + HardwareParse hwParser; + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), + {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[0], pDevice->getRootDeviceEnvironment())); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[1], pDevice->getRootDeviceEnvironment())); + blitBuffer(&csr, blitProperties, true); - for (auto ©Size : copySizes) { - { - // from hostPtr - HardwareParse hwParser; - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), - {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); + hwParser.parseCommands(csr.commandStream); - blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); } + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); + } - { - // to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + { + // to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + blitBuffer(&csr, blitProperties, true); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); - } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); + hwParser.parseCommands(csr.commandStream, offset); + + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); + } - { - // bufferRect to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto copySize = Vec3(4, 2, 1); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + { + // bufferRect to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + blitBuffer(&csr, blitProperties, true); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); - } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); + hwParser.parseCommands(csr.commandStream, offset); + + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); } - { - // bufferWrite from hostPtr - HardwareParse hwParser; - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation, - mapAllocation, mapPtr, - graphicsAllocation->getGpuAddress(), - castToUint64(mapPtr), - {hostPtrOffset, 0, 0}, 0, copySize, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream); + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getSourceBaseAddress()); + } + { + // bufferWrite from hostPtr + HardwareParse hwParser; + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation, + mapAllocation, mapPtr, + graphicsAllocation->getGpuAddress(), + castToUint64(mapPtr), + {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0); + blitBuffer(&csr, blitProperties, true); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + hwParser.parseCommands(csr.commandStream); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getSourceBaseAddress()); } + EXPECT_EQ(graphicsAllocation->getGpuAddress(), bltCmd->getDestinationBaseAddress()); } memoryManager->freeGraphicsMemory(mapAllocation); @@ -1732,67 +1647,59 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) { uint64_t srcOffset = 2; uint64_t dstOffset = 3; - Vec3 copySizes[2] = {{1, 1, 1}, - {1, 2, 1}}; - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[0], pDevice->getRootDeviceEnvironment())); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[1], pDevice->getRootDeviceEnvironment())); + { + // from hostPtr + BuiltinOpParams builtinOpParams = {}; + builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; + builtinOpParams.srcSvmAlloc = gpuAllocation; + builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + srcOffset); + builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + dstOffset); + builtinOpParams.size = {1, 1, 1}; - for (auto ©Size : copySizes) { - { - // from hostPtr - BuiltinOpParams builtinOpParams = {}; - builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; - builtinOpParams.srcSvmAlloc = gpuAllocation; - builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + srcOffset); - builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + dstOffset); - builtinOpParams.size = copySize; + auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, builtinOpParams); + EXPECT_EQ(gpuAllocation, blitProperties.srcAllocation); + EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); - auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, builtinOpParams); - EXPECT_EQ(gpuAllocation, blitProperties.srcAllocation); - EXPECT_EQ(svmData->cpuAllocation, blitProperties.dstAllocation); + blitBuffer(&csr, blitProperties, true); - blitBuffer(&csr, blitProperties, true); + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream, 0); - HardwareParse hwParser; - hwParser.parseCommands(csr.commandStream, 0); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto bltCmd = genCmdCast(*cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - - EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); - } - { - // to hostPtr - BuiltinOpParams builtinOpParams = {}; - builtinOpParams.srcSvmAlloc = gpuAllocation; - builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; - builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation + dstOffset); - builtinOpParams.srcPtr = reinterpret_cast(gpuAllocation + srcOffset); - builtinOpParams.size = copySize; - - auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, builtinOpParams); - - auto offset = csr.commandStream.getUsed(); - blitBuffer(&csr, blitProperties, true); - - HardwareParse hwParser; - hwParser.parseCommands(csr.commandStream, offset); - - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - - EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); - } + EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); } + { + // to hostPtr + BuiltinOpParams builtinOpParams = {}; + builtinOpParams.srcSvmAlloc = gpuAllocation; + builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; + builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation + dstOffset); + builtinOpParams.srcPtr = reinterpret_cast(gpuAllocation + srcOffset); + builtinOpParams.size = {1, 1, 1}; + auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, builtinOpParams); + + auto offset = csr.commandStream.getUsed(); + blitBuffer(&csr, blitProperties, true); + + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream, offset); + + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + + auto bltCmd = genCmdCast(*cmdIterator); + + EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); + } svmAllocsManager.freeSVMAlloc(svmAlloc); } @@ -1807,196 +1714,79 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec auto graphicsAllocation2 = buffer2->getGraphicsAllocation(pDevice->getRootDeviceIndex()); size_t addressOffsets[] = {0, 1, 1234}; - Vec3 copySizes[2] = {{1, 1, 1}, - {1, 2, 1}}; - EXPECT_FALSE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[0], pDevice->getRootDeviceEnvironment())); - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(copySizes[1], pDevice->getRootDeviceEnvironment())); - for (auto ©Size : copySizes) { + for (auto buffer1Offset : addressOffsets) { + { + // from hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, + csr, graphicsAllocation1, + nullptr, hostPtr, + graphicsAllocation1->getGpuAddress(), + 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); - for (auto buffer1Offset : addressOffsets) { - { - // from hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation1, - nullptr, hostPtr, - graphicsAllocation1->getGpuAddress(), - 0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0); + blitBuffer(&csr, blitProperties, true); - blitBuffer(&csr, blitProperties, true); + hwParser.parseCommands(csr.commandStream, offset); - hwParser.parseCommands(csr.commandStream, offset); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getSourceBaseAddress()); } - { - // to hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, - csr, graphicsAllocation1, nullptr, - hostPtr, - graphicsAllocation1->getGpuAddress(), - 0, 0, {buffer1Offset, 0, 0}, copySize, 0, 0, 0, 0); + EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); + } + { + // to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, graphicsAllocation1, nullptr, + hostPtr, + graphicsAllocation1->getGpuAddress(), + 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getDestinationBaseAddress()); - } - EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getDestinationBaseAddress()); } - for (auto buffer2Offset : addressOffsets) { - // Buffer to Buffer - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, - graphicsAllocation2, - {buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, copySize, 0, 0, 0, 0); + EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); + } + for (auto buffer2Offset : addressOffsets) { + // Buffer to Buffer + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForCopyBuffer(graphicsAllocation1, + graphicsAllocation2, + {buffer1Offset, 0, 0}, {buffer2Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); - blitBuffer(&csr, blitProperties, true); + blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); + hwParser.parseCommands(csr.commandStream, offset); - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); + auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); + ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); - EXPECT_EQ(ptrOffset(graphicsAllocation2->getGpuAddress(), buffer2Offset), bltCmd->getSourceBaseAddress()); - } + auto bltCmd = genCmdCast(*cmdIterator); + EXPECT_NE(nullptr, bltCmd); + EXPECT_EQ(ptrOffset(graphicsAllocation1->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(ptrOffset(graphicsAllocation2->getGpuAddress(), buffer2Offset), bltCmd->getSourceBaseAddress()); } } } -HWTEST_F(BcsTests, givenBufferWithBigSizesWhenBlitOperationCalledThenProgramCorrectGpuAddresses) { - auto &rootDeviceEnvironment = pDevice->getRootDeviceEnvironment(); - auto maxWidthToCopy = static_cast(BlitCommandsHelper::getMaxBlitWidth(rootDeviceEnvironment)); - auto maxHeightToCopy = static_cast(BlitCommandsHelper::getMaxBlitHeight(rootDeviceEnvironment)); - auto &csr = pDevice->getUltCommandStreamReceiver(); - - cl_int retVal = CL_SUCCESS; - auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); - auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); - void *hostPtr = reinterpret_cast(0x12340000); - auto graphicsAllocation = buffer1->getGraphicsAllocation(pDevice->getRootDeviceIndex()); - - size_t srcOrigin[] = {1, 2, 0}; - size_t dstOrigin[] = {4, 3, 1}; - size_t region[] = {maxWidthToCopy + 16, maxHeightToCopy + 16, 2}; - size_t srcRowPitch = region[0] + 34; - size_t srcSlicePitch = srcRowPitch * region[1] + 36; - size_t dstRowPitch = region[0] + 40; - size_t dstSlicePitch = dstRowPitch * region[1] + 44; - auto srcAddressOffset = srcOrigin[0] + (srcOrigin[1] * srcRowPitch) + (srcOrigin[2] * srcSlicePitch); - auto dstAddressOffset = dstOrigin[0] + (dstOrigin[1] * dstRowPitch) + (dstOrigin[2] * dstSlicePitch); - - EXPECT_TRUE(BlitCommandsHelper::isCopyRegionPreferred(region, rootDeviceEnvironment)); - - // from hostPtr - HardwareParse hwParser; - auto offset = csr.commandStream.getUsed(); - auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, - csr, graphicsAllocation, - nullptr, hostPtr, - graphicsAllocation->getGpuAddress(), - 0, srcOrigin, dstOrigin, region, - srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch); - - blitBuffer(&csr, blitProperties, true); - hwParser.parseCommands(csr.commandStream, offset); - - //1st rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 - auto cmdIterator = find(hwParser.cmdList.begin(), hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - auto bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); - - srcAddressOffset += maxWidthToCopy; - dstAddressOffset += maxWidthToCopy; - - // 2nd rectangle xCopy = (region[0] - maxWidthToCopy), yCopy = (region[0] - maxHeightToCopy), zCopy = 1 - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); - - srcAddressOffset += (region[0] - maxWidthToCopy); - srcAddressOffset += (srcRowPitch - region[0]); - srcAddressOffset += (srcRowPitch * (maxHeightToCopy - 1)); - dstAddressOffset += (region[0] - maxWidthToCopy); - dstAddressOffset += (dstRowPitch - region[0]); - dstAddressOffset += (dstRowPitch * (maxHeightToCopy - 1)); - - // 3rd rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); - - srcAddressOffset += maxWidthToCopy; - dstAddressOffset += maxWidthToCopy; - - //4th rectangle xCopy = (region[0] - maxWidthToCopy), yCopy = (region[0] - maxHeightToCopy), zCopy = 1 - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); - - srcAddressOffset += (region[0] - maxWidthToCopy); - srcAddressOffset += (srcRowPitch - region[0]); - srcAddressOffset += (srcRowPitch * (region[1] - maxHeightToCopy - 1)); - srcAddressOffset += (srcSlicePitch - (srcRowPitch * region[1])); - dstAddressOffset += (region[0] - maxWidthToCopy); - dstAddressOffset += (dstRowPitch - region[0]); - dstAddressOffset += (dstRowPitch * (region[1] - maxHeightToCopy - 1)); - dstAddressOffset += (dstSlicePitch - (dstRowPitch * region[1])); - - //5th rectangle xCopy = maxWidthToCopy, yCopy = maxHeightToCopy, zCopy = 1 - cmdIterator = find(++cmdIterator, hwParser.cmdList.end()); - ASSERT_NE(hwParser.cmdList.end(), cmdIterator); - bltCmd = genCmdCast(*cmdIterator); - EXPECT_NE(nullptr, bltCmd); - if (pDevice->isFullRangeSvm()) { - EXPECT_EQ(ptrOffset(reinterpret_cast(hostPtr), srcAddressOffset), bltCmd->getSourceBaseAddress()); - } - EXPECT_EQ(ptrOffset(graphicsAllocation->getGpuAddress(), dstAddressOffset), bltCmd->getDestinationBaseAddress()); -} - HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCorrectly) { auto &csr = pDevice->getUltCommandStreamReceiver(); diff --git a/shared/source/command_stream/command_stream_receiver_hw_base.inl b/shared/source/command_stream/command_stream_receiver_hw_base.inl index efd5c608e4..ce2f4797f8 100644 --- a/shared/source/command_stream/command_stream_receiver_hw_base.inl +++ b/shared/source/command_stream/command_stream_receiver_hw_base.inl @@ -913,7 +913,7 @@ uint32_t CommandStreamReceiverHw::blitBuffer(const BlitPropertiesCont EncodeStoreMMIO::encode(commandStream, REG_GLOBAL_TIMESTAMP_LDW, timestampGlobalStartAddress); } - BlitCommandsHelper::dispatchBlitCommands(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); + BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(blitProperties, commandStream, *this->executionEnvironment.rootDeviceEnvironments[this->rootDeviceIndex]); if (blitProperties.outputTimestampPacket) { if (profilingEnabled) { diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index a53e258188..5065e12b5d 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -111,18 +111,12 @@ struct BlitCommandsHelper { static uint64_t getMaxBlitHeightOverride(const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchPostBlitCommand(LinearStream &linearStream); static size_t estimatePostBlitCommandSize(); - static size_t estimateBlitCommandsSize(const Vec3 ©Size, const CsrDependencies &csrDependencies, bool updateTimestampPacket, + static size_t estimateBlitCommandsSize(Vec3 copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, bool profilingEnabled, bool debugPauseEnabled, const RootDeviceEnvironment &rootDeviceEnvironment); - static size_t getNumberOfBlitsForCopyRegion(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); - static size_t getNumberOfBlitsForCopyPerRow(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); - static uint64_t calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); - static uint64_t calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice); - static void dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); - static void dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitCommandsForBufferPerRow(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitCommandsRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void dispatchBlitMemoryColorFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, size_t patternSize, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment); @@ -141,6 +135,5 @@ struct BlitCommandsHelper { static size_t getSizeForDebugPauseCommands(); static bool useOneBlitCopyCommand(Vec3 copySize, uint32_t bytesPerPixel); static uint32_t getAvailableBytesPerPixel(size_t copySize, uint32_t srcOrigin, uint32_t dstOrigin, uint32_t srcSize, uint32_t dstSize); - static bool isCopyRegionPreferred(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment); }; } // namespace NEO diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index c6136b7c16..79dfc2a993 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -68,22 +68,46 @@ size_t BlitCommandsHelper::estimatePostBlitCommandSize() { } template -size_t BlitCommandsHelper::estimateBlitCommandsSize(const Vec3 ©Size, const CsrDependencies &csrDependencies, +size_t BlitCommandsHelper::estimateBlitCommandsSize(Vec3 copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket, bool profilingEnabled, const RootDeviceEnvironment &rootDeviceEnvironment) { - size_t timestampCmdSize = 0; - if (updateTimestampPacket) { - timestampCmdSize = (profilingEnabled) ? 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM) - : EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); + size_t numberOfBlits = 0; + uint64_t width = 1; + uint64_t height = 1; + + for (uint64_t slice = 0; slice < copySize.z; slice++) { + for (uint64_t row = 0; row < copySize.y; row++) { + uint64_t sizeToBlit = copySize.x; + while (sizeToBlit != 0) { + if (sizeToBlit > getMaxBlitWidth(rootDeviceEnvironment)) { + // dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight) + width = getMaxBlitWidth(rootDeviceEnvironment); + height = std::min((sizeToBlit / width), getMaxBlitHeight(rootDeviceEnvironment)); + + } else { + // dispatch 1D blt: (1 .. maxBlitWidth) x 1 + width = sizeToBlit; + height = 1; + } + sizeToBlit -= (width * height); + numberOfBlits++; + } + } } - bool preferRegionCopy = isCopyRegionPreferred(copySize, rootDeviceEnvironment); - auto nBlits = preferRegionCopy ? getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment) - : getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); + const size_t cmdsSizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize()); - auto sizePerBlit = (sizeof(typename GfxFamily::XY_COPY_BLT) + estimatePostBlitCommandSize()); + size_t timestampCmdSize = 0; + if (updateTimestampPacket) { + if (profilingEnabled) { + timestampCmdSize = 4 * sizeof(typename GfxFamily::MI_STORE_REGISTER_MEM); + } else { + timestampCmdSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); + } + } - return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + (sizePerBlit * nBlits) + timestampCmdSize; + return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + + (cmdsSizePerBlit * numberOfBlits) + timestampCmdSize; } template @@ -97,8 +121,7 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropert rootDeviceEnvironment); } size += MemorySynchronizationCommands::getSizeForAdditonalSynchronization(*rootDeviceEnvironment.getHardwareInfo()); - size += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); - size += sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); + size += EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename GfxFamily::MI_BATCH_BUFFER_END); if (debugPauseEnabled) { size += BlitCommandsHelper::getSizeForDebugPauseCommands(); @@ -174,7 +197,6 @@ void BlitCommandsHelper::dispatchBlitCommandsForBufferPerRow(const Bl } } } - template template void BlitCommandsHelper::dispatchBlitMemoryFill(NEO::GraphicsAllocation *dstAlloc, uint32_t *pattern, LinearStream &linearStream, size_t size, const RootDeviceEnvironment &rootDeviceEnvironment, COLOR_DEPTH depth) { @@ -279,102 +301,4 @@ uint32_t BlitCommandsHelper::getAvailableBytesPerPixel(size_t copySiz return bytesPerPixel; } -template -void BlitCommandsHelper::dispatchBlitCommands(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) { - bool preferCopyRegion = isCopyRegionPreferred(blitProperties.copySize, rootDeviceEnvironment); - - preferCopyRegion ? dispatchBlitCommandsForBufferRegion(blitProperties, linearStream, rootDeviceEnvironment) - : dispatchBlitCommandsForBufferPerRow(blitProperties, linearStream, rootDeviceEnvironment); -} - -template -uint64_t BlitCommandsHelper::calculateBlitCommandSourceBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) { - return blitProperties.srcGpuAddress + blitProperties.srcOffset.x + - (blitProperties.srcOffset.y * blitProperties.srcRowPitch) + - (blitProperties.srcSlicePitch * (slice + blitProperties.srcOffset.z)); -} - -template -uint64_t BlitCommandsHelper::calculateBlitCommandDestinationBaseAddressCopyRegion(const BlitProperties &blitProperties, size_t slice) { - return blitProperties.dstGpuAddress + blitProperties.dstOffset.x + - (blitProperties.dstOffset.y * blitProperties.dstRowPitch) + - (blitProperties.dstSlicePitch * (slice + blitProperties.dstOffset.z)); -} - -template -void BlitCommandsHelper::dispatchBlitCommandsForBufferRegion(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) { - const auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment); - const auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment); - - for (size_t slice = 0u; slice < blitProperties.copySize.z; ++slice) { - auto srcAddress = calculateBlitCommandSourceBaseAddressCopyRegion(blitProperties, slice); - auto dstAddress = calculateBlitCommandDestinationBaseAddressCopyRegion(blitProperties, slice); - auto heightToCopy = blitProperties.copySize.y; - - while (heightToCopy > 0) { - auto height = static_cast(std::min(heightToCopy, static_cast(maxHeightToCopy))); - auto widthToCopy = blitProperties.copySize.x; - - while (widthToCopy > 0) { - auto width = static_cast(std::min(widthToCopy, static_cast(maxWidthToCopy))); - auto bltCmd = GfxFamily::cmdInitXyCopyBlt; - - bltCmd.setSourceBaseAddress(srcAddress); - bltCmd.setDestinationBaseAddress(dstAddress); - bltCmd.setTransferWidth(width); - bltCmd.setTransferHeight(height); - bltCmd.setSourcePitch(static_cast(blitProperties.srcRowPitch)); - bltCmd.setDestinationPitch(static_cast(blitProperties.dstRowPitch)); - - appendBlitCommandsForBuffer(blitProperties, bltCmd, rootDeviceEnvironment); - - auto cmd = linearStream.getSpaceForCmd(); - *cmd = bltCmd; - dispatchPostBlitCommand(linearStream); - - srcAddress += width; - dstAddress += width; - widthToCopy -= width; - } - - heightToCopy -= height; - srcAddress += (blitProperties.srcRowPitch - blitProperties.copySize.x); - srcAddress += (blitProperties.srcRowPitch * (height - 1)); - dstAddress += (blitProperties.dstRowPitch - blitProperties.copySize.x); - dstAddress += (blitProperties.dstRowPitch * (height - 1)); - } - } -} - -template -bool BlitCommandsHelper::isCopyRegionPreferred(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment) { - bool preferCopyRegion = getNumberOfBlitsForCopyRegion(copySize, rootDeviceEnvironment) < getNumberOfBlitsForCopyPerRow(copySize, rootDeviceEnvironment); - return preferCopyRegion; -} - -template -size_t BlitCommandsHelper::getNumberOfBlitsForCopyRegion(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment) { - auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment); - auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment); - auto xBlits = static_cast(std::ceil(copySize.x / static_cast(maxWidthToCopy))); - auto yBlits = static_cast(std::ceil(copySize.y / static_cast(maxHeightToCopy))); - auto zBlits = static_cast(copySize.z); - auto nBlits = xBlits * yBlits * zBlits; - - return nBlits; -} - -template -size_t BlitCommandsHelper::getNumberOfBlitsForCopyPerRow(const Vec3 ©Size, const RootDeviceEnvironment &rootDeviceEnvironment) { - auto maxWidthToCopy = getMaxBlitWidth(rootDeviceEnvironment); - auto maxHeightToCopy = getMaxBlitHeight(rootDeviceEnvironment); - auto maxSizeRectangleToCopy = maxWidthToCopy * maxHeightToCopy; - auto xBlits = static_cast(std::ceil(copySize.x / static_cast(maxSizeRectangleToCopy))); - auto yBlits = copySize.y; - auto zBlits = copySize.z; - auto nBlits = xBlits * yBlits * zBlits; - - return nBlits; -} - } // namespace NEO