diff --git a/opencl/source/command_queue/command_queue.cpp b/opencl/source/command_queue/command_queue.cpp index 9893e5e927..8960977da0 100644 --- a/opencl/source/command_queue/command_queue.cpp +++ b/opencl/source/command_queue/command_queue.cpp @@ -617,7 +617,7 @@ bool CommandQueue::blitEnqueueAllowed(cl_command_type cmdType) const { } bool commandAllowed = (CL_COMMAND_READ_BUFFER == cmdType) || (CL_COMMAND_WRITE_BUFFER == cmdType) || - (CL_COMMAND_COPY_BUFFER == cmdType); + (CL_COMMAND_COPY_BUFFER == cmdType) || (CL_COMMAND_READ_BUFFER_RECT == cmdType); return commandAllowed && blitAllowed; } diff --git a/opencl/source/command_queue/enqueue_read_buffer_rect.h b/opencl/source/command_queue/enqueue_read_buffer_rect.h index 54de8b8bfb..7fec070652 100644 --- a/opencl/source/command_queue/enqueue_read_buffer_rect.h +++ b/opencl/source/command_queue/enqueue_read_buffer_rect.h @@ -35,15 +35,16 @@ cl_int CommandQueueHw::enqueueReadBufferRect( const cl_event *eventWaitList, cl_event *event) { + const cl_command_type cmdType = CL_COMMAND_READ_BUFFER_RECT; auto isMemTransferNeeded = true; if (buffer->isMemObjZeroCopy()) { size_t bufferOffset; size_t hostOffset; computeOffsetsValueForRectCommands(&bufferOffset, &hostOffset, bufferOrigin, hostOrigin, region, bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch); - isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, CL_COMMAND_READ_BUFFER_RECT); + isMemTransferNeeded = buffer->checkIfMemoryTransferIsRequired(bufferOffset, hostOffset, ptr, cmdType); } if (!isMemTransferNeeded) { - return enqueueMarkerForReadWriteOperation(buffer, ptr, CL_COMMAND_READ_BUFFER_RECT, blockingRead, + return enqueueMarkerForReadWriteOperation(buffer, ptr, cmdType, blockingRead, numEventsInWaitList, eventWaitList, event); } @@ -65,7 +66,8 @@ cl_int CommandQueueHw::enqueueReadBufferRect( if (region[0] != 0 && region[1] != 0 && region[2] != 0) { - bool status = getGpgpuCommandStreamReceiver().createAllocationForHostSurface(hostPtrSurf, true); + auto &csr = blitEnqueueAllowed(cmdType) ? *getBcsCommandStreamReceiver() : getGpgpuCommandStreamReceiver(); + bool status = csr.createAllocationForHostSurface(hostPtrSurf, true); if (!status) { return CL_OUT_OF_RESOURCES; } @@ -80,6 +82,7 @@ cl_int CommandQueueHw::enqueueReadBufferRect( dc.dstPtr = alignedDstPtr; dc.srcOffset = bufferOrigin; dc.dstOffset = hostOrigin; + dc.transferAllocation = hostPtrSurf.getAllocation(); dc.dstOffset.x += dstPtrOffset; dc.size = region; dc.srcRowPitch = bufferRowPitch; diff --git a/opencl/source/context/context.h b/opencl/source/context/context.h index b63bc98c8b..cfcc7445a5 100644 --- a/opencl/source/context/context.h +++ b/opencl/source/context/context.h @@ -7,6 +7,7 @@ #pragma once #include "shared/source/debug_settings/debug_settings_manager.h" +#include "shared/source/helpers/vec.h" #include "opencl/source/context/context_type.h" #include "opencl/source/context/driver_diagnostics.h" @@ -136,7 +137,7 @@ class Context : public BaseObject<_cl_context> { ContextType peekContextType() { return this->contextType; } - MOCKABLE_VIRTUAL BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const; + MOCKABLE_VIRTUAL BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const; SchedulerKernel &getSchedulerKernel(); diff --git a/opencl/source/context/context_extra.cpp b/opencl/source/context/context_extra.cpp index 22b8e7fb17..0cac4677a4 100644 --- a/opencl/source/context/context_extra.cpp +++ b/opencl/source/context/context_extra.cpp @@ -16,7 +16,7 @@ cl_int Context::processExtraProperties(cl_context_properties propertyType, cl_co return CL_INVALID_PROPERTY; } -BlitOperationResult Context::blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const { +BlitOperationResult Context::blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const { return BlitOperationResult::Unsupported; } } // namespace NEO diff --git a/opencl/source/helpers/cl_blit_properties.h b/opencl/source/helpers/cl_blit_properties.h index b5a1182244..eb68b06528 100644 --- a/opencl/source/helpers/cl_blit_properties.h +++ b/opencl/source/helpers/cl_blit_properties.h @@ -18,6 +18,7 @@ struct ClBlitProperties { static BlitProperties constructProperties(BlitterConstants::BlitDirection blitDirection, CommandStreamReceiver &commandStreamReceiver, const BuiltinOpParams &builtinOpParams) { + if (BlitterConstants::BlitDirection::BufferToBuffer == blitDirection) { auto dstOffset = builtinOpParams.dstOffset.x + builtinOpParams.dstMemObj->getOffset(); auto srcOffset = builtinOpParams.srcOffset.x + builtinOpParams.srcMemObj->getOffset(); @@ -28,21 +29,27 @@ struct ClBlitProperties { } GraphicsAllocation *gpuAllocation = nullptr; - size_t copyOffset = 0; + Vec3 copyOffset = 0; void *hostPtr = nullptr; - size_t hostPtrOffset = 0; + Vec3 hostPtrOffset = 0; uint64_t memObjGpuVa = 0; uint64_t hostAllocGpuVa = 0; GraphicsAllocation *hostAllocation = builtinOpParams.transferAllocation; + Vec3 copySize = 0; + size_t hostRowPitch = 0; + size_t hostSlicePitch = 0; + size_t gpuRowPitch = 0; + size_t gpuSlicePitch = 0; + if (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection) { // write buffer hostPtr = builtinOpParams.srcPtr; - hostPtrOffset = builtinOpParams.srcOffset.x; - copyOffset = builtinOpParams.dstOffset.x; + hostPtrOffset = builtinOpParams.srcOffset; + copyOffset = builtinOpParams.dstOffset; memObjGpuVa = castToUint64(builtinOpParams.dstPtr); hostAllocGpuVa = castToUint64(builtinOpParams.srcPtr); @@ -54,14 +61,15 @@ struct ClBlitProperties { gpuAllocation = builtinOpParams.dstMemObj->getGraphicsAllocation(); memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.dstMemObj->getOffset()); } + copySize.x = builtinOpParams.size.x; } if (BlitterConstants::BlitDirection::BufferToHostPtr == blitDirection) { // read buffer hostPtr = builtinOpParams.dstPtr; - hostPtrOffset = builtinOpParams.dstOffset.x; - copyOffset = builtinOpParams.srcOffset.x; + hostPtrOffset = builtinOpParams.dstOffset; + copyOffset = builtinOpParams.srcOffset; memObjGpuVa = castToUint64(builtinOpParams.srcPtr); hostAllocGpuVa = castToUint64(builtinOpParams.dstPtr); @@ -73,6 +81,12 @@ struct ClBlitProperties { gpuAllocation = builtinOpParams.srcMemObj->getGraphicsAllocation(); memObjGpuVa = (gpuAllocation->getGpuAddress() + builtinOpParams.srcMemObj->getOffset()); } + + hostRowPitch = builtinOpParams.dstRowPitch; + hostSlicePitch = builtinOpParams.dstSlicePitch; + gpuRowPitch = builtinOpParams.srcRowPitch; + gpuSlicePitch = builtinOpParams.srcSlicePitch; + copySize = builtinOpParams.size; } UNRECOVERABLE_IF(BlitterConstants::BlitDirection::HostPtrToBuffer != blitDirection && @@ -80,13 +94,15 @@ struct ClBlitProperties { return BlitProperties::constructPropertiesForReadWriteBuffer(blitDirection, commandStreamReceiver, gpuAllocation, hostAllocation, hostPtr, memObjGpuVa, hostAllocGpuVa, - hostPtrOffset, copyOffset, builtinOpParams.size.x); + hostPtrOffset, copyOffset, copySize, + hostRowPitch, hostSlicePitch, + gpuRowPitch, gpuSlicePitch); } static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType) { if (CL_COMMAND_WRITE_BUFFER == commandType) { return BlitterConstants::BlitDirection::HostPtrToBuffer; - } else if (CL_COMMAND_READ_BUFFER == commandType) { + } else if (CL_COMMAND_READ_BUFFER == commandType || CL_COMMAND_READ_BUFFER_RECT == commandType) { return BlitterConstants::BlitDirection::BufferToHostPtr; } else { UNRECOVERABLE_IF(CL_COMMAND_COPY_BUFFER != commandType); diff --git a/opencl/source/mem_obj/buffer.cpp b/opencl/source/mem_obj/buffer.cpp index e2eef6a46b..8abc591f1f 100644 --- a/opencl/source/mem_obj/buffer.cpp +++ b/opencl/source/mem_obj/buffer.cpp @@ -305,7 +305,7 @@ Buffer *Buffer::create(Context *context, bool gpuCopyRequired = (gmm && gmm->isRenderCompressed) || !MemoryPool::isSystemMemoryPool(memory->getMemoryPool()); if (gpuCopyRequired) { - auto blitMemoryToAllocationResult = context->blitMemoryToAllocation(*pBuffer, memory, hostPtr, size); + auto blitMemoryToAllocationResult = context->blitMemoryToAllocation(*pBuffer, memory, hostPtr, {size, 1, 1}); if (blitMemoryToAllocationResult != BlitOperationResult::Success) { auto cmdQ = context->getSpecialQueue(); diff --git a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp index 47e340d1fb..e461b4dd09 100644 --- a/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp +++ b/opencl/test/unit_test/command_queue/blit_enqueue_tests.cpp @@ -5,6 +5,7 @@ * */ +#include "shared/source/helpers/vec.h" #include "shared/test/unit_test/helpers/debug_manager_state_restore.h" #include "shared/test/unit_test/utilities/base_object_utils.h" @@ -32,12 +33,12 @@ struct BlitAuxTranslationTests : public ::testing::Test { bcsCsr->initializeTagAllocation(); } - BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const override { + BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const override { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, hostPtr, memory->getGpuAddress(), 0, - 0, 0, size); + 0, 0, size, 0, 0, 0, 0); BlitPropertiesContainer container; container.push_back(blitProperties); diff --git a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp index 6704fee821..0d4a6d6780 100644 --- a/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp +++ b/opencl/test/unit_test/command_queue/enqueue_read_buffer_rect_tests.cpp @@ -536,10 +536,12 @@ HWTEST_F(EnqueueReadBufferRectTest, givenInOrderQueueAndDstPtrEqualSrcPtrAndNonZ } HWTEST_F(EnqueueReadWriteBufferRectDispatch, givenOffsetResultingInMisalignedPtrWhenEnqueueReadBufferRectForNon3DCaseIsCalledThenAddressInStateBaseAddressIsAlignedAndMatchesKernelDispatchInfoParams) { + hwInfo->capabilityTable.blitterOperationsSupported = false; initializeFixture(); if (device->areSharedSystemAllocationsAllowed()) { GTEST_SKIP(); } + auto cmdQ = std::make_unique>(context.get(), device.get(), &properties); buffer->forceDisallowCPUCopy = true; diff --git a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp index 14929ef449..893ca26c9a 100644 --- a/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp +++ b/opencl/test/unit_test/command_stream/command_stream_receiver_hw_tests.cpp @@ -341,14 +341,31 @@ struct BcsTests : public CommandStreamReceiverHwTest { HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeThenAddAllRequiredCommands) { constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; - uint64_t notAlignedBltSize = (3 * max2DBlitSize) + 1; - uint64_t alignedBltSize = (3 * max2DBlitSize); + size_t notAlignedBltSize = (3 * max2DBlitSize) + 1; + size_t alignedBltSize = (3 * max2DBlitSize); uint32_t alignedNumberOfBlts = 3; uint32_t notAlignedNumberOfBlts = 4; auto expectedAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts; auto expectedNotAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts; + auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize({alignedBltSize, 1, 1}, csrDependencies, false); + auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize({notAlignedBltSize, 1, 1}, csrDependencies, false); + + EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); + EXPECT_EQ(expectedNotAlignedSize, notAlignedEstimatedSize); +} + +HWTEST_F(BcsTests, givenBltSizeWhenEstimatingCommandSizeForReadBufferRectThenAddAllRequiredCommands) { + constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; + Vec3 notAlignedBltSize = {(3 * max2DBlitSize) + 1, 4, 2}; + Vec3 alignedBltSize = {(3 * max2DBlitSize), 4, 2}; + size_t alignedNumberOfBlts = 3 * alignedBltSize.y * alignedBltSize.z; + size_t notAlignedNumberOfBlts = 4 * notAlignedBltSize.y * notAlignedBltSize.z; + + auto expectedAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * alignedNumberOfBlts; + auto expectedNotAlignedSize = sizeof(typename FamilyType::XY_COPY_BLT) * notAlignedNumberOfBlts; + auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize(alignedBltSize, csrDependencies, false); auto notAlignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize(notAlignedBltSize, csrDependencies, false); @@ -370,7 +387,7 @@ HWTEST_F(BcsTests, whenAskingForCmdSizeForMiFlushDwWithMemoryWriteThenReturnCorr HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCalculateForAllAttachedProperites) { const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; const uint32_t numberOfBlts = 3; - const uint64_t bltSize = (3 * max2DBlitSize); + const size_t bltSize = (3 * max2DBlitSize); const uint32_t numberOfBlitOperations = 4; auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); @@ -378,6 +395,33 @@ HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeThenCa auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); + BlitPropertiesContainer blitPropertiesContainer; + for (uint32_t i = 0; i < numberOfBlitOperations; i++) { + BlitProperties blitProperties; + blitProperties.copySize = {bltSize, 1, 1}; + blitPropertiesContainer.push_back(blitProperties); + + expectedAlignedSize += expectedBlitInstructionsSize; + } + + expectedAlignedSize = alignUp(expectedAlignedSize, MemoryConstants::cacheLineSize); + + auto alignedEstimatedSize = BlitCommandsHelper::estimateBlitCommandsSize(blitPropertiesContainer, pDevice->getHardwareInfo()); + + EXPECT_EQ(expectedAlignedSize, alignedEstimatedSize); +} + +HWTEST_F(BcsTests, givenBlitPropertiesContainerWhenExstimatingCommandsSizeForReadBufferRectThenCalculateForAllAttachedProperites) { + const auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; + const Vec3 bltSize = {(3 * max2DBlitSize), 4, 2}; + const size_t numberOfBlts = 3 * bltSize.y * bltSize.z; + const size_t numberOfBlitOperations = 4 * bltSize.y * bltSize.z; + + auto baseSize = EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite() + sizeof(typename FamilyType::MI_BATCH_BUFFER_END); + auto expectedBlitInstructionsSize = sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts; + + auto expectedAlignedSize = baseSize + MemorySynchronizationCommands::getSizeForAdditonalSynchronization(pDevice->getHardwareInfo()); + BlitPropertiesContainer blitPropertiesContainer; for (uint32_t i = 0; i < numberOfBlitOperations; i++) { BlitProperties blitProperties; @@ -400,8 +444,8 @@ HWTEST_F(BcsTests, givenTimestampPacketWriteRequestWhenEstimatingSizeForCommands auto expectedSizeWithTimestampPacketWrite = expectedBaseSize + EncodeMiFlushDW::getMiFlushDwCmdSizeForDataWrite(); auto expectedSizeWithoutTimestampPacketWrite = expectedBaseSize; - auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandsSize(1, csrDependencies, true); - auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandsSize(1, csrDependencies, false); + auto estimatedSizeWithTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, true); + auto estimatedSizeWithoutTimestampPacketWrite = BlitCommandsHelper::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false); EXPECT_EQ(expectedSizeWithTimestampPacketWrite, estimatedSizeWithTimestampPacketWrite); EXPECT_EQ(expectedSizeWithoutTimestampPacketWrite, estimatedSizeWithoutTimestampPacketWrite); @@ -420,7 +464,7 @@ HWTEST_F(BcsTests, givenBltSizeAndCsrDependenciesWhenEstimatingCommandSizeThenAd size_t expectedSize = (sizeof(typename FamilyType::XY_COPY_BLT) * numberOfBlts) + TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies); - auto estimatedSize = BlitCommandsHelper::estimateBlitCommandsSize(1, csrDependencies, false); + auto estimatedSize = BlitCommandsHelper::estimateBlitCommandsSize({1, 1, 1}, csrDependencies, false); EXPECT_EQ(expectedSize, estimatedSize); } @@ -434,7 +478,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); uint32_t bltLeftover = 17; - uint64_t bltSize = (2 * max2DBlitSize) + bltLeftover; + size_t bltSize = (2 * max2DBlitSize) + bltLeftover; uint32_t numberOfBlts = 3; cl_int retVal = CL_SUCCESS; @@ -447,7 +491,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, bltSize); + 0, 0, {bltSize, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); EXPECT_EQ(newTaskCount, csr.taskCount); @@ -517,6 +561,107 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC } } +HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredCommandsForReadBufferRect) { + using MI_FLUSH_DW = typename FamilyType::MI_FLUSH_DW; + using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT; + constexpr auto max2DBlitSize = BlitterConstants::maxBlitWidth * BlitterConstants::maxBlitHeight; + + auto &csr = pDevice->getUltCommandStreamReceiver(); + static_cast(csr.getMemoryManager())->turnOnFakingBigAllocations(); + + uint32_t bltLeftover = 17; + Vec3 bltSize = {(2 * max2DBlitSize) + bltLeftover, 2, 2}; + size_t numberOfBlts = 3 * bltSize.y * bltSize.z; + + cl_int retVal = CL_SUCCESS; + auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, static_cast(bltSize.x * bltSize.y * bltSize.z), nullptr, retVal)); + void *hostPtr = reinterpret_cast(0x12340000); + + uint32_t newTaskCount = 19; + csr.taskCount = newTaskCount - 1; + EXPECT_EQ(0u, csr.recursiveLockCounter.load()); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, buffer->getGraphicsAllocation(), nullptr, hostPtr, + buffer->getGraphicsAllocation()->getGpuAddress(), 0, + 0, 0, bltSize, 0, 0, 0, 0); + + blitBuffer(&csr, blitProperties, true); + EXPECT_EQ(newTaskCount, csr.taskCount); + EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount); + EXPECT_EQ(newTaskCount, csr.latestSentTaskCount); + EXPECT_EQ(newTaskCount, csr.latestSentTaskCountValueDuringFlush); + EXPECT_EQ(1u, csr.recursiveLockCounter.load()); + + HardwareParse hwParser; + hwParser.parseCommands(csr.commandStream); + auto &cmdList = hwParser.cmdList; + + auto cmdIterator = cmdList.begin(); + + uint64_t offset = 0; + for (uint32_t i = 0; i < numberOfBlts; i++) { + auto bltCmd = genCmdCast(*(cmdIterator++)); + EXPECT_NE(nullptr, bltCmd); + + uint32_t expectedWidth = static_cast(BlitterConstants::maxBlitWidth); + uint32_t expectedHeight = static_cast(BlitterConstants::maxBlitHeight); + if (i % 3 == 2) { + expectedWidth = bltLeftover; + expectedHeight = 1; + } + EXPECT_EQ(expectedWidth, bltCmd->getTransferWidth()); + EXPECT_EQ(expectedHeight, bltCmd->getTransferHeight()); + EXPECT_EQ(expectedWidth, bltCmd->getDestinationPitch()); + EXPECT_EQ(expectedWidth, bltCmd->getSourcePitch()); + + auto dstAddr = NEO::BlitCommandsHelper::calculateBlitCommandDestinationBaseAddress(blitProperties, offset, i % bltSize.y, i % bltSize.z); + auto srcAddr = NEO::BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(blitProperties, offset, i % bltSize.y, i % bltSize.z); + + EXPECT_EQ(dstAddr, bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(srcAddr, bltCmd->getSourceBaseAddress()); + + offset = (i % 3 == 2) ? 0 : offset + (expectedWidth * expectedHeight); + } + + if (UnitTestHelper::isSynchronizationWArequired(pDevice->getHardwareInfo())) { + auto miSemaphoreWaitCmd = genCmdCast(*(cmdIterator++)); + EXPECT_NE(nullptr, miSemaphoreWaitCmd); + EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd)); + } + + auto miFlushCmd = genCmdCast(*(cmdIterator++)); + + if (UnitTestHelper::additionalMiFlushDwRequired) { + uint64_t gpuAddress = 0x0; + uint64_t immData = 0; + + EXPECT_NE(nullptr, miFlushCmd); + EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_NO_WRITE, miFlushCmd->getPostSyncOperation()); + EXPECT_EQ(gpuAddress, miFlushCmd->getDestinationAddress()); + EXPECT_EQ(immData, miFlushCmd->getImmediateData()); + + miFlushCmd = genCmdCast(*(cmdIterator++)); + } + + EXPECT_NE(cmdIterator, cmdList.end()); + EXPECT_EQ(MI_FLUSH_DW::POST_SYNC_OPERATION_WRITE_IMMEDIATE_DATA_QWORD, miFlushCmd->getPostSyncOperation()); + EXPECT_EQ(csr.getTagAllocation()->getGpuAddress(), miFlushCmd->getDestinationAddress()); + EXPECT_EQ(newTaskCount, miFlushCmd->getImmediateData()); + + if (UnitTestHelper::isSynchronizationWArequired(pDevice->getHardwareInfo())) { + auto miSemaphoreWaitCmd = genCmdCast(*(cmdIterator++)); + EXPECT_NE(nullptr, miSemaphoreWaitCmd); + EXPECT_TRUE(UnitTestHelper::isAdditionalMiSemaphoreWait(*miSemaphoreWaitCmd)); + } + + EXPECT_NE(nullptr, genCmdCast(*(cmdIterator++))); + + // padding + while (cmdIterator != cmdList.end()) { + EXPECT_NE(nullptr, genCmdCast(*(cmdIterator++))); + } +} + HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaphoreAndAtomic) { auto &csr = pDevice->getUltCommandStreamReceiver(); @@ -529,7 +674,7 @@ HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaph auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp0(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), numberNodesPerContainer); @@ -581,11 +726,11 @@ HWTEST_F(BcsTests, givenMultipleBlitPropertiesWhenDispatchingThenProgramCommands auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2, buffer2->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); MockTimestampPacketContainer timestamp1(*csr.getTimestampPacketAllocator(), 1); MockTimestampPacketContainer timestamp2(*csr.getTimestampPacketAllocator(), 1); @@ -640,12 +785,12 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2, buffer2->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties1); @@ -683,12 +828,12 @@ HWTEST_F(BcsTests, givenFenceAllocationIsRequiredWhenBlitDispatchedThenMakeAllAl auto blitProperties1 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, buffer1->getGraphicsAllocation(), nullptr, hostPtr1, buffer1->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); auto blitProperties2 = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, buffer2->getGraphicsAllocation(), nullptr, hostPtr2, buffer2->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); BlitPropertiesContainer blitPropertiesContainer; blitPropertiesContainer.push_back(blitProperties1); @@ -723,7 +868,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); @@ -774,7 +919,7 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) { *myMockCsr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(myMockCsr.get(), blitProperties, false); @@ -805,7 +950,7 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, bcsCsr, buffer->getGraphicsAllocation(), nullptr, hostPtr, buffer->getGraphicsAllocation()->getGpuAddress(), 0, - 0, 0, 1); + 0, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&bcsCsr, blitProperties, false); @@ -840,7 +985,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres nullptr, hostPtr, subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1->getOffset(), - 0, hostPtrOffset, 0, 1); + 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); @@ -862,7 +1007,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres nullptr, hostPtr, subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1->getOffset(), - 0, hostPtrOffset, 0, 1); + 0, {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); @@ -875,6 +1020,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres } EXPECT_EQ(subBuffer1->getGraphicsAllocation()->getGpuAddress() + subBuffer1Offset, bltCmd->getSourceBaseAddress()); } + { // Buffer to Buffer HardwareParse hwParser; @@ -941,7 +1087,7 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG mapAllocation, mapPtr, buffer->getGraphicsAllocation()->getGpuAddress(), castToUint64(mapPtr), - hostPtrOffset, 0, 1); + {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); @@ -963,7 +1109,29 @@ HWTEST_F(BcsTests, givenMapAllocationWhenDispatchReadWriteOperationThenSetValidG csr, buffer->getGraphicsAllocation(), mapAllocation, mapPtr, buffer->getGraphicsAllocation()->getGpuAddress(), - castToUint64(mapPtr), hostPtrOffset, 0, 1); + castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {1, 1, 1}, 0, 0, 0, 0); + + blitBuffer(&csr, blitProperties, true); + + hwParser.parseCommands(csr.commandStream, offset); + + auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); + EXPECT_NE(nullptr, bltCmd); + if (pDevice->isFullRangeSvm()) { + EXPECT_EQ(reinterpret_cast(ptrOffset(mapPtr, hostPtrOffset)), bltCmd->getDestinationBaseAddress()); + } + EXPECT_EQ(buffer->getGraphicsAllocation()->getGpuAddress(), bltCmd->getSourceBaseAddress()); + } + + { + // bufferRect to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::BufferToHostPtr, + csr, buffer->getGraphicsAllocation(), + mapAllocation, mapPtr, + buffer->getGraphicsAllocation()->getGpuAddress(), + castToUint64(mapPtr), {hostPtrOffset, 0, 0}, 0, {4, 2, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); @@ -998,7 +1166,7 @@ HWTEST_F(BcsTests, givenMapAllocationInBuiltinOpParamsWhenConstructingThenUseItA BuiltinOpParams builtinOpParams = {}; builtinOpParams.dstMemObj = buffer.get(); builtinOpParams.srcPtr = mapPtr; - builtinOpParams.size.x = 1; + builtinOpParams.size = {1, 1, 1}; builtinOpParams.transferAllocation = mapAllocation; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, @@ -1010,7 +1178,7 @@ HWTEST_F(BcsTests, givenMapAllocationInBuiltinOpParamsWhenConstructingThenUseItA BuiltinOpParams builtinOpParams = {}; builtinOpParams.srcMemObj = buffer.get(); builtinOpParams.dstPtr = mapPtr; - builtinOpParams.size.x = 1; + builtinOpParams.size = {1, 1, 1}; builtinOpParams.transferAllocation = mapAllocation; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, @@ -1040,7 +1208,7 @@ HWTEST_F(BcsTests, givenNonZeroCopySvmAllocationWhenConstructingBlitPropertiesFo builtinOpParams.dstSvmAlloc = svmData->gpuAllocation; builtinOpParams.srcSvmAlloc = svmData->cpuAllocation; builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress()); - builtinOpParams.size.x = 1; + builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); @@ -1053,7 +1221,7 @@ HWTEST_F(BcsTests, givenNonZeroCopySvmAllocationWhenConstructingBlitPropertiesFo builtinOpParams.srcSvmAlloc = svmData->gpuAllocation; builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress()); - builtinOpParams.size.x = 1; + builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); @@ -1087,7 +1255,7 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) { builtinOpParams.srcSvmAlloc = svmData->gpuAllocation; builtinOpParams.srcPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + srcOffset); builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation->getGpuAddress() + dstOffset); - builtinOpParams.size.x = 1; + builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::HostPtrToBuffer, csr, builtinOpParams); @@ -1111,7 +1279,7 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) { builtinOpParams.dstSvmAlloc = svmData->cpuAllocation; builtinOpParams.dstPtr = reinterpret_cast(svmData->cpuAllocation + dstOffset); builtinOpParams.srcPtr = reinterpret_cast(svmData->gpuAllocation + srcOffset); - builtinOpParams.size.x = 1; + builtinOpParams.size = {1, 1, 1}; auto blitProperties = ClBlitProperties::constructProperties(BlitterConstants::BlitDirection::BufferToHostPtr, csr, builtinOpParams); @@ -1127,7 +1295,6 @@ HWTEST_F(BcsTests, givenSvmAllocationWhenBlitCalledThenUsePassedPointers) { EXPECT_EQ(castToUint64(builtinOpParams.dstPtr), bltCmd->getDestinationBaseAddress()); EXPECT_EQ(castToUint64(builtinOpParams.srcPtr), bltCmd->getSourceBaseAddress()); } - svmAllocsManager.freeSVMAlloc(svmAlloc); } @@ -1150,7 +1317,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr, buffer1->getGraphicsAllocation()->getGpuAddress(), - 0, 0, buffer1Offset, 1); + 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); @@ -1171,7 +1338,7 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec csr, buffer1->getGraphicsAllocation(), nullptr, hostPtr, buffer1->getGraphicsAllocation()->getGpuAddress(), - 0, 0, buffer1Offset, 1); + 0, 0, {buffer1Offset, 0, 0}, {1, 1, 1}, 0, 0, 0, 0); blitBuffer(&csr, blitProperties, true); @@ -1184,7 +1351,6 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec } EXPECT_EQ(ptrOffset(buffer1->getGraphicsAllocation()->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); } - for (auto buffer2Offset : addressOffsets) { // Buffer to Buffer HardwareParse hwParser; @@ -1240,6 +1406,12 @@ HWTEST_F(BcsTests, givenAuxTranslationRequestWhenBlitCalledThenProgramCommandCor } } +HWTEST_F(BcsTests, givenInvalidBlitDirectionWhenConstructPropertiesThenExceptionIsThrow) { + auto &csr = pDevice->getUltCommandStreamReceiver(); + + EXPECT_THROW(ClBlitProperties::constructProperties(static_cast(7), csr, {}), std::exception); +} + struct MockScratchSpaceController : ScratchSpaceControllerBase { using ScratchSpaceControllerBase::privateScratchAllocation; using ScratchSpaceControllerBase::ScratchSpaceControllerBase; diff --git a/opencl/test/unit_test/mem_obj/buffer_tests.cpp b/opencl/test/unit_test/mem_obj/buffer_tests.cpp index 0ce7c19b41..15e47a22e3 100644 --- a/opencl/test/unit_test/mem_obj/buffer_tests.cpp +++ b/opencl/test/unit_test/mem_obj/buffer_tests.cpp @@ -10,6 +10,7 @@ #include "shared/source/gmm_helper/resource_info.h" #include "shared/source/helpers/array_count.h" #include "shared/source/helpers/hw_helper.h" +#include "shared/source/helpers/vec.h" #include "shared/source/memory_manager/allocations_list.h" #include "shared/source/memory_manager/memory_operations_handler.h" #include "shared/source/memory_manager/unified_memory_manager.h" @@ -665,12 +666,12 @@ struct BcsBufferTests : public ::testing::Test { bcsCsr->initializeTagAllocation(); } - BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, size_t size) const override { + BlitOperationResult blitMemoryToAllocation(MemObj &memObj, GraphicsAllocation *memory, void *hostPtr, Vec3 size) const override { auto blitProperties = BlitProperties::constructPropertiesForReadWriteBuffer(BlitterConstants::BlitDirection::HostPtrToBuffer, *bcsCsr, memory, nullptr, hostPtr, memory->getGpuAddress(), 0, - 0, 0, size); + 0, 0, size, 0, 0, 0, 0); BlitPropertiesContainer container; container.push_back(blitProperties); @@ -768,40 +769,60 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenEnqueueBufferOperationIs bufferForBlt1->forceDisallowCPUCopy = true; auto *hwInfo = device->getRootDeviceEnvironment().getMutableHardwareInfo(); + size_t bufferOrigin[] = {0, 0, 0}; + size_t hostOrigin[] = {0, 0, 0}; + size_t region[] = {1, 2, 1}; + DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0); hwInfo->capabilityTable.blitterOperationsSupported = false; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); hwInfo->capabilityTable.blitterOperationsSupported = false; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(0); hwInfo->capabilityTable.blitterOperationsSupported = true; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(-1); hwInfo->capabilityTable.blitterOperationsSupported = true; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); - EXPECT_EQ(3u, bcsCsr->blitBufferCalled); + EXPECT_EQ(4u, bcsCsr->blitBufferCalled); DebugManager.flags.EnableBlitterOperationsForReadWriteBuffers.set(1); hwInfo->capabilityTable.blitterOperationsSupported = true; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(4u, bcsCsr->blitBufferCalled); - commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(5u, bcsCsr->blitBufferCalled); - commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_TRUE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(6u, bcsCsr->blitBufferCalled); + commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + EXPECT_EQ(7u, bcsCsr->blitBufferCalled); + commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_TRUE, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); + EXPECT_EQ(8u, bcsCsr->blitBufferCalled); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispatchBlitWhenUnblocked) { @@ -812,23 +833,34 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBcsSupportedWhenQueueIsBlockedThenDispat bufferForBlt0->forceDisallowCPUCopy = true; bufferForBlt1->forceDisallowCPUCopy = true; UserEvent userEvent(bcsMockContext.get()); + cl_event waitlist = &userEvent; + size_t bufferOrigin[] = {0, 0, 0}; + size_t hostOrigin[] = {0, 0, 0}; + size_t region[] = {1, 2, 1}; commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 1, &waitlist, nullptr); commandQueue->enqueueReadBuffer(bufferForBlt1.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); EXPECT_EQ(0u, bcsCsr->blitBufferCalled); userEvent.setStatus(CL_COMPLETE); - EXPECT_EQ(3u, bcsCsr->blitBufferCalled); + EXPECT_EQ(4u, bcsCsr->blitBufferCalled); commandQueue->enqueueWriteBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); - EXPECT_EQ(4u, bcsCsr->blitBufferCalled); - commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); EXPECT_EQ(5u, bcsCsr->blitBufferCalled); + commandQueue->enqueueReadBuffer(bufferForBlt0.get(), CL_FALSE, 0, 1, &hostPtr, nullptr, 0, nullptr, nullptr); + EXPECT_EQ(6u, bcsCsr->blitBufferCalled); commandQueue->enqueueCopyBuffer(bufferForBlt0.get(), bufferForBlt1.get(), 0, 1, 1, 0, nullptr, nullptr); + EXPECT_EQ(7u, bcsCsr->blitBufferCalled); + commandQueue->enqueueReadBufferRect(bufferForBlt0.get(), CL_FALSE, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, &hostPtr, 0, nullptr, nullptr); } HWTEST_TEMPLATED_F(BcsBufferTests, givenBuffersWhenCopyBufferCalledThenUseBcs) { @@ -1345,6 +1377,53 @@ HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingWriteBufferWhenUsingBcsThenCallW EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); } +HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferRectWhenUsingBcsThenCallWait) { + auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); + myMockCsr->taskCount = 1234; + myMockCsr->initializeTagAllocation(); + myMockCsr->setupContext(*bcsMockContext->bcsOsContext); + bcsMockContext->bcsCsr.reset(myMockCsr); + + EngineControl bcsEngineControl = {myMockCsr, bcsMockContext->bcsOsContext.get()}; + + auto cmdQ = clUniquePtr(new MockCommandQueueHw(bcsMockContext.get(), device.get(), nullptr)); + cmdQ->bcsEngine = &bcsEngineControl; + auto &gpgpuCsr = cmdQ->getGpgpuCommandStreamReceiver(); + myMockCsr->gpgpuCsr = &gpgpuCsr; + + cl_int retVal = CL_SUCCESS; + auto buffer = clUniquePtr(Buffer::create(bcsMockContext.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + buffer->forceDisallowCPUCopy = true; + void *hostPtr = reinterpret_cast(0x12340000); + + size_t bufferOrigin[] = {0, 0, 0}; + size_t hostOrigin[] = {0, 0, 0}; + size_t region[] = {1, 2, 1}; + + cmdQ->enqueueReadBufferRect(buffer.get(), false, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); + EXPECT_EQ(0u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); + EXPECT_TRUE(gpgpuCsr.getTemporaryAllocations().peekIsEmpty()); + EXPECT_FALSE(myMockCsr->getTemporaryAllocations().peekIsEmpty()); + + bool tempAllocationFound = false; + auto tempAllocation = myMockCsr->getTemporaryAllocations().peekHead(); + while (tempAllocation) { + if (tempAllocation->getUnderlyingBuffer() == hostPtr) { + tempAllocationFound = true; + break; + } + tempAllocation = tempAllocation->next; + } + EXPECT_TRUE(tempAllocationFound); + + cmdQ->enqueueReadBufferRect(buffer.get(), true, bufferOrigin, hostOrigin, region, + MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, MemoryConstants::cacheLineSize, + MemoryConstants::cacheLineSize, hostPtr, 0, nullptr, nullptr); + EXPECT_EQ(1u, myMockCsr->waitForTaskCountAndCleanAllocationListCalled); +} + HWTEST_TEMPLATED_F(BcsBufferTests, givenBlockingReadBufferWhenUsingBcsThenCallWait) { auto myMockCsr = new MyMockCsr(*device->getExecutionEnvironment(), device->getRootDeviceIndex()); myMockCsr->taskCount = 1234; diff --git a/shared/source/helpers/blit_commands_helper.cpp b/shared/source/helpers/blit_commands_helper.cpp index 6e2fa277dc..f71cb30643 100644 --- a/shared/source/helpers/blit_commands_helper.cpp +++ b/shared/source/helpers/blit_commands_helper.cpp @@ -16,22 +16,26 @@ BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterCons GraphicsAllocation *memObjAllocation, GraphicsAllocation *preallocatedHostAllocation, void *hostPtr, uint64_t memObjGpuVa, - uint64_t hostAllocGpuVa, size_t hostPtrOffset, - size_t copyOffset, uint64_t copySize) { - + uint64_t hostAllocGpuVa, Vec3 hostPtrOffset, + Vec3 copyOffset, Vec3 copySize, + size_t hostRowPitch, size_t hostSlicePitch, + size_t gpuRowPitch, size_t gpuSlicePitch) { GraphicsAllocation *hostAllocation = nullptr; if (preallocatedHostAllocation) { hostAllocation = preallocatedHostAllocation; UNRECOVERABLE_IF(hostAllocGpuVa == 0); } else { - HostPtrSurface hostPtrSurface(hostPtr, static_cast(copySize), true); + HostPtrSurface hostPtrSurface(hostPtr, static_cast(copySize.x), true); bool success = commandStreamReceiver.createAllocationForHostSurface(hostPtrSurface, false); UNRECOVERABLE_IF(!success); hostAllocation = hostPtrSurface.getAllocation(); hostAllocGpuVa = hostAllocation->getGpuAddress(); } + copySize.y = copySize.y ? copySize.y : 1; + copySize.z = copySize.z ? copySize.z : 1; + if (BlitterConstants::BlitDirection::HostPtrToBuffer == blitDirection) { return { nullptr, // outputTimestampPacket @@ -57,12 +61,16 @@ BlitProperties BlitProperties::constructPropertiesForReadWriteBuffer(BlitterCons memObjGpuVa, // srcGpuAddress copySize, // copySize hostPtrOffset, // dstOffset - copyOffset}; // srcOffset - } + copyOffset, // srcOffset + hostRowPitch, // dstRowPitch + hostSlicePitch, // dstSlicePitch + gpuRowPitch, // srcRowPitch + gpuSlicePitch}; // srcSlicePitch + }; } BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation, - size_t dstOffset, size_t srcOffset, uint64_t copySize) { + size_t dstOffset, size_t srcOffset, size_t copySize) { return { nullptr, // outputTimestampPacket @@ -73,9 +81,9 @@ BlitProperties BlitProperties::constructPropertiesForCopyBuffer(GraphicsAllocati srcAllocation, // srcAllocation dstAllocation->getGpuAddress(), // dstGpuAddress srcAllocation->getGpuAddress(), // srcGpuAddress - copySize, // copySize - dstOffset, // dstOffset - srcOffset}; // srcOffset + {copySize, 1, 1}, // copySize + {dstOffset, 0, 0}, // dstOffset + {srcOffset, 0, 0}}; // srcOffset } BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection, @@ -91,7 +99,7 @@ BlitProperties BlitProperties::constructPropertiesForAuxTranslation(AuxTranslati allocation, // srcAllocation allocation->getGpuAddress(), // dstGpuAddress allocation->getGpuAddress(), // srcGpuAddress - allocationSize, // copySize + {allocationSize, 1, 1}, // copySize 0, // dstOffset 0 // srcOffset }; diff --git a/shared/source/helpers/blit_commands_helper.h b/shared/source/helpers/blit_commands_helper.h index 5471be2898..37f433caba 100644 --- a/shared/source/helpers/blit_commands_helper.h +++ b/shared/source/helpers/blit_commands_helper.h @@ -8,6 +8,7 @@ #pragma once #include "shared/source/command_stream/csr_deps.h" #include "shared/source/helpers/aux_translation.h" +#include "shared/source/helpers/vec.h" #include "shared/source/memory_manager/memory_constants.h" #include "shared/source/utilities/stackvec.h" @@ -34,11 +35,13 @@ struct BlitProperties { GraphicsAllocation *memObjAllocation, GraphicsAllocation *preallocatedHostAllocation, void *hostPtr, uint64_t memObjGpuVa, - uint64_t hostAllocGpuVa, size_t hostPtrOffset, - size_t copyOffset, uint64_t copySize); + uint64_t hostAllocGpuVa, Vec3 hostPtrOffset, + Vec3 copyOffset, Vec3 copySize, + size_t hostRowPitch, size_t hostSlicePitch, + size_t gpuRowPitch, size_t gpuSlicePitch); static BlitProperties constructPropertiesForCopyBuffer(GraphicsAllocation *dstAllocation, GraphicsAllocation *srcAllocation, - size_t dstOffset, size_t srcOffset, uint64_t copySize); + size_t dstOffset, size_t srcOffset, size_t copySize); static BlitProperties constructPropertiesForAuxTranslation(AuxTranslationDirection auxTranslationDirection, GraphicsAllocation *allocation); @@ -47,8 +50,6 @@ struct BlitProperties { TimestampPacketContainer &kernelTimestamps, const CsrDependencies &depsFromEvents, CommandStreamReceiver &gpguCsr, CommandStreamReceiver &bcsCsr); - static BlitterConstants::BlitDirection obtainBlitDirection(uint32_t commandType); - TagNode *outputTimestampPacket = nullptr; BlitterConstants::BlitDirection blitDirection; CsrDependencies csrDependencies; @@ -58,15 +59,23 @@ struct BlitProperties { GraphicsAllocation *srcAllocation = nullptr; uint64_t dstGpuAddress = 0; uint64_t srcGpuAddress = 0; - uint64_t copySize = 0; - size_t dstOffset = 0; - size_t srcOffset = 0; + + Vec3 copySize = 0; + Vec3 dstOffset = 0; + Vec3 srcOffset = 0; + + size_t dstRowPitch = 0; + size_t dstSlicePitch = 0; + size_t srcRowPitch = 0; + size_t srcSlicePitch = 0; }; template struct BlitCommandsHelper { - static size_t estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket); + static size_t estimateBlitCommandsSize(Vec3 copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket); static size_t estimateBlitCommandsSize(const BlitPropertiesContainer &blitPropertiesContainer, const HardwareInfo &hwInfo); + static uint64_t calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); + static uint64_t calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice); static void dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment); static void appendBlitCommandsForBuffer(const BlitProperties &blitProperties, typename GfxFamily::XY_COPY_BLT &blitCmd, const RootDeviceEnvironment &rootDeviceEnvironment); }; diff --git a/shared/source/helpers/blit_commands_helper_base.inl b/shared/source/helpers/blit_commands_helper_base.inl index 5aef3f2b8b..7c3336fd0b 100644 --- a/shared/source/helpers/blit_commands_helper_base.inl +++ b/shared/source/helpers/blit_commands_helper_base.inl @@ -13,24 +13,29 @@ namespace NEO { template -size_t BlitCommandsHelper::estimateBlitCommandsSize(uint64_t copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket) { +size_t BlitCommandsHelper::estimateBlitCommandsSize(Vec3 copySize, const CsrDependencies &csrDependencies, bool updateTimestampPacket) { size_t numberOfBlits = 0; - uint64_t sizeToBlit = copySize; uint64_t width = 1; uint64_t height = 1; - while (sizeToBlit != 0) { - if (sizeToBlit > BlitterConstants::maxBlitWidth) { - // 2D: maxBlitWidth x (1 .. maxBlitHeight) - width = BlitterConstants::maxBlitWidth; - height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight); - } else { - // 1D: (1 .. maxBlitWidth) x 1 - width = sizeToBlit; - height = 1; + for (uint64_t slice = 0; slice < copySize.z; slice++) { + for (uint64_t row = 0; row < copySize.y; row++) { + uint64_t sizeToBlit = copySize.x; + while (sizeToBlit != 0) { + if (sizeToBlit > BlitterConstants::maxBlitWidth) { + // dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight) + width = BlitterConstants::maxBlitWidth; + height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight); + + } else { + // dispatch 1D blt: (1 .. maxBlitWidth) x 1 + width = sizeToBlit; + height = 1; + } + sizeToBlit -= (width * height); + numberOfBlits++; + } } - sizeToBlit -= (width * height); - numberOfBlits++; } return TimestampPacketHelper::getRequiredCmdStreamSize(csrDependencies) + @@ -51,41 +56,65 @@ size_t BlitCommandsHelper::estimateBlitCommandsSize(const BlitPropert return alignUp(size, MemoryConstants::cacheLineSize); } +template +uint64_t BlitCommandsHelper::calculateBlitCommandDestinationBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice) { + return blitProperties.dstGpuAddress + blitProperties.dstOffset.x + offset + + blitProperties.dstOffset.y * blitProperties.dstRowPitch + + blitProperties.dstOffset.z * blitProperties.dstSlicePitch + + row * blitProperties.dstRowPitch + + slice * blitProperties.dstSlicePitch; +} + +template +uint64_t BlitCommandsHelper::calculateBlitCommandSourceBaseAddress(const BlitProperties &blitProperties, uint64_t offset, uint64_t row, uint64_t slice) { + return blitProperties.srcGpuAddress + blitProperties.srcOffset.x + offset + + blitProperties.srcOffset.y * blitProperties.srcRowPitch + + blitProperties.srcOffset.z * blitProperties.srcSlicePitch + + row * blitProperties.srcRowPitch + + slice * blitProperties.srcSlicePitch; +} + template void BlitCommandsHelper::dispatchBlitCommandsForBuffer(const BlitProperties &blitProperties, LinearStream &linearStream, const RootDeviceEnvironment &rootDeviceEnvironment) { - uint64_t sizeToBlit = blitProperties.copySize; uint64_t width = 1; uint64_t height = 1; - uint64_t offset = 0; - while (sizeToBlit != 0) { - if (sizeToBlit > BlitterConstants::maxBlitWidth) { - // dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight) - width = BlitterConstants::maxBlitWidth; - height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight); - } else { - // dispatch 1D blt: (1 .. maxBlitWidth) x 1 - width = sizeToBlit; - height = 1; + for (uint64_t slice = 0; slice < blitProperties.copySize.z; slice++) { + for (uint64_t row = 0; row < blitProperties.copySize.y; row++) { + uint64_t offset = 0; + uint64_t sizeToBlit = blitProperties.copySize.x; + while (sizeToBlit != 0) { + if (sizeToBlit > BlitterConstants::maxBlitWidth) { + // dispatch 2D blit: maxBlitWidth x (1 .. maxBlitHeight) + width = BlitterConstants::maxBlitWidth; + height = std::min((sizeToBlit / width), BlitterConstants::maxBlitHeight); + } else { + // dispatch 1D blt: (1 .. maxBlitWidth) x 1 + width = sizeToBlit; + height = 1; + } + + auto bltCmd = linearStream.getSpaceForCmd(); + *bltCmd = GfxFamily::cmdInitXyCopyBlt; + + bltCmd->setTransferWidth(static_cast(width)); + bltCmd->setTransferHeight(static_cast(height)); + bltCmd->setDestinationPitch(static_cast(width)); + bltCmd->setSourcePitch(static_cast(width)); + + auto dstAddr = calculateBlitCommandDestinationBaseAddress(blitProperties, offset, row, slice); + auto srcAddr = calculateBlitCommandSourceBaseAddress(blitProperties, offset, row, slice); + + bltCmd->setDestinationBaseAddress(dstAddr); + bltCmd->setSourceBaseAddress(srcAddr); + + appendBlitCommandsForBuffer(blitProperties, *bltCmd, rootDeviceEnvironment); + + auto blitSize = width * height; + sizeToBlit -= blitSize; + offset += blitSize; + } } - - auto bltCmd = linearStream.getSpaceForCmd(); - *bltCmd = GfxFamily::cmdInitXyCopyBlt; - - bltCmd->setTransferWidth(static_cast(width)); - bltCmd->setTransferHeight(static_cast(height)); - - bltCmd->setDestinationPitch(static_cast(width)); - bltCmd->setSourcePitch(static_cast(width)); - - bltCmd->setDestinationBaseAddress(blitProperties.dstGpuAddress + blitProperties.dstOffset + offset); - bltCmd->setSourceBaseAddress(blitProperties.srcGpuAddress + blitProperties.srcOffset + offset); - - appendBlitCommandsForBuffer(blitProperties, *bltCmd, rootDeviceEnvironment); - - auto blitSize = width * height; - sizeToBlit -= blitSize; - offset += blitSize; } }