diff --git a/runtime/command_stream/command_stream_receiver.cpp b/runtime/command_stream/command_stream_receiver.cpp index c3a66ee2fa..bd9bd471c3 100644 --- a/runtime/command_stream/command_stream_receiver.cpp +++ b/runtime/command_stream/command_stream_receiver.cpp @@ -418,22 +418,22 @@ cl_int CommandStreamReceiver::expectMemory(const void *gfxAddress, const void *s return (isMemoryEqual == isEqualMemoryExpected) ? CL_SUCCESS : CL_INVALID_VALUE; } -void CommandStreamReceiver::blitWithHostPtr(Buffer &buffer, void *hostPtr, bool blocking, uint64_t hostPtrSize, +void CommandStreamReceiver::blitWithHostPtr(Buffer &buffer, void *hostPtr, bool blocking, size_t bufferOffset, uint64_t copySize, BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies) { - HostPtrSurface hostPtrSurface(hostPtr, static_cast(hostPtrSize), true); + HostPtrSurface hostPtrSurface(hostPtr, static_cast(copySize), true); bool success = createAllocationForHostSurface(hostPtrSurface, false); UNRECOVERABLE_IF(!success); auto hostPtrAllocation = hostPtrSurface.getAllocation(); auto device = buffer.getContext()->getDevice(0); - auto hostPtrBuffer = std::unique_ptr(Buffer::createBufferHwFromDevice(device, CL_MEM_READ_ONLY, static_cast(hostPtrSize), + auto hostPtrBuffer = std::unique_ptr(Buffer::createBufferHwFromDevice(device, CL_MEM_READ_WRITE, static_cast(copySize), hostPtr, hostPtr, hostPtrAllocation, true, false, true)); if (BlitterConstants::BlitWithHostPtrDirection::FromHostPtr == copyDirection) { - blitBuffer(buffer, *hostPtrBuffer, blocking, 0, 0, hostPtrSize, csrDependencies); + blitBuffer(buffer, *hostPtrBuffer, blocking, bufferOffset, 0, copySize, csrDependencies); } else { - blitBuffer(*hostPtrBuffer, buffer, blocking, 0, 0, hostPtrSize, csrDependencies); + blitBuffer(*hostPtrBuffer, buffer, blocking, 0, bufferOffset, copySize, csrDependencies); } } } // namespace NEO diff --git a/runtime/command_stream/command_stream_receiver.h 
b/runtime/command_stream/command_stream_receiver.h index 4f27c5757c..7047ea3d74 100644 --- a/runtime/command_stream/command_stream_receiver.h +++ b/runtime/command_stream/command_stream_receiver.h @@ -176,7 +176,7 @@ class CommandStreamReceiver { this->latestSentTaskCount = latestSentTaskCount; } - void blitWithHostPtr(Buffer &buffer, void *hostPtr, bool blocking, uint64_t hostPtrSize, + void blitWithHostPtr(Buffer &buffer, void *hostPtr, bool blocking, size_t bufferOffset, uint64_t copySize, BlitterConstants::BlitWithHostPtrDirection copyDirection, CsrDependencies &csrDependencies); virtual void blitBuffer(Buffer &dstBuffer, Buffer &srcBuffer, bool blocking, uint64_t dstOffset, uint64_t srcOffset, uint64_t copySize, CsrDependencies &csrDependencies) = 0; diff --git a/runtime/mem_obj/buffer.cpp b/runtime/mem_obj/buffer.cpp index 573f5a45bc..eff811554a 100644 --- a/runtime/mem_obj/buffer.cpp +++ b/runtime/mem_obj/buffer.cpp @@ -279,7 +279,7 @@ Buffer *Buffer::create(Context *context, auto blitCommandStreamReceiver = context->getCommandStreamReceiverForBlitOperation(*pBuffer); if (blitCommandStreamReceiver) { CsrDependencies dependencies; - blitCommandStreamReceiver->blitWithHostPtr(*pBuffer, hostPtr, true, size, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, + blitCommandStreamReceiver->blitWithHostPtr(*pBuffer, hostPtr, true, 0, size, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, dependencies); } else { auto cmdQ = context->getSpecialQueue(); diff --git a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp index d7eef808d1..f123c1be89 100644 --- a/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp +++ b/unit_tests/command_stream/command_stream_receiver_hw_tests.cpp @@ -347,7 +347,7 @@ HWTEST_F(BcsTests, givenBltSizeWithLeftoverWhenDispatchedThenProgramAllRequiredC uint32_t newTaskCount = 19; csr.taskCount = newTaskCount - 1; EXPECT_EQ(0u, 
csr.recursiveLockCounter.load()); - csr.blitWithHostPtr(*buffer, hostPtr, true, bltSize, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + csr.blitWithHostPtr(*buffer, hostPtr, true, 0, bltSize, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); EXPECT_EQ(newTaskCount, csr.taskCount); EXPECT_EQ(newTaskCount, csr.latestFlushedTaskCount); EXPECT_EQ(newTaskCount, csr.latestSentTaskCount); @@ -408,7 +408,7 @@ HWTEST_F(BcsTests, givenCsrDependenciesWhenProgrammingCommandStreamThenAddSemaph csrDependencies.push_back(&timestamp0); csrDependencies.push_back(&timestamp1); - csr.blitWithHostPtr(*buffer, hostPtr, true, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); HardwareParse hwParser; hwParser.parseCommands(csr.commandStream); @@ -448,7 +448,7 @@ HWTEST_F(BcsTests, givenInputAllocationsWhenBlitDispatchedThenMakeAllAllocations EXPECT_EQ(0u, csr.makeSurfacePackNonResidentCalled); - csr.blitWithHostPtr(*buffer, hostPtr, true, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); EXPECT_TRUE(csr.isMadeResident(buffer->getGraphicsAllocation())); EXPECT_TRUE(csr.isMadeResident(csr.commandStream.getGraphicsAllocation())); @@ -472,7 +472,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitCalledThenFlushCommandBuffer) { uint32_t newTaskCount = 17; csr.taskCount = newTaskCount - 1; - csr.blitWithHostPtr(*buffer, hostPtr, true, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + csr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); EXPECT_EQ(commandStream.getGraphicsAllocation(), csr.latestFlushedBatchBuffer.commandBufferAllocation); 
EXPECT_EQ(commandStreamOffset, csr.latestFlushedBatchBuffer.startOffset); @@ -517,10 +517,10 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCallWaitWithKmdFallback) { auto buffer = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); void *hostPtr = reinterpret_cast(0x12340000); - myMockCsr->blitWithHostPtr(*buffer, hostPtr, false, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + myMockCsr->blitWithHostPtr(*buffer, hostPtr, false, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); EXPECT_EQ(0u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled); - myMockCsr->blitWithHostPtr(*buffer, hostPtr, true, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + myMockCsr->blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); EXPECT_EQ(1u, myMockCsr->waitForTaskCountWithKmdNotifyFallbackCalled); EXPECT_EQ(myMockCsr->taskCount, myMockCsr->taskCountToWaitPassed); EXPECT_EQ(myMockCsr->flushStamp->peekStamp(), myMockCsr->flushStampToWaitPassed); @@ -541,10 +541,10 @@ HWTEST_F(BcsTests, whenBlitFromHostPtrCalledThenCleanTemporaryAllocations) { EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled); - bcsCsr.blitWithHostPtr(*buffer, hostPtr, false, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + bcsCsr.blitWithHostPtr(*buffer, hostPtr, false, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); EXPECT_EQ(0u, mockInternalAllocationsStorage->cleanAllocationsCalled); - bcsCsr.blitWithHostPtr(*buffer, hostPtr, true, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + bcsCsr.blitWithHostPtr(*buffer, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); EXPECT_EQ(1u, mockInternalAllocationsStorage->cleanAllocationsCalled); EXPECT_EQ(bcsCsr.taskCount, 
mockInternalAllocationsStorage->lastCleanAllocationsTaskCount); EXPECT_TRUE(TEMPORARY_ALLOCATION == mockInternalAllocationsStorage->lastCleanAllocationUsage); @@ -561,7 +561,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres { // from hostPtr HardwareParse hwParser; - csr.blitWithHostPtr(*buffer1, hostPtr, true, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + csr.blitWithHostPtr(*buffer1, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); hwParser.parseCommands(csr.commandStream); @@ -574,7 +574,7 @@ HWTEST_F(BcsTests, givenBufferWhenBlitOperationCalledThenProgramCorrectGpuAddres // to hostPtr HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); - csr.blitWithHostPtr(*buffer1, hostPtr, true, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr, csrDependencies); + csr.blitWithHostPtr(*buffer1, hostPtr, true, 0, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr, csrDependencies); hwParser.parseCommands(csr.commandStream, offset); @@ -604,11 +604,40 @@ HWTEST_F(BcsTests, givenBufferWithOffsetWhenBlitOperationCalledThenProgramCorrec cl_int retVal = CL_SUCCESS; auto buffer1 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); auto buffer2 = clUniquePtr(Buffer::create(context.get(), CL_MEM_READ_WRITE, 1, nullptr, retVal)); + void *hostPtr = reinterpret_cast(0x12340000); size_t addressOffsets[] = {0, 1, 1234}; for (auto buffer1Offset : addressOffsets) { + { + // from hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + csr.blitWithHostPtr(*buffer1, hostPtr, true, buffer1Offset, 1, BlitterConstants::BlitWithHostPtrDirection::FromHostPtr, csrDependencies); + + hwParser.parseCommands(csr.commandStream, offset); + + auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); + EXPECT_NE(nullptr, bltCmd); + EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getSourceBaseAddress()); + 
EXPECT_EQ(ptrOffset(buffer1->getGraphicsAllocation()->getGpuAddress(), buffer1Offset), bltCmd->getDestinationBaseAddress()); + } + { + // to hostPtr + HardwareParse hwParser; + auto offset = csr.commandStream.getUsed(); + csr.blitWithHostPtr(*buffer1, hostPtr, true, buffer1Offset, 1, BlitterConstants::BlitWithHostPtrDirection::ToHostPtr, csrDependencies); + + hwParser.parseCommands(csr.commandStream, offset); + + auto bltCmd = genCmdCast(*hwParser.cmdList.begin()); + EXPECT_NE(nullptr, bltCmd); + EXPECT_EQ(reinterpret_cast(hostPtr), bltCmd->getDestinationBaseAddress()); + EXPECT_EQ(ptrOffset(buffer1->getGraphicsAllocation()->getGpuAddress(), buffer1Offset), bltCmd->getSourceBaseAddress()); + } + for (auto buffer2Offset : addressOffsets) { + // Buffer to Buffer HardwareParse hwParser; auto offset = csr.commandStream.getUsed(); csr.blitBuffer(*buffer1, *buffer2, true, buffer1Offset, buffer2Offset, 1, csrDependencies);